In [None]:
from sklearn import svm, neighbors, preprocessing, metrics, utils
import csv
import random
import numpy as np
import pandas as pd
import math

def submit_score(predictions, team_key):
    """
    Submit your predictions for scoring

    Args:
        predictions (DataFrame): Pandas DataFrame containing the following required
            column:
                1. idx (int) - The unique identifier for each observation
                2. predictions (float) - Your predicted value
        team_key (str): Your team's unique identifier

    Returns:
        The value of the ``score`` field of the service's JSON reply when the
        request succeeds (HTTP 200), or ``None`` when the service answers
        with HTTP 404 (the reported error is printed in that case).

    Raises:
        ValueError: If the service answers with any status other than
            200 or 404.
    """

    import requests
    import json
    import numpy

    def default(o):
        # json.dumps calls this hook for objects it cannot serialise itself.
        # Accept every NumPy integer/float width, not only int64, so that
        # e.g. int32 ids or float32 predictions do not abort the upload.
        if isinstance(o, numpy.integer):
            return int(o)
        if isinstance(o, numpy.floating):
            return float(o)
        raise TypeError(
            'Object of type {} is not JSON serializable'.format(type(o).__name__)
        )

    API_ENDPOINT = "http://coe-hackathon-dot-atd-fn-anacoe-dev.appspot.com/submitscore"
    payload = {
        "team_key": team_key,
        # Only the two required columns are sent, one dict per observation.
        "data": predictions.loc[:, ["idx", "predictions"]].to_dict(orient="records")
    }
    resp = requests.post(
        API_ENDPOINT,
        data=json.dumps(payload, default=default),
        headers={'Content-Type': 'application/json'}
    )

    if resp.status_code == 404:
        # A 404 reply carries an ``error`` field; report it and give up quietly.
        print(resp.json()['error'])
        return None

    if resp.status_code != 200:
        raise ValueError('There was an error processing your request: '
                         '\n{}'.format(resp.text))

    score = resp.json()['score']
    print('Submission successful! Your score was \n{}'.format(score))
    return score

def read_data(filename='data/train.csv'):
    """Load a CSV file and split it into feature columns and target values.

    NaN and +/- infinity are replaced by 0 before anything else happens.

    Args:
        filename (str): Path of the CSV file to read.

    Returns:
        tuple: ``(samples, truth)``.
            * If the file has a ``UNITS`` column, the rows are shuffled,
              ``truth`` is that column and ``samples`` is the frame without
              ``idx`` and ``UNITS``.
            * Otherwise ``truth`` is an empty list and ``samples`` is the
              frame without its first column and without ``idx``.
    """
    frame = pd.read_csv(filename).replace([np.nan, np.inf, -np.inf], 0)

    if 'UNITS' not in frame.columns:
        # No target column: drop the leading column together with the id.
        features = frame.drop([frame.columns[0], 'idx'], axis=1)
        return (features, [])

    # Target column present: shuffle the rows, then split target from features.
    shuffled = utils.shuffle(frame)
    targets = shuffled['UNITS']
    features = shuffled.drop(['idx', 'UNITS'], axis=1)
    return (features, targets)

def preprocess_data(data):
    """Encode string columns as integer codes and min-max scale every column.

    Both the category codes and the scaler are derived from ``data`` alone:
    each call builds its own categories and fits a fresh ``MinMaxScaler``.

    Args:
        data (DataFrame): Feature table. It is left untouched; the encoding
            is applied to a copy.

    Returns:
        DataFrame: The scaled values. It is built from the scaler's output
            array, so it carries default integer row and column labels
            rather than the labels of ``data``.
    """
    # Work on a copy so the caller's frame keeps its original string columns.
    encoded = data.copy()

    ## transform nominals to numeric codes
    for col in encoded:
        if encoded[col].dtype == 'object':
            encoded[col] = encoded[col].astype('category').cat.codes

    ## normalize the data
    min_max_scaler = preprocessing.MinMaxScaler()
    np_scaled = min_max_scaler.fit_transform(encoded)
    return pd.DataFrame(np_scaled)
        
    
def train_SVR(samples, classes, kernel='rbf', C=100, gamma=0.1, epsilon=.1):
    """Fit a support vector regressor and return it.

    Args:
        samples: Training feature matrix, passed to ``SVR.fit`` as ``X``.
        classes: Target values, passed to ``SVR.fit`` as ``y``.
        kernel: Kernel name forwarded to ``svm.SVR``.
        C: Regularisation parameter forwarded to ``svm.SVR``.
        gamma: Kernel coefficient forwarded to ``svm.SVR``.
        epsilon: Epsilon-tube width forwarded to ``svm.SVR``.

    The defaults reproduce the configuration that used to be hard-coded, so
    existing two-argument calls train exactly the same model.

    Returns:
        The fitted ``svm.SVR`` instance.
    """
    clf = svm.SVR(kernel=kernel, C=C, gamma=gamma, epsilon=epsilon)
    clf.fit(samples, classes)
    return clf

def test_model(model, test_samples, truth):
    predictions = model.predict(test_samples)
    return zip(truth, predictions)

def upload_leaderboard(predictions):
    """Package predictions as an ``idx``/``predictions`` table and submit it.

    Args:
        predictions: Array-like of predicted values in submission order.
            Each value is given the id of its position (0, 1, 2, ...).

    The table is handed to ``submit_score`` together with the team key.
    """
    # NOTE(review): the team key is a credential embedded in the source;
    # consider loading it from configuration instead.
    teamkey = '$pbkdf2-sha512$25000$BOA8p/S.N6aUktK6d6415g$5KWTQmlXfm30T4H/c/Vo8Tiacfqd/qst5n3nU9JLSyV3fqZxerFbbCYYwCc3KVlOyv1pnvGJga7CU/CMzM6yNw'

    # Use the argument that was passed in. The body previously read a global
    # named ``upload_predictions`` and silently ignored its own parameter.
    values = np.array(predictions)

    upload_dataframe = pd.DataFrame()
    upload_dataframe['idx'] = range(values.size)
    upload_dataframe['predictions'] = values
    submit_score(upload_dataframe[['idx', 'predictions']], teamkey)

In [None]:
# Load the default training file (data/train.csv) and split it into features
# and the UNITS target, then encode and min-max scale the features.
raw_samples, truth = read_data()
processed_samples = preprocess_data(raw_samples)

In [None]:
# Fit the SVR on every processed training row; no rows are held out here.
model = train_SVR(processed_samples, truth)

In [None]:
# Predict for the row slice 10000:11000 of the training samples.
# NOTE(review): these rows were part of the data the model was fitted on.
predictions = model.predict(processed_samples[10000:11000])

In [None]:
# Root-mean-squared error between the targets and the predictions for the
# same 10000:11000 slice.
# NOTE(review): the slice comes from the training rows, so this measures
# training error rather than held-out error.
math.sqrt(metrics.mean_squared_error(truth.values[10000:11000], predictions))

In [None]:
# Load the unlabelled test file; read_data returns an empty list as the
# second element when there is no UNITS column, so it is discarded.
test_samples, _ = read_data('data/X_test.csv')
# NOTE(review): preprocess_data builds new category codes and fits a new
# MinMaxScaler on this frame, independently of the training data - confirm
# that the resulting features are comparable with what the model was fit on.
test_samples = preprocess_data(test_samples)

In [None]:
# Predict the target for every row of the processed test set.
upload_predictions = model.predict(test_samples)

In [None]:
# Send the test-set predictions to the scoring service.
upload_leaderboard(upload_predictions)