In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from scipy.stats import randint
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [2]:
# Load the training table. header=None means the file's first line is treated
# as data and the columns are labelled by position (0, 1, 2, ...).
df = pd.read_csv('7406train.csv', header=None)

In [54]:
# Collapse the replicate response columns into one mean and one variance per
# (x1, x2) row. Columns 0 and 1 of `df` become the index, so the row-wise
# statistics below run over the remaining columns only.
replicates = df.set_index([0, 1])
mean = replicates.mean(axis=1)
var = replicates.var(axis=1)

# Build the summary frame directly from the two statistics rather than
# inserting them into the wide replicate frame and slicing them back out with
# a mixed int/str label slice. The recorded output of this cell echoes both
# insertion lines, which looks like a pandas warning (message text was not
# preserved); no column is inserted into the wide frame here.
df_t = (
    pd.DataFrame({'Ym': mean, 'Yv': var})
    .reset_index()
    .rename(columns={0: 'x1', 1: 'x2'})
)
df_t

  df_t['mean'] = mean
  df_t['var'] = var


Unnamed: 0,x1,x2,Ym,Yv
0,0.00,0.00,20.315556,97.676165
1,0.00,0.01,21.115456,112.681665
2,0.00,0.02,20.558719,94.542537
3,0.00,0.03,19.164957,81.550269
4,0.00,0.04,20.051926,99.869165
...,...,...,...,...
9995,0.99,0.95,60.997455,127.980617
9996,0.99,0.96,61.999813,100.911845
9997,0.99,0.97,62.093463,80.265974
9998,0.99,0.98,61.876952,53.491256


In [55]:
# Summary statistics for the inputs (x1, x2) and the two targets (Ym, Yv).
df_t.describe()

Unnamed: 0,x1,x2,Ym,Yv
count,10000.0,10000.0,10000.0,10000.0
mean,0.495,0.495,44.407966,235.14789
std,0.288675,0.288675,13.672209,88.370869
min,0.0,0.0,4.749797,1.722656
25%,0.2475,0.2475,34.471645,173.756718
50%,0.495,0.495,47.034508,258.930365
75%,0.7425,0.7425,55.897448,305.940257
max,0.99,0.99,67.629039,409.924192


In [56]:
# Separate the input columns from the two response summaries.
X = df_t.drop(columns=['Ym', 'Yv'])
Ym, Yv = df_t['Ym'], df_t['Yv']

In [57]:
# Standardise the inputs and each target with its own scaler, so predictions
# can later be mapped back to original units with inverse_transform.
# NOTE(review): the scalers are fitted on every row before the train/test
# split in the next cell, so the held-out rows contribute to the scaling
# statistics.
sc_X, sc_ym, sc_yv = StandardScaler(), StandardScaler(), StandardScaler()

Xs = sc_X.fit_transform(X)
# The targets are 1-D Series; reshape each to a single column for the scaler.
ysm = sc_ym.fit_transform(Ym.to_numpy().reshape(-1, 1))
ysv = sc_yv.fit_transform(Yv.to_numpy().reshape(-1, 1))

In [58]:
# Hold out 20% of the rows for evaluation. Both calls use the same input
# matrix, test fraction and seed; only the target array differs.
split_options = {'test_size': 0.2, 'random_state': 0}
Xsm_train, Xsm_test, ysm_train, ysm_test = train_test_split(Xs, ysm, **split_options)
Xsv_train, Xsv_test, ysv_train, ysv_test = train_test_split(Xs, ysv, **split_options)

In [59]:
# Cross-validated RBF-kernel SVR for the scaled mean target. The grid holds a
# single candidate, so the 5-fold search only scores that one configuration.
mean_param_grid = {
    'kernel': ['rbf'],
    'gamma': ['auto'],
    'C': [2000],
}
rand_search = GridSearchCV(SVR(), param_grid=mean_param_grid, cv=5)

# ravel() turns the (n, 1) scaled target into the 1-D vector passed to fit.
rand_search.fit(Xsm_train, ysm_train.ravel())

In [60]:
# Report the selected parameters of the mean-target search; the grid has one
# candidate, so this echoes that configuration.
print('Best hyperparameters:',  rand_search.best_params_)

Best hyperparameters: {'C': 2000, 'gamma': 'auto', 'kernel': 'rbf'}


In [61]:
# Predict the scaled mean target on the held-out rows. This has to run while
# rand_search still refers to the mean-target search; a later cell rebinds the
# same name to the variance-target search.
ysm_pred = rand_search.best_estimator_.predict(Xsm_test)

In [62]:
# Held-out MSE of the mean model, computed after mapping both the predictions
# and the targets back from standardised units to the original scale of Ym.
ym_pred_orig = sc_ym.inverse_transform(ysm_pred.reshape(-1, 1))
ym_test_orig = sc_ym.inverse_transform(ysm_test)
print('MSE-Mean: ' + str(mean_squared_error(ym_pred_orig, ym_test_orig)))

MSE-Mean: 1.2653697680270688


In [63]:
# Cross-validated RBF-kernel SVR for the scaled variance target. As with the
# mean model, the grid holds a single candidate. This rebinds rand_search, so
# the mean-target search object is no longer reachable under that name.
var_param_grid = {
    'kernel': ['rbf'],
    'gamma': ['auto'],
    'C': [110],
}
rand_search = GridSearchCV(SVR(), param_grid=var_param_grid, cv=5)

# ravel() turns the (n, 1) scaled target into the 1-D vector passed to fit.
rand_search.fit(Xsv_train, ysv_train.ravel())

In [64]:
# Report the selected parameters of the variance-target search; the grid has
# one candidate, so this echoes that configuration.
print('Best hyperparameters:',  rand_search.best_params_)

Best hyperparameters: {'C': 110, 'gamma': 'auto', 'kernel': 'rbf'}


In [65]:
# Predict the scaled variance target on the held-out rows using the search
# object fitted on the variance split.
ysv_pred = rand_search.best_estimator_.predict(Xsv_test)

In [66]:
# Held-out MSE of the variance model, computed after mapping both the
# predictions and the targets back from standardised units to the original
# scale of Yv.
# The label previously read 'MSE-Mean' (carried over from the mean-model
# cell), which made the two reported errors indistinguishable in the output.
print(
    'MSE-Var: ' +
    str(mean_squared_error(
        sc_yv.inverse_transform(ysv_pred.reshape(-1,1)),
        sc_yv.inverse_transform(ysv_test))
       )
)

MSE-Mean: 531.6501438237473


In [67]:
# Final variance model: the same SVR configuration as the variance search,
# fitted on all scaled rows (Xs, ysv) rather than only the training split.
final_var_params = {'kernel': 'rbf', 'gamma': 'auto', 'C': 110}
reg = SVR(**final_var_params)
reg.fit(Xs, ysv.ravel())

In [75]:
# Load the test inputs. header=None labels the columns by position (0, 1, ...)
# and treats the first line of the file as data.
Xtest = pd.read_csv('7406test.csv', header=None)

In [77]:
# sc_X was fitted on a frame whose columns are named 'x1' and 'x2', whereas
# Xtest was read with header=None and carries the integer labels 0 and 1.
# Rename the test columns to the training names (same order) before scaling,
# so the scaler receives matching feature names instead of relying on column
# position alone. The scaled values themselves are unchanged.
Xtests = sc_X.transform(Xtest.rename(columns={0: 'x1', 1: 'x2'}))



In [79]:
# Predict the variance in standardised units, then map it back to the original
# scale of Yv. Note the two names differ only in capitalisation:
# ypredVar is the scaled 1-D prediction, yPredVar the unscaled (n, 1) result.
ypredVar = reg.predict(Xtests)
yPredVar = sc_yv.inverse_transform(ypredVar[:, None])

In [86]:
# Load the previously saved mean predictions and discard the first row of the
# file. With header=None that first line is read as data, not as column names.
dfFinal = pd.read_csv('1.mean.csv', header=None).iloc[1:]

In [87]:
# Display the loaded frame (columns 0, 1, 2) before the variance column is added.
dfFinal

Unnamed: 0,0,1,2
1,0.011679,0.002427,20.758784
2,0.011679,0.024405,20.729182
3,0.011679,0.058861,20.706443
4,0.011679,0.065451,20.703235
5,0.011679,0.071387,20.702313
...,...,...,...
2496,0.992762,0.838541,60.322309
2497,0.992762,0.924013,61.005001
2498,0.992762,0.927652,61.060253
2499,0.992762,0.942036,61.305804


In [90]:
# Append the predicted variance as a fourth column. yPredVar is an (n, 1)
# array, flattened here to one value per row. The label is the string '3',
# whereas the existing columns carry the integer labels 0, 1 and 2.
dfFinal = dfFinal.assign(**{'3': yPredVar.ravel()})

In [91]:
# Display the frame again, now including the predicted-variance column '3'.
dfFinal

Unnamed: 0,0,1,2,3
1,0.011679,0.002427,20.758784,105.841928
2,0.011679,0.024405,20.729182,103.372116
3,0.011679,0.058861,20.706443,101.183332
4,0.011679,0.065451,20.703235,100.964290
5,0.011679,0.071387,20.702313,100.813336
...,...,...,...,...
2496,0.992762,0.838541,60.322309,248.583847
2497,0.992762,0.924013,61.005001,159.835531
2498,0.992762,0.927652,61.060253,154.331597
2499,0.992762,0.942036,61.305804,130.933691


In [95]:
# Write the result file as bare values: no index column and no header row, so
# the column labels are not part of the output.
dfFinal.to_csv('1.Jacobson.Macmillan.csv', index=False, header=False)