In [110]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut, KFold

In [102]:
# Read the training set, using the first CSV column as the row index.
# (No scaling is applied here; the features are normalised in the next cell.)
train_csv_path = 'regression_dataset_training.csv'
reg = pd.read_csv(train_csv_path, index_col=0)

In [141]:
# Separate the feature columns from the 'vote' outcome.
# X_org keeps the unscaled features as a DataFrame: its column statistics are
# reused further down to scale the test CSV in exactly the same way.
X_org = reg.drop('vote', axis=1)
y = reg['vote']

# Mean-normalise every feature: subtract the column mean, divide by the column range.
# NOTE: a constant column has range 0 and would turn into NaN here.
# NOTE: the statistics are taken over every row of `reg`, i.e. before the
# train/test split performed in a later cell.
X = (X_org - X_org.mean()) / (X_org.max() - X_org.min())

# scikit-learn is fed plain ndarrays. DataFrame/Series.as_matrix() was deprecated in
# pandas 0.23 and removed in pandas 1.0; .values returns the same array and works on
# both old and new pandas.
X = X.values
y = y.values

In [157]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2016
)

In [105]:
# Leave-one-out splitter for the training portion; n is the number of splits it
# will generate for X_train (used later to average the accumulated error).
loo = LeaveOneOut()
n = loo.get_n_splits(X=X_train)

In [106]:
# Leave-one-out cross-validation of an ordinary least-squares baseline.
# `error` accumulates the squared prediction error over every validation split;
# dividing it by the number of splits n afterwards gives the LOO mean squared error.
error = 0.0
lreg_baseline = linear_model.LinearRegression()

for val_train_index, val_test_index in loo.split(X_train):
    X_train_val, X_test_val = X_train[val_train_index, :], X_train[val_test_index, :]
    y_train_val, y_test_val = y_train[val_train_index], y_train[val_test_index]

    # The same estimator object is refitted from scratch on each split.
    lreg_baseline.fit(X_train_val, y_train_val)
    y_pred = lreg_baseline.predict(X_test_val)

    # predict() returns an array even for a single validation row. Reduce the squared
    # residual to a plain float so `error` stays a scalar instead of silently turning
    # into a 1-element ndarray (which is why the result used to print as `[ ... ]`).
    error += float(np.sum((y_pred - y_test_val) ** 2))

In [109]:
# Cross-validated estimate: accumulated squared error averaged over the n splits.
print(error / n)

# Refit the baseline on the whole training split and score it on the held-out split.
y_pred = lreg_baseline.fit(X_train, y_train).predict(X_test)

residuals = y_pred - y_test
MSE = np.mean(residuals**2)
print(MSE)

[ 0.0480857]
0.0462119666604


# LASSO Regression

In [158]:
# Pick the LASSO regularisation strength (alpha) by cross-validation over a
# 100-fold KFold splitter. `kl` is kept as its own name so other cells can reuse it.
kl = KFold(n_splits=100)
lasso_cv = linear_model.LassoCV(
    cv=kl,
    random_state=2016,
    n_jobs=3
).fit(X_train, y_train)
print(lasso_cv.alpha_)

0.000352455640625


In [161]:
# Held-out MSE of the cross-validated LASSO model.
y_pred = lasso_cv.predict(X_test)
lasso_residuals = y_pred - y_test
MSE = np.mean(lasso_residuals**2)
print('Lasso: ', MSE)

# Same metric after rounding the predictions to two decimals, which is how the
# predictions are written to the submission file.
y_pred_round = np.around(y_pred, decimals=2)
rounded_residuals = y_pred_round - y_test
MSE = np.mean(rounded_residuals**2)
print('Lasso with rounding: ', MSE)

Lasso:  0.049336199495
Lasso with rounding:  0.0493644


In [162]:
# Load the unlabeled test set, using the first CSV column as the row index.
test = pd.read_csv('regression_dataset_testing.csv', index_col=0)

# Scale with the *training* statistics held in X_org. Selecting X_org.columns first
# pins the features to the exact set and order the model was fitted on; a column
# missing from the test file raises a KeyError instead of silently yielding NaNs.
test = (test[X_org.columns] - X_org.mean()) / (X_org.max() - X_org.min())

# The model was fitted on a bare ndarray, so predict on one as well. Passing the
# DataFrame makes recent scikit-learn versions warn about unexpected feature names.
pred = lasso_cv.predict(test.values)
pred = np.around(pred, 2)  # predictions are submitted with two decimals

# One row per test sample: its index value as 'ID' and the rounded prediction.
data = {'ID': test.index, 'vote': pred}
test_dataframe = pd.DataFrame(data=data, index=None)
test_dataframe.to_csv('submission-lasso-3.csv', index=False)

# Elastic Net Regression

In [163]:
# Elastic net whose alpha is chosen by cross-validation, using the existing
# `kl` splitter for the folds.
elastic = linear_model.ElasticNetCV(
    cv=kl,
    random_state=2016,
    n_jobs=3
).fit(X_train, y_train)
print(elastic.alpha_)

0.00070491128125
