In [8]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.model_selection import cross_val_score, cross_validate, train_test_split
from sklearn.metrics import make_scorer, mean_squared_error

input_file = "train.csv"
output_file = "submission.txt"

# Load the training table; read_csv is comma-delimited by default and
# header=0 takes the column names from the first row.
df = pd.read_csv(input_file, header=0)
# Column names exactly as read from the file, before any filtering.
original_headers = list(df.columns.values)
# Drop the non-numeric columns through the public select_dtypes API instead of
# the private DataFrame._get_numeric_data(). "bool" is listed explicitly so that
# boolean columns, which the private helper treats as numeric, are still kept.
df = df.select_dtypes(include=["number", "bool"])
# Column names that survived the numeric filter.
numeric_headers = list(df.columns.values)
# NumPy array of the numeric values for input into scikit-learn.
data = df.values

# Positional split of the numeric columns: column 1 is the regression target
# (the 1:2 slice keeps it two-dimensional) and columns 2 onward are the features.
# Column 0 is left out -- presumably a row id; confirm against train.csv.
x = data[:, 2:]
y = data[:, 1:2]

n = len(data)  # number of rows in the numeric table
folds = 10  # passed as cv= to cross_validate
lambdas = [0.01, 0.1, 1, 10, 100]  # Ridge alpha values that are evaluated


In [9]:
# Scorer that reports the plain mean squared error of each validation fold.
mse = make_scorer(mean_squared_error)
# Collects one averaged RMSE per regularisation strength, in `lambdas` order.
solution = []

# Ridge without an intercept term; the same estimator object is reused and only
# its alpha is swapped before each cross-validation run.
clf = Ridge(fit_intercept=False)
for strength in lambdas:
    print("lambda =", strength, ":")
    clf.set_params(alpha=strength)
    outcome = cross_validate(
        clf,
        x,
        y,
        cv=folds,
        scoring=mse,
        return_estimator=True,
    )

    # return_estimator=True makes the fitted model of every fold available.
    print("Coefficients:")
    for fitted in outcome['estimator']:
        print(fitted.coef_)

    print("Test MSEs:")
    fold_mse = outcome['test_score']
    fold_rmse = np.sqrt(fold_mse)
    print("MSEs:", fold_mse)
    print("RMSEs:", fold_rmse)

    # Mean of the per-fold RMSEs (not the square root of the mean MSE).
    mean_rmse = fold_rmse.mean()
    print("Average RMSE:", mean_rmse)
    print()

    solution.append(mean_rmse)

lambda = 0.01 :
Coefficients:
[[-0.10108888  0.03846894 -0.02268879  2.39745249 -2.06229403  6.2115462
  -0.01804737 -0.90142152  0.14569282 -0.00900866 -0.52104981  0.01514141
  -0.33969079]]
[[-1.00544828e-01  5.69141886e-02  2.27958643e-04  2.90298268e+00
  -1.58086100e+00  5.69553807e+00  8.35282511e-04 -9.26860719e-01
   1.33624523e-01 -8.57246342e-03 -3.57075073e-01  1.40546315e-02
  -4.53592605e-01]]
[[-0.09046294  0.05017341 -0.00767076  3.0729374  -2.76478818  6.02655193
  -0.01290616 -1.07418568  0.17628867 -0.00949958 -0.35751609  0.01360101
  -0.42527017]]
[[-8.95653538e-02  5.65307663e-02  2.25260443e-02  2.70288420e+00
  -3.09141304e+00  5.78652692e+00 -3.50410022e-03 -9.57532661e-01
   1.72182772e-01 -9.55204699e-03 -3.59007603e-01  1.49680322e-02
  -4.33051426e-01]]
[[-0.10896455  0.0469815  -0.00627532  3.67817576 -2.71155514  5.94562464
  -0.00783161 -1.00450773  0.19406502 -0.00838754 -0.42913767  0.01655521
  -0.44976845]]
[[-8.89999507e-02  4.29641397e-02 -2.433292

In [10]:
# Write the collected average RMSEs to the submission file, one value per line,
# formatted with eight decimal places.
np.savetxt(fname=output_file, X=solution, fmt="%.8f")