In [2]:
import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
import scipy
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

In [3]:
music_data = genfromtxt('year-prediction-msd-train.txt', delimiter=',')

In [4]:
# Split year values from the array
music_y = music_data[:, 0]

music_x = music_data[:, 1:90]

In [5]:
# Split the data using K-Folds, using 5 different sets
kf = KFold(n_splits=5)
kf.get_n_splits(music_x)

5

In [6]:
count = 0
train_mse = np.zeros(5)
val_mse = np.zeros(5)
for train_index, val_index in kf.split(music_x):
    music_x_train, music_x_val = music_x[train_index], music_x[val_index]
    music_y_train, music_y_val = music_y[train_index], music_y[val_index]
    
    regr = linear_model.LinearRegression(normalize = True)
    regr.fit(music_x_train, music_y_train)
    music_y_val_pred = regr.predict(music_x_val)
    music_y_train_pred = regr.predict(music_x_train)
    #print('Estimated intercept: ', regr.intercept_)
    #print('Coefficients: ', regr.coef_)
    count += 1
    train_mse[count-1] = mean_squared_error(music_y_train, music_y_train_pred)
    val_mse[count-1] = mean_squared_error(music_y_val, music_y_val_pred)
    print("Set %d -- Train MSE: %.2f Validation MSE: %.2f"
      % (count, mean_squared_error(music_y_train, music_y_train_pred), mean_squared_error(music_y_val, music_y_val_pred)))



Set 1 -- Train MSE: 91.45 Validation MSE: 90.51
Set 2 -- Train MSE: 91.53 Validation MSE: 90.20
Set 3 -- Train MSE: 90.99 Validation MSE: 92.34
Set 4 -- Train MSE: 91.24 Validation MSE: 91.33
Set 5 -- Train MSE: 91.05 Validation MSE: 92.12


In [7]:
print("Mean Train MSE: %.2f Mean Validation MSE: %.2f" % (np.average(train_mse), np.average(val_mse)))
# The coefficients
print('Estimated intercept: ', regr.intercept_)

# The coefficients
print('Coefficients: ', regr.coef_)

Mean Train MSE: 91.25 Mean Validation MSE: 91.30
Estimated intercept:  1951.10281144
Coefficients:  [  8.75639345e-01  -5.62540046e-02  -4.41323415e-02   4.11333022e-03
  -1.50999673e-02  -2.19989936e-01  -6.75886934e-03  -1.00055899e-01
  -6.53097961e-02   2.01988381e-02  -1.68867620e-01  -2.01718001e-03
   4.70676496e-02   3.59014415e-04  -4.47912002e-04   6.29820419e-04
   4.12670283e-04   1.39570601e-03   1.97328269e-03   2.20417993e-03
   7.48751599e-04  -6.44767497e-04   7.69927317e-03   2.79270644e-03
  -3.59534246e-03   3.38734605e-05   1.61345349e-03   5.32076408e-04
   9.88023250e-04  -1.74057071e-04  -1.41016329e-03  -1.38827440e-03
  -5.53457219e-03   2.20287341e-03   1.36076553e-03  -5.15449816e-03
  -2.57536356e-04   6.78848759e-04   1.37075992e-03  -1.71279897e-03
  -2.25715740e-03  -7.57222803e-04  -1.45860693e-03  -1.93512222e-03
  -3.27479436e-03   6.89072383e-03   4.70857954e-04  -2.02695788e-03
   3.21436507e-04   2.01635435e-03  -3.96283765e-05  -1.84255266e-03
   