In [14]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score
import json, csv
from scipy.stats import spearmanr
%matplotlib inline

In [15]:
# Load the feature matrix and the target scores from JSON, then prepare the
# arrays and the cross-validation splitter used by the following cells.
#
# A failed read is reported and then re-raised. Continuing after only printing
# would either hit an unrelated NameError on `features`/`scores` (fresh kernel)
# or silently reuse stale values left over from an earlier run.
try:
    with open("./Features/Features_data/featuretest.json", encoding='UTF8') as f:
        features = json.load(f)
except EnvironmentError:
    print('No Feature File')
    raise

try:
    with open("./Features/Features_data/scoretest.json", encoding='UTF8') as f:
        scores = json.load(f)
except EnvironmentError:
    print("No Score File")
    raise

# Not referenced in this cell; presumably the expected number of feature
# columns -- TODO confirm against later cells.
feature_num = 16
x = np.array(features)
print(x.shape)

# Targets as a single-column 2-D array.
y = np.array(scores)
y = np.reshape(y,(-1,1))

# x and y are indexed with the same fold indices below, so they must have the
# same number of rows; fail early instead of mis-pairing samples and targets.
if x.shape[0] != y.shape[0]:
    raise ValueError(
        "features and scores have different lengths: %d vs %d"
        % (x.shape[0], y.shape[0])
    )

# Fixed seed so the shuffled folds (and every metric derived from them) are
# identical across Restart & Run All.
kf = KFold(n_splits = 5, shuffle = True, random_state = 0)

(1988, 16)


In [16]:
# Cross-validated linear regression: for every fold produced by `kf`, fit on
# the training rows, predict the held-out rows and print the fold's
# coefficients, MSE, R^2 and Spearman rank correlation.
for train_index, test_index in kf.split(x):
    # Partition features and targets for this fold.
    X_train, y_train = x[train_index], y[train_index]
    X_test, y_test = x[test_index], y[test_index]

    # Ordinary least squares fitted on the training partition only.
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)

    # Predictions are clamped to [1, 5] -- presumably the valid score range;
    # TODO confirm against the data source.
    y_test_pred = np.clip(regr.predict(X_test), 1, 5)

    # Fitted coefficients for this fold.
    print('Coefficients: \n', regr.coef_)

    # Mean squared error on the held-out rows.
    fold_mse = mean_squared_error(y_test, y_test_pred)
    print("Mean squared error: %.2f" % fold_mse)

    # Coefficient of determination (R^2); 1 is a perfect prediction.
    fold_r2 = r2_score(y_test, y_test_pred)
    print('Variance score: %.2f' % fold_r2)

    # Rank correlation between true and predicted scores.
    fold_rank_corr = spearmanr(y_test, y_test_pred)
    print(fold_rank_corr)

Coefficients: 
 [[-3.80152876e-02  2.96330648e-01 -1.74959921e-01 -1.21370727e-01
  -3.55918726e-01 -5.61766516e-01 -1.11022302e-16  2.82382697e-02
  -5.97598764e-01  0.00000000e+00 -9.18422483e-02 -4.22784777e-02
   1.64473752e-03  2.19590338e-01  2.89196236e-01  7.49341043e-01]]
Mean squared error: 0.99
Variance score: 0.45
SpearmanrResult(correlation=0.6732324994973811, pvalue=7.190943290884453e-54)
Coefficients: 
 [[-8.26728347e-02  2.02361304e-01  3.21273559e-02 -2.34488660e-01
  -3.38239246e-01 -5.90281860e-01  8.32667268e-17  3.57999163e-02
  -5.72104682e-01  2.22044605e-16 -2.13903404e-01 -2.58842830e-01
   1.82097014e-03  1.56951944e-01  2.61654442e-01  7.63136268e-01]]
Mean squared error: 1.18
Variance score: 0.35
SpearmanrResult(correlation=0.595727835497744, pvalue=1.3585501998395413e-39)
Coefficients: 
 [[-5.08542193e-02  1.32531474e-01  1.19574765e-01 -2.52106240e-01
  -2.89965193e-01 -6.10570277e-01 -2.08166817e-16  4.64935435e-02
  -5.64011155e-01  1.11022302e-16 -2.742