# Regression

In [10]:
import learning
%load_ext autoreload
%autoreload 2
from IPython.core import display as ICD
import pandas as pd
from mord import OrdinalRidge
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor
pd.options.display.max_columns = 999
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_regression
from tqdm import tqdm
from sklearn.externals import joblib
from sklearn.svm import SVR
import copy

def trystuff(output, train, test, models, transforms):
    """Try every (transform, model) combination and keep the lowest-scoring one.

    Each transform is fitted on the training features, each model is fitted on
    the transformed features, and the pair is then scored on ``test`` through
    the ``learning`` module.  A lower score is treated as better.

    Parameters
    ----------
    output : str
        Name of the target passed to ``learning.prepare_base_data``.  The
        special value ``"FPoints"`` is scored with ``learning.score_overall``;
        any other value is scored with ``learning.score`` using single-entry
        ``{output: ...}`` dictionaries.
    train, test
        Data sets forwarded unchanged to the ``learning`` helpers.
    models, transforms : iterable
        Candidate estimators / feature transforms.  They are deep-copied
        before fitting, so the caller's objects are never mutated.

    Returns
    -------
    tuple
        ``(modeldict, bestmodel, besttrans, minscore, avgtrues, avgpreds)``
        where ``modeldict`` maps ``(transform, model)`` to its score and the
        two averages are ``np.mean`` of the best pair's true / predicted
        values.  If no pair was evaluated (empty inputs) the "best" entries
        are ``None`` and ``minscore`` is ``float("inf")``.
    """
    # Start from +inf rather than a magic constant so that the first evaluated
    # pair always becomes the incumbent, however large its score is.
    minscore = float("inf")
    # Pre-bind the result slots so the return statement can never hit an
    # UnboundLocalError when nothing beats the initial score.
    bestmodel = besttrans = avgtrues = avgpreds = None
    modeldict = {}
    _, Xtrain, ytrain = learning.prepare_base_data(train, output=output)

    for transform in copy.deepcopy(transforms):
        transform.fit(Xtrain, ytrain)
        Xt = transform.transform(Xtrain)
        # Fresh copies of the caller's (unfitted) models for every transform,
        # so each fitted model in `modeldict` belongs to exactly one transform.
        for model in copy.deepcopy(models):
            model.fit(Xt, ytrain)
            if output == "FPoints":
                trues, preds, score = learning.score_overall(test, model, transform)
            else:
                trues, preds, score = learning.score(test, {output: model}, {output: transform})
            modeldict[(transform, model)] = score
            if score < minscore:
                avgtrues = np.mean(trues)
                avgpreds = np.mean(preds)
                besttrans = transform
                bestmodel = model
                minscore = score

    return modeldict, bestmodel, besttrans, minscore, avgtrues, avgpreds

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Separate models

In [12]:
# Candidate estimators tried for every individual stat.
models = [
    LinearRegression(),
    MLPRegressor(),
    # MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(20, 20), random_state=1),
    # NOTE(review): a classifier sits in an otherwise regression-only list — confirm this is intended.
    KNeighborsClassifier(5),
]

# Univariate feature selection; only k=5 and k=6 are active
# (k=7..16 were commented out in the original cell).
transforms = [SelectKBest(f_regression, k=k) for k in (5, 6)]

# Fit on the 2015 data, evaluate on the 2014 data.
train, test = learning.get_base_data(2015), learning.get_base_data(2014)

outputs = ["Goals", "Assists", "Shots", "Blocks"]
fmodels, ftrans, fscores = {}, {}, {}

print('Starting...')
for output in outputs:
    print(output)
    # Search all (transform, model) pairs for this stat.
    modeldict, m, trans, score, avgtrues, avgpreds = trystuff(
        output, train, test, models, transforms
    )
    # One value per line: best model, best transform, score, mean true, mean predicted.
    print(m, trans, score, avgtrues, avgpreds, sep="\n")
    # Persist the full score table for this stat.
    joblib.dump(modeldict, output + "_" + "modeldict.p")
    fmodels[output] = m
    ftrans[output] = trans
    fscores[output] = score

# Score the combination of the per-stat winners on the test data.
true, pred, score = learning.score(test, fmodels, ftrans)
print(score)

Starting...
Goals
MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
SelectKBest(k=5, score_func=<function f_regression at 0x000001F4582FE9D8>)
25.8275862069
36.0689655172
10.2413793103
Assists
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
SelectKBest(k=5, score_func=<function f_regression at 0x000001F4582FE9D8>)
21.6781609195
31.724137931
10.0459770115
Shots
MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.00

## One model

In [15]:
import learning
from sklearn.decomposition import PCA

# Candidate regressors for predicting fantasy points directly.
models = [LinearRegression(), MLPRegressor(), Ridge(), Lasso(), SVR()]

# PCA projections with 1 through 15 components.
transforms = [PCA(n_components=n) for n in range(1, 16)]

# Fit on the 2015 data, evaluate on the 2014 data.
train, test = learning.get_base_data(2015), learning.get_base_data(2014)

print('Starting...')
# Note: `ftrans` is rebound here to a single transform object.
modeldict, fmodel, ftrans, score, avgtrues, avgpreds = trystuff(
    "FPoints", train, test, models, transforms
)
# Persist the full score table for the single-model search.
joblib.dump(modeldict, "FPoints" + "_" + "modeldict.p")
print(fmodel, ftrans, sep="\n")
print("Average fantasy points with our model: " + str(avgpreds))
print("Optimal average fantasy points: " + str(avgtrues))
# NOTE(review): the label says MSE, but the value is whatever
# learning.score_overall returns — confirm the metric.
print("MSE: " + str(score))

Starting...
MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
PCA(copy=True, iterated_power='auto', n_components=12, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
Average fantasy points with our model: 38.4195402299
Optimal average fantasy points: 72.4137931034
MSE: 33.9942528736
