# Regression

In [3]:
import learning
%load_ext autoreload
%autoreload 2
from IPython.core import display as ICD
import pandas as pd
from mord import OrdinalRidge
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor
pd.options.display.max_columns = 999
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_regression
from tqdm import tqdm
from sklearn.externals import joblib
from sklearn.svm import SVR
import copy

def trystuff(output, train, test, models, transforms):
    """Fit every (transform, model) pair on ``train`` and score it on ``test``.

    Each transform is fitted on the training features, each model is fitted
    on the transformed features, and the pair is scored as
    ``np.mean(preds) / np.mean(trues)``, where ``trues`` and ``preds`` come
    from ``learning.score_overall`` (when ``output == "FPoints"``) or from
    ``learning.score`` (any other output). The pair with the highest ratio
    wins; on ties the earliest pair is kept.

    The caller's ``models`` and ``transforms`` are never fitted: only deep
    copies are, and every transform gets its own fresh copies of the models.

    Args:
        output: Target name forwarded to ``learning.prepare_base_data``;
            the value ``"FPoints"`` selects ``learning.score_overall``.
        train: Training data handed to ``learning.prepare_base_data``.
        test: Evaluation data handed to the ``learning`` scoring helpers.
        models: Collection of estimators exposing ``fit``.
        transforms: Collection of transformers exposing ``fit`` and
            ``transform``.

    Returns:
        A tuple ``(modeldict, bestmodel, besttrans, maxscore, avgtrues,
        avgpreds)``. ``modeldict`` maps each fitted ``(transform, model)``
        pair to its score. If no pair produced a comparable score (empty
        inputs, or every score was NaN), the ``best*``/``avg*`` entries are
        ``None`` and ``maxscore`` is ``-inf`` instead of the function
        raising ``UnboundLocalError``.
    """
    # Start below any real score so that non-positive scores can still win
    # and the "best" names are always bound when we return.
    maxscore = float("-inf")
    bestmodel = besttrans = avgtrues = avgpreds = None
    modeldict = {}
    _, Xtrain, ytrain = learning.prepare_base_data(train, output=output)
    for transform in copy.deepcopy(transforms):
        transform.fit(Xtrain, ytrain)
        Xt = transform.transform(Xtrain)
        # Copy from the caller's pristine list for every transform so that no
        # fitted state carries over from the previous transform's models.
        for model in copy.deepcopy(models):
            model.fit(Xt, ytrain)
            # The helpers' own third return value is not used; the score is
            # recomputed below as a ratio of means.
            if output == "FPoints":
                trues, preds, _ = learning.score_overall(test, model, transform)
            else:
                trues, preds, _ = learning.score(test, {output: model}, {output: transform})
            mean_trues = np.mean(trues)
            mean_preds = np.mean(preds)
            score = mean_preds / mean_trues
            print(transform)
            print(model)
            print(score)
            modeldict[(transform, model)] = score
            if score > maxscore:
                avgtrues = mean_trues
                avgpreds = mean_preds
                besttrans = transform
                bestmodel = model
                maxscore = score
    return modeldict, bestmodel, besttrans, maxscore, avgtrues, avgpreds

## Separate models

In [4]:
from sklearn.svm import SVR
from sklearn.decomposition import PCA

# Regressors evaluated for every per-statistic target.
models = [LinearRegression(), MLPRegressor(), Ridge(), Lasso(), SVR()]

# PCA projections keeping 7 through 13 components.
transforms = [PCA(n_components=k) for k in range(7, 14)]

# Fit on the 2015 base data, evaluate on the 2014 base data.
train = learning.get_base_data(2015)
test = learning.get_base_data(2014)

# One model/transform search is run per output column listed here.
outputs = ["Goals", "Assists", "Shots", "Blocks"]
fmodels, ftrans, fscores = {}, {}, {}

print('Starting...', end="\n\n")
for output in outputs:
    print('################# ' + str(output) + ' #################', end="\n\n")
    modeldict, m, trans, score, avgtrues, avgpreds = trystuff(
        output, train, test, models, transforms
    )

    # Dump every (transform, model) pair with its score, blank-line separated.
    for key, value in modeldict.items():
        print(key[0], key[1], value, sep="\n", end="\n\n")

    # Report the winning pair, followed by two blank lines.
    print('Best...', m, trans, score, sep="\n", end="\n\n\n")

    # Persist the full score table, then remember the winner for this output.
    joblib.dump(modeldict, output + "_" + "modeldict.p")
    fmodels[output], ftrans[output], fscores[output] = m, trans, score

# Score the per-output winners together on the test data.
true, pred, score = learning.score(test, fmodels, ftrans)
print()
print('Overall score...')
print(np.mean(pred) / np.mean(true))

Starting...

################# Goals #################

PCA(copy=True, iterated_power='auto', n_components=7, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
0.272466539197
PCA(copy=True, iterated_power='auto', n_components=7, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
0.280114722753
PCA(copy=True, iterated_power='auto', n_components=7, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max

## One model

In [4]:
import learning
from sklearn.decomposition import PCA

# Candidate regressors for the single-model ("FPoints") search.
models = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    MLPRegressor(),
    SVR(),
]

# PCA projections keeping 6 through 13 components.
transforms = [PCA(n_components=n_components) for n_components in range(6, 14)]

# Fit on the 2015 base data, evaluate on the 2014 base data.
train = learning.get_base_data(2015)
test = learning.get_base_data(2014)

print('Starting...')
modeldict, fmodel, ftrans, score, avgtrues, avgpreds = trystuff("FPoints", train, test, models, transforms)
# Persist the score of every (transform, model) pair for later inspection.
joblib.dump(modeldict, "FPoints" + "_" + "modeldict.p")

for key, value in modeldict.items():
    print(key[0])
    print(key[1])
    print(value)
    print()

print('Best...')
print(fmodel)
print(ftrans)
print("Average fantasy points with our model: " + str(avgpreds))
print("Optimal average fantasy points: " + str(avgtrues))
# trystuff's score is mean(preds) / mean(trues) for the best pair, i.e. the
# ratio of the two averages printed above -- it is not a mean squared error.
print("Score (model average / optimal average): " + str(score))

Starting...
PCA(copy=True, iterated_power='auto', n_components=6, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
0.519126984127
PCA(copy=True, iterated_power='auto', n_components=6, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)
0.519126984127
PCA(copy=True, iterated_power='auto', n_components=6, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)
0.510396825397
PCA(copy=True, iterated_power='auto', n_components=6, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
