In [58]:
%reload_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import sys
from sklearn import linear_model
from sklearn import svm
from sklearn import cross_validation
from sklearn import grid_search
from sklearn import metrics
from sklearn import preprocessing
from sklearn.learning_curve import learning_curve
import matplotlib.pyplot as plt
%matplotlib inline
# Raise pandas' display limits (up to 1000 columns / 250 rows) for the tables shown below.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 250)

In [59]:
# Add the parent directory to the import search path before importing the
# modules below (presumably project-local modules that live there -- confirm).
sys.path.append('../')
import read_player_stats
import training_data

In [3]:
#grab total data for all positions
# Seasons 2004 through 2014; QB and TE request pages 0-1, RB and WR request pages 0-3.
qb_total_df, rb_total_df, wr_total_df, te_total_df = (
    training_data.make_total_data(seasons=range(2004,2015), pages=pages, pos=pos)
    for pos, pages in [
        ('qb', [0,1]),
        ('rb', [0,1,2,3]),
        ('wr', [0,1,2,3]),
        ('te', [0,1]),
    ]
)

In [4]:
#training sets
# One training frame per position, built from seasons 2004 through 2013 with ppg=True.
qb_train_df, rb_train_df, wr_train_df, te_train_df = (
    training_data.make_training_df(total_df, seasons=range(2004,2014), ppg=True)
    for total_df in (qb_total_df, rb_total_df, wr_total_df, te_total_df)
)

In [5]:
# Preview the first rows of the QB frame returned by make_total_data.
qb_total_df.head()

Unnamed: 0,Name,Team,Games,PassComp,PassAtt,PassYards,PassTD,INT,RunAtt,RunYards,RunTD,FFP,FFPPG,Season
28,AJFeeley,MIA,11,191,356,1893,11,15,14,13,1,146.0,13.3,2004
164,AJFeeley,MIA,2,0,0,0,0,0,0,0,0,0.0,0.0,2005
224,AJFeeley,PHI,2,26,38,342,3,0,1,3,0,29.4,14.7,2006
304,AJFeeley,PHI,4,59,103,681,5,8,7,23,0,56.4,14.1,2007
637,AJFeeley,STL,5,53,97,548,1,2,3,4,0,31.8,6.4,2011


In [6]:
#train one regression model
# Features are every RB training column except the player name and the target (FFPPG).
rb_features = rb_train_df.drop(['Name','FFPPG'], axis=1)
X_train = np.array(rb_features)
y_train = np.array(rb_train_df['FFPPG'])

# Search 30 log-spaced ridge penalties with 5-fold CV, scored by mean absolute error.
parameters = {'alpha': np.logspace(-5,5,num=30)}
lin_model = grid_search.GridSearchCV(
    linear_model.Ridge(normalize=True),
    parameters,
    cv=5,
    scoring='mean_absolute_error',
)
lin_model.fit(X_train, y_train)

# Cross-validate the selected estimator and report per-fold scores, their mean and their spread.
scores = cross_validation.cross_val_score(
    lin_model.best_estimator_, X_train, y_train, cv=5, scoring='mean_absolute_error'
)
for summary in (scores, np.mean(scores), np.std(scores)):
    print(summary)

[-2.92093288 -3.22717855 -2.91165253 -2.64943109 -2.80679486]
-2.90319798116
0.189230341234


In [7]:
#train SVM
# Manual grid search for an RBF-kernel SVR on the RB training set: every (C, gamma)
# pair (epsilon fixed at 0.1) is scored by 5-fold cross-validated mean absolute error.
X_train = np.array(rb_train_df.drop(['Name','FFPPG'], axis=1))
y_train = np.array(rb_train_df['FFPPG'])

# Standardize the features before they are passed to the SVR.
X_scaled = preprocessing.scale(X_train)

# Start from an infinite error so the first evaluated combination always becomes
# the incumbent (no arbitrary numeric sentinel that a real score could exceed).
best_score = np.inf
best_params = {}

C_list = np.logspace(-5,2,num=11)
gamma_list = np.logspace(-3,1,num=11)
epsilon_list = [.1]
for c_test in C_list:
    for gamma_test in gamma_list:
        for epsilon_test in epsilon_list:
            svm_model = svm.SVR(kernel='rbf', C=c_test, gamma=gamma_test, epsilon=epsilon_test)
            scores = cross_validation.cross_val_score(svm_model, X_scaled, y_train, cv=5, scoring='mean_absolute_error')
            mean_score = np.mean(scores)
            candidate_params = svm_model.get_params()
            print('params: ' + str(candidate_params))
            print(mean_score)
            # The scorer reports negated errors (see the printed values), so the
            # best candidate is the one whose score has the smallest magnitude.
            if abs(mean_score) < abs(best_score):
                best_score = mean_score
                best_params = candidate_params

print('***Best Params***')
# Report the winning combination; svm_model at this point is only the LAST model
# tried in the grid, not the best one.
print(best_params)
print('Score:' + str(best_score))

params: {'kernel': 'rbf', 'C': 1.0000000000000001e-05, 'verbose': False, 'degree': 3, 'epsilon': 0.1, 'shrinking': True, 'max_iter': -1, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 0.001}
-4.8770307307
params: {'kernel': 'rbf', 'C': 1.0000000000000001e-05, 'verbose': False, 'degree': 3, 'epsilon': 0.1, 'shrinking': True, 'max_iter': -1, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 0.0025118864315095794}
-4.87695668473
params: {'kernel': 'rbf', 'C': 1.0000000000000001e-05, 'verbose': False, 'degree': 3, 'epsilon': 0.1, 'shrinking': True, 'max_iter': -1, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 0.0063095734448019303}
-4.8768193194
params: {'kernel': 'rbf', 'C': 1.0000000000000001e-05, 'verbose': False, 'degree': 3, 'epsilon': 0.1, 'shrinking': True, 'max_iter': -1, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 0.015848931924611141}
-4.87664976791
params: {'kernel': 'rbf', 'C': 1.0000000000000001e-05, 'verbose': False, 'degree': 3, 'epsilon'

In [8]:
# Show the Ridge estimator (and its alpha) that the grid search selected.
lin_model.best_estimator_

Ridge(alpha=0.012689610031679234, copy_X=True, fit_intercept=True,
   max_iter=None, normalize=True, solver='auto', tol=0.001)

In [9]:
# Fit one model per position with training_data.train_player_model (QB, RB, WR, TE order).
qb_model, rb_model, wr_model, te_model = (
    training_data.train_player_model(train_df)
    for train_df in (qb_train_df, rb_train_df, wr_train_df, te_train_df)
)

In [10]:
# Fit one model per position with training_data.train_svm_model (QB, RB, WR, TE order).
qb_svm_model, rb_svm_model, wr_svm_model, te_svm_model = (
    training_data.train_svm_model(train_df)
    for train_df in (qb_train_df, rb_train_df, wr_train_df, te_train_df)
)

***Best Params***
{'kernel': 'rbf', 'C': 3.9810717055349691, 'verbose': False, 'degree': 3, 'epsilon': 0.1, 'shrinking': True, 'max_iter': -1, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 0.0063095734448019303}
Score:-4.59134650629
***Best Params***
{'kernel': 'rbf', 'C': 100.0, 'verbose': False, 'degree': 3, 'epsilon': 0.1, 'shrinking': True, 'max_iter': -1, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 0.0025118864315095794}
Score:-2.81126181503
***Best Params***
{'kernel': 'rbf', 'C': 19.952623149688787, 'verbose': False, 'degree': 3, 'epsilon': 0.1, 'shrinking': True, 'max_iter': -1, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 0.0025118864315095794}
Score:-2.98294546523
***Best Params***
{'kernel': 'rbf', 'C': 100.0, 'verbose': False, 'degree': 3, 'epsilon': 0.1, 'shrinking': True, 'max_iter': -1, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 0.001}
Score:-1.93935933524


In [42]:
#projections for 2015
# Build the per-position input frames for the 2015 season from the total data.
qb_most_rec, rb_most_rec, wr_most_rec, te_most_rec = (
    training_data.data_for_projection(total_df, season=2015)
    for total_df in (qb_total_df, rb_total_df, wr_total_df, te_total_df)
)

In [43]:
# Run ff_projection with each position's input frame paired with its train_player_model model.
qb_proj, rb_proj, wr_proj, te_proj = (
    training_data.ff_projection(most_rec, model)
    for most_rec, model in [
        (qb_most_rec, qb_model),
        (rb_most_rec, rb_model),
        (wr_most_rec, wr_model),
        (te_most_rec, te_model),
    ]
)

In [44]:
# Run ff_projection with each position's SVM model, passing normalize=True.
qb_svm_proj, rb_svm_proj, wr_svm_proj, te_svm_proj = (
    training_data.ff_projection(most_rec, model, normalize=True)
    for most_rec, model in [
        (qb_most_rec, qb_svm_model),
        (rb_most_rec, rb_svm_model),
        (wr_most_rec, wr_svm_model),
        (te_most_rec, te_svm_model),
    ]
)

In [55]:
PROJ_COLUMNS = ['Name', 'Linear Projection', 'SVM Projection']


def make_proj_table(most_rec, lin_proj, svm_proj):
    """Combine player names with both models' projections into one table.

    Parameters
    ----------
    most_rec : DataFrame
        Frame the projections were computed from; only its 'Name' column is used.
    lin_proj, svm_proj :
        Projections from the linear and SVM models. Assumed to line up
        row-for-row with ``most_rec`` (TODO confirm against ff_projection).

    Returns
    -------
    DataFrame
        Columns 'Name', 'Linear Projection', 'SVM Projection' (in that order),
        with the index renumbered 1..n.
    """
    table = pd.DataFrame(
        {
            'Name': most_rec['Name'],
            'Linear Projection': lin_proj,
            'SVM Projection': svm_proj,
        },
        columns=PROJ_COLUMNS,  # fixes the column order regardless of dict ordering
    )
    #reindex
    table.index = range(1, len(table)+1)
    return table


# One table per position, all built the same way.
qb_proj_table = make_proj_table(qb_most_rec, qb_proj, qb_svm_proj)
rb_proj_table = make_proj_table(rb_most_rec, rb_proj, rb_svm_proj)
wr_proj_table = make_proj_table(wr_most_rec, wr_proj, wr_svm_proj)
te_proj_table = make_proj_table(te_most_rec, te_proj, te_svm_proj)

In [63]:
# Display the WR table after sort_and_reindex on 'SVM Projection'
# (the output below shows it ranked from highest to lowest, numbered from 1).
training_data.sort_and_reindex(wr_proj_table, col='SVM Projection')

Unnamed: 0,Name,Linear Projection,SVM Projection
1,AntonioBrown,19.570178,20.000636
2,DemaryiusThomas,18.163368,18.387515
3,RandallCobb,15.729187,17.963483
4,JulioJones,15.438448,17.561478
5,DezBryant,17.307398,17.4715
6,JordyNelson,17.071224,17.431823
7,AlshonJeffery,15.762167,17.187529
8,EmmanuelSanders,15.199328,16.28763
9,CalvinJohnson,14.987153,15.408966
10,AJGreen,14.445573,14.793949


In [60]:
#estimated starter counts per position across the league
#(1 QB, 2.5 RB, 3.5 WR, 1 TE per team; the counts below correspond to 12 teams)
starters = [12, 30, 42, 12] #QB RB WR TE
projections = [qb_proj_table, rb_proj_table, wr_proj_table, te_proj_table]

# Baseline per position: the projection found at index `nstarter` once the table
# has been passed through sort_and_reindex on that projection column.
lin_bases = []
for nstarter, proj in zip(starters, projections):
    lin_ranked = training_data.sort_and_reindex(proj, col='Linear Projection')
    lin_bases.append(lin_ranked.loc[nstarter, 'Linear Projection'])

svm_bases = []
for nstarter, proj in zip(starters, projections):
    svm_ranked = training_data.sort_and_reindex(proj, col='SVM Projection')
    svm_bases.append(svm_ranked.loc[nstarter, 'SVM Projection'])

In [61]:
# Show the per-position baselines (QB, RB, WR, TE): linear model first, then SVM.
for bases in (lin_bases, svm_bases):
    print(bases)

[19.800469900237673, 8.7446096270638876, 9.1755121180627928, 8.5863234367171337]
[19.974617225891155, 8.4897945465190965, 9.1045348931279477, 8.5249088281998162]


In [66]:
# Subtract each position's baseline from every player's projection. All differences
# are computed first; the new columns are attached to the tables afterwards.
lin_base_sub_proj = []
svm_base_sub_proj = []
for proj, lin_base, svm_base in zip(projections, lin_bases, svm_bases):
    lin_base_sub_proj.append(proj['Linear Projection'] - lin_base)
    svm_base_sub_proj.append(proj['SVM Projection'] - svm_base)

for proj, lin_above, svm_above in zip(projections, lin_base_sub_proj, svm_base_sub_proj):
    proj['Lin Value Above Baseline'] = lin_above
    proj['SVM Value Above Baseline'] = svm_above

# Replace the list with the tables as returned by sort_and_reindex on the linear value column.
projections = [
    training_data.sort_and_reindex(table, col='Lin Value Above Baseline')
    for table in projections
]

In [67]:
# QB table (projections is ordered QB, RB, WR, TE), after sort_and_reindex on 'Lin Value Above Baseline'.
projections[0]

Unnamed: 0,Name,Linear Projection,SVM Projection,Lin Value Above Baseline,SVM Value Above Baseline
1,PeytonManning,25.857974,23.805697,6.057504,3.831079
2,DrewBrees,24.217709,23.118523,4.417239,3.143906
3,AndrewLuck,23.341387,23.352789,3.540917,3.378172
4,BenRoethlisberger,21.330288,21.15808,1.529819,1.183462
5,AaronRodgers,21.320408,21.588898,1.519938,1.61428
6,PhilipRivers,21.269777,21.04609,1.469307,1.071473
7,MattRyan,20.611727,20.726819,0.811257,0.752201
8,RussellWilson,20.266972,21.730924,0.466502,1.756306
9,TonyRomo,20.255,19.974617,0.45453,0.0
10,TomBrady,20.210638,20.247185,0.410168,0.272568


In [68]:
# RB table (projections is ordered QB, RB, WR, TE), after sort_and_reindex on 'Lin Value Above Baseline'.
projections[1]

Unnamed: 0,Name,Linear Projection,SVM Projection,Lin Value Above Baseline,SVM Value Above Baseline
1,LeVeonBell,19.013159,19.6378,10.26855,11.148005
2,DeMarcoMurray,18.713261,20.477617,9.968651,11.987823
3,MattForte,17.427415,17.78421,8.682806,9.294415
4,MarshawnLynch,15.431197,13.959289,6.686588,5.469494
5,JamaalCharles,15.013924,11.64012,6.269315,3.150325
6,ArianFoster,14.540925,16.788556,5.796316,8.298761
7,LeSeanMcCoy,14.436467,12.032954,5.691858,3.543159
8,EddieLacy,14.340386,13.128368,5.595777,4.638573
9,JoiqueBell,12.373312,12.229851,3.628703,3.740056
10,GiovaniBernard,11.911546,11.717206,3.166937,3.227411


In [69]:
# WR table (projections is ordered QB, RB, WR, TE), after sort_and_reindex on 'Lin Value Above Baseline'.
projections[2]

Unnamed: 0,Name,Linear Projection,SVM Projection,Lin Value Above Baseline,SVM Value Above Baseline
1,AntonioBrown,19.570178,20.000636,10.394666,10.896101
2,DemaryiusThomas,18.163368,18.387515,8.987856,9.28298
3,DezBryant,17.307398,17.4715,8.131886,8.366965
4,JordyNelson,17.071224,17.431823,7.895712,8.327288
5,AlshonJeffery,15.762167,17.187529,6.586655,8.082994
6,RandallCobb,15.729187,17.963483,6.553675,8.858948
7,JulioJones,15.438448,17.561478,6.262936,8.456943
8,EmmanuelSanders,15.199328,16.28763,6.023816,7.183095
9,CalvinJohnson,14.987153,15.408966,5.811641,6.304431
10,AJGreen,14.445573,14.793949,5.270061,5.689414


In [70]:
# TE table (projections is ordered QB, RB, WR, TE), after sort_and_reindex on 'Lin Value Above Baseline'.
projections[3]

Unnamed: 0,Name,Linear Projection,SVM Projection,Lin Value Above Baseline,SVM Value Above Baseline
1,JimmyGraham,15.046483,16.392268,6.46016,7.867359
2,RobGronkowski,14.061922,15.50431,5.475598,6.979401
3,GregOlsen,12.51416,12.957457,3.927837,4.432548
4,MartellusBennett,11.457613,12.143713,2.871289,3.618804
5,JuliusThomas,11.352958,11.645589,2.766635,3.120681
6,AntonioGates,11.294143,11.433262,2.707819,2.908353
7,DelanieWalker,11.087186,11.096344,2.500863,2.571435
8,JasonWitten,10.488086,10.549348,1.901763,2.024439
9,CobyFleener,9.702871,9.49469,1.116548,0.969781
10,CharlesClay,9.281677,9.188304,0.695353,0.663396


In [72]:
# Write each position's table to its own CSV; paths are listed in the same
# QB, RB, WR, TE order as the projections list.
output_paths = [
    '../projections/2015_qb_proj.csv',
    '../projections/2015_rb_proj.csv',
    '../projections/2015_wr_proj.csv',
    '../projections/2015_te_proj.csv',
]
for table, path in zip(projections, output_paths):
    table.to_csv(path)