In [1]:
%reload_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import sys
from sklearn import linear_model
from sklearn import svm
from sklearn import cross_validation
from sklearn import grid_search
from sklearn import metrics
from sklearn import preprocessing
from sklearn.learning_curve import learning_curve
import matplotlib.pyplot as plt
%matplotlib inline
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 250

In [2]:
sys.path.append('../')
import read_player_stats
import training_data

In [3]:
#grab total data for all positions
qb_total_df = training_data.make_total_data(seasons=range(2004,2015), pages=[0,1], pos='qb')
rb_total_df = training_data.make_total_data(seasons=range(2004,2015), pages=[0,1,2,3], pos='rb')
wr_total_df = training_data.make_total_data(seasons=range(2004,2015), pages=[0,1,2,3], pos='wr')
te_total_df = training_data.make_total_data(seasons=range(2004,2015), pages=[0,1], pos='te')

In [4]:
#training sets
qb_train_df = training_data.make_training_df(qb_total_df, seasons=range(2004,2014), ppg=True)
rb_train_df = training_data.make_training_df(rb_total_df, seasons=range(2004,2014), ppg=True)
wr_train_df = training_data.make_training_df(wr_total_df, seasons=range(2004,2014), ppg=True)
te_train_df = training_data.make_training_df(te_total_df, seasons=range(2004,2014), ppg=True)

In [5]:
qb_total_df.head()

Unnamed: 0,Name,Team,Games,PassComp,PassAtt,PassYards,PassTD,INT,RunAtt,RunYards,RunTD,FFP,FFPPG,Season
28,AJFeeley,MIA,11,191,356,1893,11,15,14,13,1,146.0,13.3,2004
164,AJFeeley,MIA,2,0,0,0,0,0,0,0,0,0.0,0.0,2005
224,AJFeeley,PHI,2,26,38,342,3,0,1,3,0,29.4,14.7,2006
304,AJFeeley,PHI,4,59,103,681,5,8,7,23,0,56.4,14.1,2007
637,AJFeeley,STL,5,53,97,548,1,2,3,4,0,31.8,6.4,2011


In [6]:
#train one regression model
X_train = np.array(rb_train_df.drop(['Name','FFPPG'], axis=1))
y_train = np.array(rb_train_df['FFPPG'])

parameters = {'alpha': np.logspace(-5,5,num=30)}
lin_model = grid_search.GridSearchCV(linear_model.Ridge(normalize=True), parameters, cv=10)
lin_model.fit(X_train, y_train)

scores =  cross_validation.cross_val_score(lin_model.best_estimator_, X_train, y_train, cv=10, scoring='mean_absolute_error')
print(scores)
print(np.mean(scores))
print(np.std(scores))

[-2.68631089 -3.10297833 -2.92941368 -3.53803987 -2.90174592 -2.92676825
 -2.90999857 -2.41113394 -2.80128026 -2.79069755]
-2.89983672745
0.275013671964


In [22]:
#train SVM
X_train = np.array(rb_train_df.drop(['Name','FFPPG'], axis=1))
y_train = np.array(rb_train_df['FFPPG'])

X_scaled = preprocessing.scale(X_train)

parameters = {'C': np.logspace(-4,4,num=10), 'gamma': np.logspace(-4,4,num=10)}
#parameters = {'C': [100], 'gamma': [.001]}
svm_model = grid_search.GridSearchCV(svm.SVR(kernel='rbf'), parameters, cv=10)
svm_model.fit(X_train, y_train)

scores =  cross_validation.cross_val_score(svm_model.best_estimator_, X_scaled, y_train, cv=10, scoring='mean_absolute_error')
print(scores)
print(np.mean(scores))
print(np.std(scores))

[-2.58375229 -3.15643421 -2.83987028 -3.47990112 -2.87063625 -2.7747522
 -3.08661206 -2.33056681 -2.98754455 -2.92809365]
-2.9038163414
0.298262823286


In [23]:
lin_model.best_estimator_

Ridge(alpha=0.012689610031679234, copy_X=True, fit_intercept=True,
   max_iter=None, normalize=True, solver='auto', tol=0.001)

In [24]:
svm_model.best_estimator_

SVR(C=21.544346900318821, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma=0.0001, kernel='rbf', max_iter=-1, shrinking=True, tol=0.001,
  verbose=False)

In [10]:
qb_model = training_data.train_player_model(qb_train_df)
rb_model = training_data.train_player_model(rb_train_df)
wr_model = training_data.train_player_model(wr_train_df)
te_model = training_data.train_player_model(te_train_df)

In [11]:
#projections for 2015
qb_most_rec = training_data.data_for_projection(qb_total_df, season=2015)
rb_most_rec = training_data.data_for_projection(rb_total_df, season=2015)
wr_most_rec = training_data.data_for_projection(wr_total_df, season=2015)
te_most_rec = training_data.data_for_projection(te_total_df, season=2015)

In [12]:
qb_proj = training_data.ff_projection(qb_most_rec, qb_model)
rb_proj = training_data.ff_projection(rb_most_rec, rb_model)
wr_proj = training_data.ff_projection(wr_most_rec, wr_model)
te_proj = training_data.ff_projection(te_most_rec, te_model)

In [13]:
#estimate of the number of starters at each position in the league
#1 QB 2.5 RB 3.5 WR 1 TE per team
starters = [12, 30, 42, 12] #QB RB WR TE
projections = [qb_proj, rb_proj, wr_proj, te_proj]
bases = [proj.loc[nstarter, '2015 Projection'] for nstarter, proj in zip(starters, projections)]

In [14]:
bases

[19.80046990023768, 8.7446096270640243, 9.1755121180628016, 8.586323436717187]

In [15]:
base_sub_proj = [(proj['2015 Projection'] - base) for proj, base in zip(projections, bases)]
for proj, base in zip(projections, base_sub_proj):
    proj['Value Above Baseline'] = base

In [16]:
projections[0]

Unnamed: 0,Name,2015 Projection,Value Above Baseline
1,PeytonManning,25.857974,6.057504
2,DrewBrees,24.217709,4.417239
3,AndrewLuck,23.341387,3.540917
4,BenRoethlisberger,21.330288,1.529819
5,AaronRodgers,21.320408,1.519938
6,PhilipRivers,21.269777,1.469307
7,MattRyan,20.611727,0.811257
8,RussellWilson,20.266972,0.466502
9,TonyRomo,20.255,0.45453
10,TomBrady,20.210638,0.410168


In [17]:
projections[1]

Unnamed: 0,Name,2015 Projection,Value Above Baseline
1,LeVeonBell,19.013159,10.26855
2,DeMarcoMurray,18.713261,9.968651
3,MattForte,17.427415,8.682806
4,MarshawnLynch,15.431197,6.686588
5,JamaalCharles,15.013924,6.269315
6,ArianFoster,14.540925,5.796316
7,LeSeanMcCoy,14.436467,5.691858
8,EddieLacy,14.340386,5.595777
9,JoiqueBell,12.373312,3.628703
10,GiovaniBernard,11.911546,3.166937


In [18]:
projections[2]

Unnamed: 0,Name,2015 Projection,Value Above Baseline
1,AntonioBrown,19.570178,10.394666
2,DemaryiusThomas,18.163368,8.987856
3,DezBryant,17.307398,8.131886
4,JordyNelson,17.071224,7.895712
5,AlshonJeffery,15.762167,6.586655
6,RandallCobb,15.729187,6.553675
7,JulioJones,15.438448,6.262936
8,EmmanuelSanders,15.199328,6.023816
9,CalvinJohnson,14.987153,5.811641
10,AJGreen,14.445573,5.270061


In [19]:
projections[3]

Unnamed: 0,Name,2015 Projection,Value Above Baseline
1,JimmyGraham,15.046483,6.46016
2,RobGronkowski,14.061922,5.475598
3,GregOlsen,12.51416,3.927837
4,MartellusBennett,11.457613,2.871289
5,JuliusThomas,11.352958,2.766635
6,AntonioGates,11.294143,2.707819
7,DelanieWalker,11.087186,2.500863
8,JasonWitten,10.488086,1.901763
9,CobyFleener,9.702871,1.116548
10,CharlesClay,9.281677,0.695353


In [20]:
qb_proj.to_csv('../projections/2015_qb_proj.csv')
rb_proj.to_csv('../projections/2015_rb_proj.csv')
wr_proj.to_csv('../projections/2015_wr_proj.csv')
te_proj.to_csv('../projections/2015_te_proj.csv')