In [19]:
%reload_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import read_player_stats
import training_data
from sklearn import linear_model
from sklearn import cross_validation
from sklearn import grid_search
from sklearn import metrics
from sklearn.learning_curve import learning_curve
import matplotlib.pyplot as plt
%matplotlib inline
# Raise pandas' display limits so wide/long frames are shown without truncation:
# up to 1000 columns and 250 rows.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 250)

In [20]:
# Build the combined stats frame for every position over seasons 2004-2014.
# QB and TE are requested with pages [0, 1]; RB and WR with pages [0, 1, 2, 3].
qb_total_df, rb_total_df, wr_total_df, te_total_df = [
    training_data.make_total_data(seasons=range(2004, 2015), pages=page_list, pos=position)
    for position, page_list in [
        ('qb', [0, 1]),
        ('rb', [0, 1, 2, 3]),
        ('wr', [0, 1, 2, 3]),
        ('te', [0, 1]),
    ]
]

In [21]:
# Derive one training frame per position from the totals, restricted to
# seasons 2004-2013 and built with ppg=True.
qb_train_df, rb_train_df, wr_train_df, te_train_df = [
    training_data.make_training_df(total_df, seasons=range(2004, 2014), ppg=True)
    for total_df in [qb_total_df, rb_total_df, wr_total_df, te_total_df]
]

In [22]:
# Preview the first rows of the QB totals frame as a sanity check on the load step.
qb_total_df.head()

Unnamed: 0,Name,Team,Games,PassComp,PassAtt,PassYards,PassTD,INT,RunAtt,RunYards,RunTD,FFP,FFPPG,Season
28,AJFeeley,MIA,11,191,356,1893,11,15,14,13,1,146.0,13.3,2004
164,AJFeeley,MIA,2,0,0,0,0,0,0,0,0,0.0,0.0,2005
224,AJFeeley,PHI,2,26,38,342,3,0,1,3,0,29.4,14.7,2006
304,AJFeeley,PHI,4,59,103,681,5,8,7,23,0,56.4,14.1,2007
637,AJFeeley,STL,5,53,97,548,1,2,3,4,0,31.8,6.4,2011


In [23]:
# Fit a single ridge regression on the QB training set.
# Target is the 'FFPPG' column; features are every column except 'Name' and 'FFPPG'.
y_train = np.array(qb_train_df['FFPPG'])
X_train = np.array(qb_train_df.drop(['Name', 'FFPPG'], axis=1))

# Grid-search 30 log-spaced alpha values between 1e-5 and 1e5 using 10-fold CV.
parameters = {'alpha': np.logspace(-5, 5, num=30)}
lin_model = grid_search.GridSearchCV(
    linear_model.Ridge(normalize=True),
    parameters,
    cv=10,
)
lin_model.fit(X_train, y_train)

# Score the selected estimator with 10-fold CV on the same training data.
# NOTE(review): alpha was tuned on this same X_train/y_train, so these scores are
# likely somewhat optimistic -- consider nested CV or a held-out season to confirm.
scores = cross_validation.cross_val_score(
    lin_model.best_estimator_,
    X_train,
    y_train,
    cv=10,
    scoring='mean_absolute_error',
)
scores

array([-5.19642518, -4.96827708, -4.85902221, -5.32455082, -4.78903028,
       -3.57224926, -4.76286332, -4.39907412, -4.39593469, -4.60734357])

In [24]:
# Show the estimator (and its alpha) that the grid search selected.
lin_model.best_estimator_

Ridge(alpha=0.30391953823132012, copy_X=True, fit_intercept=True,
   max_iter=None, normalize=True, solver='auto', tol=0.001)

In [25]:
# Train one model per position from the corresponding training frame.
qb_model, rb_model, wr_model, te_model = [
    training_data.train_player_model(train_df)
    for train_df in [qb_train_df, rb_train_df, wr_train_df, te_train_df]
]

In [26]:
# Build the model inputs for the 2015 projections from each position's totals frame.
qb_most_rec, rb_most_rec, wr_most_rec, te_most_rec = [
    training_data.data_for_projection(total_df, season=2015)
    for total_df in [qb_total_df, rb_total_df, wr_total_df, te_total_df]
]

In [27]:
# Produce the projection frame for each position by pairing its projection
# input with the model trained for that same position.
qb_proj, rb_proj, wr_proj, te_proj = [
    training_data.ff_projection(recent_df, fitted_model)
    for recent_df, fitted_model in [
        (qb_most_rec, qb_model),
        (rb_most_rec, rb_model),
        (wr_most_rec, wr_model),
        (te_most_rec, te_model),
    ]
]

In [48]:
# Estimated number of league-wide starters at each position, assuming
# 1 QB, 2.5 RB, 3.5 WR and 1 TE per team (the counts below correspond to 12 teams).
starters = [12, 30, 42, 12]  # order: QB, RB, WR, TE
projections = [qb_proj, rb_proj, wr_proj, te_proj]

# Baseline for a position = the '2015 Projection' value stored at index label
# `cutoff` in that position's frame. `.loc` is label-based, so this relies on
# the frames' index labels rather than on row position.
bases = [
    frame.loc[cutoff, '2015 Projection']
    for frame, cutoff in zip(projections, starters)
]

In [49]:
# Inspect the baseline projection picked for each position (order: QB, RB, WR, TE).
bases

[19.800469900237673, 6.72314517850638, 5.7779185623959428, 5.3835609788721737]

In [55]:
# Subtract each position's baseline from its projections ...
base_sub_proj = [
    frame['2015 Projection'] - baseline
    for frame, baseline in zip(projections, bases)
]
# ... and attach the result to the matching frame as a new column.
# This modifies the projection frames in place.
for frame, above_baseline in zip(projections, base_sub_proj):
    frame['Value Above Baseline'] = above_baseline

In [57]:
# QB projections (entry 0 of `projections`), including 'Value Above Baseline'.
projections[0]

Unnamed: 0,Name,2015 Projection,Value Above Baseline
1,PeytonManning,25.857974,6.057504
2,DrewBrees,24.217709,4.417239
3,AndrewLuck,23.341387,3.540917
4,BenRoethlisberger,21.330288,1.529819
5,AaronRodgers,21.320408,1.519938
6,PhilipRivers,21.269777,1.469307
7,MattRyan,20.611727,0.811257
8,RussellWilson,20.266972,0.466502
9,TonyRomo,20.255,0.45453
10,TomBrady,20.210638,0.410168


In [58]:
# RB projections (entry 1 of `projections`), including 'Value Above Baseline'.
projections[1]

Unnamed: 0,Name,2015 Projection,Value Above Baseline
1,DeMarcoMurray,15.516788,8.793643
2,LeVeonBell,14.998517,8.275372
3,MattForte,13.147425,6.42428
4,MarshawnLynch,13.002078,6.278933
5,LeSeanMcCoy,12.037441,5.314296
6,EddieLacy,11.952104,5.228959
7,ArianFoster,11.918932,5.195787
8,JamaalCharles,11.764224,5.041079
9,JoiqueBell,10.114279,3.391134
10,AlfredMorris,9.655172,2.932027


In [59]:
# WR projections (entry 2 of `projections`), including 'Value Above Baseline'.
projections[2]

Unnamed: 0,Name,2015 Projection,Value Above Baseline
1,AntonioBrown,12.678029,6.90011
2,DemaryiusThomas,12.068404,6.290486
3,DezBryant,11.689193,5.911274
4,JordyNelson,11.375867,5.597948
5,AlshonJeffery,10.395144,4.617225
6,RandallCobb,10.364489,4.586571
7,CalvinJohnson,10.086471,4.308552
8,JulioJones,9.910243,4.132324
9,EmmanuelSanders,9.782172,4.004254
10,AJGreen,9.527231,3.749312


In [60]:
# TE projections (entry 3 of `projections`), including 'Value Above Baseline'.
projections[3]

Unnamed: 0,Name,2015 Projection,Value Above Baseline
1,JimmyGraham,9.551878,4.168317
2,RobGronkowski,9.088271,3.70471
3,GregOlsen,7.80392,2.420359
4,JuliusThomas,7.480508,2.096947
5,AntonioGates,7.066133,1.682572
6,DelanieWalker,7.05257,1.669009
7,MartellusBennett,6.94302,1.559459
8,JasonWitten,6.502709,1.119148
9,CobyFleener,6.282668,0.899107
10,CharlesClay,5.644396,0.260835


In [61]:
# Write each position's projection frame to its own CSV file.
# The relative 'projections/' directory is used as the output location.
csv_targets = [
    (qb_proj, 'projections/2015_qb_proj.csv'),
    (rb_proj, 'projections/2015_rb_proj.csv'),
    (wr_proj, 'projections/2015_wr_proj.csv'),
    (te_proj, 'projections/2015_te_proj.csv'),
]
for frame, csv_path in csv_targets:
    frame.to_csv(csv_path)