In [1]:
import pandas as pd
import pickle
import os
import re
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## load in data

In [2]:
path = './data/weighted/'
files = os.listdir(path)


def _season_files(filenames, keyword, directory):
    """Return ``(year, filepath)`` pairs, sorted by year, for names containing ``keyword``.

    The year is every digit in the file name joined together, so file names
    must not contain digits other than the season year.
    """
    pairs = [(int(re.sub('[^0-9]', '', name)), os.path.join(directory, name))
             for name in filenames if keyword in name]
    return sorted(pairs, key=lambda pair: pair[0])


def _load_seasons(season_files, current_year=2017):
    """Read every season file and return ``(historical_frame, current_year_frame)``.

    All seasons other than ``current_year`` are concatenated into one frame.
    Raises FileNotFoundError when no file for ``current_year`` is present,
    instead of leaving ``None`` to fail later with an unrelated error.
    """
    current = None
    history = []
    for year, fn in season_files:
        frame = pd.read_csv(fn, index_col=0)
        if year == current_year:
            current = frame
        else:
            history.append(frame)
    if current is None:
        raise FileNotFoundError(
            "no stats file for season %s among: %s"
            % (current_year, [fn for _, fn in season_files]))
    return pd.concat(history), current


player_stats_files = _season_files(files, "player", path)
opp_stats_files = _season_files(files, "opp", path)

player_stats_hist, player_stats_2017 = _load_seasons(player_stats_files)
opp_stats_hist, opp_stats_2017 = _load_seasons(opp_stats_files)

# Rename the opponent-stat columns positionally; both frames must already have
# exactly these 15 columns in this order.
new_opp_cols = ['offense', 'defense', 'opp_first_downs', 'opp_points',
       'opp_passing_yds', 'opp_penalty_cnt', 'opp_penalty_yds', 'opp_pos_time',
       'opp_punt_avg', 'opp_punt_cnt', 'opp_punt_yds', 'opp_rushing_yds',
       'opp_total_yds', 'opp_turnovers', 'week']
opp_stats_hist.columns = new_opp_cols
opp_stats_2017.columns = new_opp_cols

## clean data and create target variable

In [3]:
def clean(player_stats):
    """Score each row in fantasy points and keep only skill-position players.

    Parameters
    ----------
    player_stats : pandas.DataFrame
        One row per player-game. Must contain the twelve stat columns listed in
        ``scoring`` below plus ``position`` and ``team``.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) with a ``fantasy_points``
        column, ``FB`` folded into ``RB``, team codes ``SD``/``JAC`` rewritten
        to ``LAC``/``JAX``, and only QB/TE/WR/RB rows retained.
    """
    # (stat column, points per unit), summed in this order.
    scoring = (
        ('passing_tds', 4), ('passing_yds', 0.04), ('passing_twoptm', 2),
        ('passing_ints', -2),
        ('rushing_tds', 6), ('rushing_yds', 0.1), ('rushing_twoptm', 2),
        ('receiving_tds', 6), ('receiving_yds', 0.1), ('receiving_twoptm', 2),
        ('kickret_tds', 6), ('puntret_tds', 6),
    )
    include_positions = ['QB', 'TE', 'WR', 'RB']

    # Work on a copy: the original version added and rewrote columns on the
    # caller's frame as a side effect.
    player_stats = player_stats.copy()

    points = 0
    for column, weight in scoring:
        points = points + player_stats[column] * weight
    player_stats['fantasy_points'] = points

    # regex=False: these are literal substring swaps, independent of the
    # pandas-version default for the `regex` argument.
    player_stats['position'] = player_stats['position'].str.replace('FB', 'RB', regex=False)
    team = player_stats['team'].str.replace('SD', 'LAC', regex=False)
    player_stats['team'] = team.str.replace('JAC', 'JAX', regex=False)

    return player_stats[player_stats['position'].isin(include_positions)]

# Apply the same scoring and position filter to the 2017 season and to the
# concatenated historical seasons; each name is rebound to the cleaned frame.
player_stats_2017 = clean(player_stats_2017)
player_stats_hist = clean(player_stats_hist)

## feature engineering

In [4]:
# Kicking, punting and return stat column names.
# NOTE(review): not referenced by any other cell visible in this notebook.
kick_stats = ['kicking_fga', 'kicking_fgm',
       'kicking_fgyds', 'kicking_totpfg', 'kicking_xpa', 'kicking_xpb',
       'kicking_xpmade', 'kicking_xpmissed', 'kicking_xptot', 'kickret_avg',
       'kickret_lng', 'kickret_lngtd', 'kickret_ret', 'kickret_tds',
       'punting_avg', 'punting_i20', 'punting_lng', 'punting_pts',
       'punting_yds', 'puntret_avg', 'puntret_lng', 'puntret_lngtd',
       'puntret_ret', 'puntret_tds',]

# Offensive stat columns (plus the derived 'fantasy_points') that the
# feature-engineering helpers below aggregate; they read this list as a global.
stat_cols = ['fumbles_lost', 'fumbles_rcv', 'fumbles_tot','fumbles_trcv', 'fumbles_yds', 
       'passing_att', 'passing_cmp', 'passing_ints', 'passing_tds',
       'passing_twopta', 'passing_twoptm', 'passing_yds','receiving_lng', 'receiving_lngtd',
       'receiving_rec', 'receiving_tds', 'receiving_twopta',
       'receiving_twoptm', 'receiving_yds', 'rushing_att', 'rushing_lng',
       'rushing_lngtd', 'rushing_tds', 'rushing_twopta', 'rushing_twoptm',
       'rushing_yds','fantasy_points']

### Historical player stats

In [5]:
# Historical player stats: per-player mean and game count of every stat column.
per_player = player_stats_hist.groupby('id')[stat_cols].agg(['mean', 'count'])
hist_stats = per_player.reset_index()
# Flatten the two-level column labels: ('fantasy_points', 'mean') becomes
# 'fantasy_points_histmean', and the key column ('id', '') becomes 'id_hist'.
hist_stats.columns = ['_hist'.join(label) for label in hist_stats.columns]
hist_stats = hist_stats.rename(columns={'id_hist': 'id'})

### Intra-season player stats

In [6]:
# compute intra-season player stats
# Order rows by player and then week (in place), so the per-player diffs and
# shifts in get_trend operate in week order.
player_stats_2017.sort_values(['id','week'], inplace=True)
# Keep the stat columns plus the identifying columns used in later merges.
player_stats = player_stats_2017[stat_cols+['id','week','team','position','full_name']]

In [7]:
def get_trend(df):
    """Add per-player percent changes, lagged copies and a short trend per stat.

    Reads the module-level ``stat_cols`` list.

    The returned frame holds, side by side:
      * the original columns of ``df``;
      * ``chg_<col>``: percent change from the previous row within each ``id``;
      * ``per2_<col>`` / ``per3_<col>``: every column (originals and ``chg_``)
        shifted by one / two rows within each ``id``, missing values set to 0;
      * ``trend_<col>`` for each name in ``stat_cols``: row-wise mean of
        ``chg_<col>``, ``per2_chg_<col>`` and ``per3_chg_<col>``.

    Assumes ``df`` is already ordered by week within each ``id``; nothing here
    sorts it.
    NOTE(review): the lagged changes are zero-filled before averaging, so a
    player with fewer than three prior rows has the trend pulled toward 0.
    NOTE(review): a percent change from a previous value of 0 is presumably
    inf/NaN and can propagate into ``trend_<col>`` -- confirm downstream handling.
    """
    # percent change of every column relative to the player's previous row
    deltas = df.groupby(['id']).pct_change()
    deltas = deltas.add_prefix('chg_')
    deltas = pd.concat([df, deltas], axis=1)
    # lag 1 and lag 2 of all columns within each player; gaps become 0
    deltas2 = deltas.groupby(['id'])[deltas.columns].shift(1).fillna(0)
    deltas3 = deltas.groupby(['id'])[deltas.columns].shift(2).fillna(0)
    deltas2 = deltas2.add_prefix('per2_')
    deltas3 = deltas3.add_prefix('per3_')
    trend_df = pd.concat([deltas, deltas2, deltas3], axis=1)
    # average the current and two lagged percent changes to get the trend
    for col in stat_cols:
        name = 'trend_'+col
        trend_df[name] = trend_df[['chg_'+col,'per2_chg_'+col,'per3_chg_'+col]].mean(axis=1).fillna(0)
    return trend_df

def get_cumul_mean_stats(df, weeks, cols=None):
    """Season-to-date mean of each stat per player, evaluated at every week.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``id``, ``week`` and the stat columns.
    weeks : iterable
        Weeks at which to evaluate the running mean.
    cols : list of str, optional
        Stat columns to average. Defaults to the module-level ``stat_cols``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``<col>_mean`` for each stat, and ``week``. For each
        week there is one row per player who has any row with
        ``df.week <= week``, so a player keeps getting a row for weeks in
        which they did not play. Empty (with the expected columns) when
        ``weeks`` is empty.
    """
    cols = stat_cols if cols is None else list(cols)
    frames = []
    for week in weeks:
        to_date = df[df.week <= week]
        # Suffix the stat columns before reset_index so 'id' is never renamed.
        means = to_date.groupby('id')[cols].mean().add_suffix('_mean').reset_index()
        means['week'] = week
        frames.append(means)
    if not frames:
        return pd.DataFrame(columns=['id'] + [c + '_mean' for c in cols] + ['week'])
    return pd.concat(frames)

def get_cumul_stats_time_weighted(df, weeks, cols=None, normalize=False):
    """Season-to-date, week-weighted aggregate of each stat per player.

    Every stat value is multiplied by the week number of its row, so later
    games count for more, and the products are aggregated per player over all
    rows with ``df.week <= week``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``id``, ``week`` and the stat columns.
    weeks : iterable
        Weeks at which to evaluate the aggregate.
    cols : list of str, optional
        Stat columns to aggregate. Defaults to the module-level ``stat_cols``.
    normalize : bool, default False
        ``False`` keeps the historical behaviour: the plain mean of
        ``week * value``. That is not a true weighted mean, and its scale grows
        with the week number. ``True`` returns
        ``sum(week * value) / sum(week)`` per player instead.

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``<col>_wgtmean`` for each stat, and ``week``; empty
        (with the expected columns) when ``weeks`` is empty.
    """
    cols = stat_cols if cols is None else list(cols)
    frames = []
    for week in weeks:
        to_date = df[df.week <= week]
        # Multiply by the raw week values positionally (no index alignment).
        weighted = to_date[cols].mul(to_date['week'].values, axis=0)
        weighted.insert(0, 'id', to_date['id'].values)
        grouped = weighted.groupby('id')[cols]
        if normalize:
            weight_totals = to_date.groupby('id')['week'].sum()
            agg = grouped.sum().div(weight_totals, axis=0)
        else:
            agg = grouped.mean()
        agg = agg.add_suffix('_wgtmean').reset_index()
        agg['week'] = week
        frames.append(agg)
    if not frames:
        return pd.DataFrame(columns=['id'] + [c + '_wgtmean' for c in cols] + ['week'])
    return pd.concat(frames)

def defensive_ptsallow(matchups, weeks):
    """Season-to-date average fantasy points each defense allows per position.

    For every week in ``weeks``, the rows of ``matchups`` up to and including
    that week are summed per (week, defense, position), and those weekly totals
    are averaged per (defense, position).

    Returns one frame with columns ``defense``, ``position``,
    ``defensive_matchup_allowed`` and ``week`` (the evaluation week).
    """
    group_keys = ['defense', 'position']
    snapshots = []
    for wk in weeks:
        to_date = matchups[matchups['week'] <= wk]
        # total points a defense gave up to each position in each single week
        weekly_totals = (to_date
                         .groupby(['week'] + group_keys)['fantasy_points']
                         .sum()
                         .reset_index())
        # season-to-date mean of those weekly totals
        allowed = (weekly_totals
                   .groupby(group_keys)['fantasy_points']
                   .mean()
                   .reset_index()
                   .rename(columns={'fantasy_points': 'defensive_matchup_allowed'}))
        allowed['week'] = wk
        snapshots.append(allowed)
    return pd.concat(snapshots)

In [9]:
# Every week that appears in the 2017 player stats, ascending.
weeks = sorted(player_stats['week'].unique().tolist())

# offensive player stats: trends, running means, week-weighted running means
trend_df = get_trend(player_stats)
cumavg_stats = get_cumul_mean_stats(player_stats, weeks)
cumavg_stats_wgt = get_cumul_stats_time_weighted(player_stats, weeks)

# defensive opponent stats: attach each player's opposing defense for the week,
# then compute what every defense has allowed per position so far
sched = opp_stats_2017[['offense', 'defense', 'week']]
matchups = pd.merge(
    player_stats, sched,
    how='left',
    left_on=['week', 'team'],
    right_on=['week', 'offense'],
)
defense_ranks = defensive_ptsallow(matchups, weeks)

In [40]:
defense_ranks

Unnamed: 0,defense,position,defensive_matchup_allowed,week
0,ARI,QB,27.080000,1
1,ARI,RB,14.900000,1
2,ARI,TE,0.900000,1
3,ARI,WR,43.200000,1
4,ATL,QB,12.520000,1
5,ATL,RB,30.600000,1
6,ATL,TE,7.000000,1
7,ATL,WR,6.700000,1
8,BAL,QB,-1.000000,1
9,BAL,RB,12.700000,1


In [59]:
def defensive_ptsallow_wgt(matchups, weeks):
    """Weight each player against the best at their position, season to date.

    For every week in ``weeks``:
      1. average each player's ``fantasy_points`` over rows with
         ``matchups.week <= week``;
      2. find the highest such average within each position (``fp_max``);
      3. set ``player_weight = fantasy_points / fp_max``.

    Returns one frame with columns ``position``, ``id``, ``fantasy_points``
    (the season-to-date mean), ``fp_max``, ``player_weight`` and ``week``.

    Changes from the previous version:
      * only ``fantasy_points`` is averaged, so the string columns
        (``team``, ``defense``) no longer reach ``mean()``, which raises on
        current pandas;
      * the trailing merge with the defense schedule was removed: it used only
        the last week's frame and its result was discarded.

    NOTE(review): ``player_weight`` is inf/NaN or negative when a position's
    best season-to-date mean is 0 or negative -- confirm how callers handle it.
    """
    player_weights = []
    for week in weeks:
        to_date = matchups[matchups.week <= week]
        # each player's mean fantasy points, season to date
        std_mean = (to_date.groupby(['position', 'id'])['fantasy_points']
                    .mean()
                    .reset_index())
        # best season-to-date mean at the player's position
        std_mean['fp_max'] = std_mean.groupby('position')['fantasy_points'].transform('max')
        std_mean['player_weight'] = std_mean['fantasy_points'] / std_mean['fp_max']
        std_mean['week'] = week
        player_weights.append(std_mean)
    return pd.concat(player_weights)

In [60]:
# NOTE: despite the name, the returned frame holds per-player weights by week
# (position, id, fantasy_points, fp_max, player_weight, week) -- no defense column.
defense_ranks_wgt = defensive_ptsallow_wgt(matchups, weeks)

In [61]:
# Spot check: one player's weekly opposing defenses.
tmp = matchups[['id','week','defense']]
tmp[tmp.id=='00-0019596']

Unnamed: 0,id,week,defense
0,00-0019596,1,KC
1,00-0019596,2,NO
2,00-0019596,3,HOU
3,00-0019596,4,CAR
4,00-0019596,5,TB
5,00-0019596,6,NYJ
6,00-0019596,7,ATL
7,00-0019596,8,LAC
8,00-0019596,10,DEN
9,00-0019596,11,OAK


In [62]:
defense_ranks_wgt[defense_ranks_wgt.id =='00-0019596']

Unnamed: 0,position,id,fantasy_points,fp_max,player_weight,week
0,QB,00-0019596,10.68,31.02,0.344294,1
0,QB,00-0019596,20.73,25.54,0.811668,2
0,QB,00-0019596,26.393333,26.393333,1.0,3
0,QB,00-0019596,24.915,25.54,0.975529,4
0,QB,00-0019596,22.856,23.288,0.98145,5
0,QB,00-0019596,21.743333,23.013333,0.944815,6
0,QB,00-0019596,21.274286,23.04,0.923363,7
0,QB,00-0019596,20.805,24.408571,0.852365,8
0,QB,00-0019596,20.805,24.408571,0.852365,9
0,QB,00-0019596,21.008889,24.408571,0.860718,10


In [64]:
defense_ranks_wgt.merge(tmp, 'inner', ['id','week'])

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3380 entries, 0 to 3379
Data columns (total 7 columns):
position          3380 non-null object
id                3380 non-null object
fantasy_points    3380 non-null float64
fp_max            3380 non-null float64
player_weight     3380 non-null float64
week              3380 non-null int64
defense           3378 non-null object
dtypes: float64(3), int64(1), object(3)
memory usage: 171.6+ KB


In [None]:
# Regression targets: for every player-week, the defense faced, the points
# scored and the week number of that player's NEXT row (ordered by week).
# A player's last row has no next row, so it gets NaN.
next_game = matchups.sort_values(['id', 'week']).groupby('id')
for new_col, source_col in [('target_defense', 'defense'),
                            ('target', 'fantasy_points'),
                            ('target_week', 'week')]:
    matchups[new_col] = next_game[source_col].shift(-1)

## merge features

In [None]:
# Attach each player's historical aggregates (the *_histmean / *_histcount columns).
# Not idempotent: re-running this cell merges the same columns a second time.
matchups = pd.merge(matchups, hist_stats, on='id', how='left')

In [None]:
# Attach what the NEXT opponent's defense has allowed to the player's position,
# as known at the current week (season to date).
def_off_merge = pd.merge(
    matchups, defense_ranks,
    how='left',
    left_on=['target_defense', 'week', 'position'],
    right_on=['defense', 'week', 'position'],
)

In [None]:
# Combine the plain and week-weighted running means, then attach them to each
# player-week row.
avgs = pd.merge(cumavg_stats, cumavg_stats_wgt, on=['id', 'week'], how='inner')
def_off_merge = pd.merge(def_off_merge, avgs, on=['id', 'week'], how='left')

In [None]:
# create extra player attributes and merge to make model-ready df
from datetime import datetime

attribute_cols = ['id','birthdate','years_pro','height','weight','position','profile_url']
# One row per player (first occurrence kept). Built as an independent frame so
# the column edits below never write into a slice of player_stats_2017.
player_attributes = player_stats_2017[attribute_cols].drop_duplicates(['id']).copy()
player_attributes['birthdate'] = pd.to_datetime(player_attributes['birthdate'])

# Age in years (365-day years) as of the moment this cell runs; the value
# therefore changes from run to run. The reference time is taken once rather
# than once per row.
today = datetime.today()
player_attributes['age'] = (today - player_attributes['birthdate']).dt.days / 365

# One indicator column per position, replacing the 'position' column here.
position_dummies = pd.get_dummies(player_attributes['position'])
player_attributes = pd.concat([position_dummies, player_attributes], axis=1).drop(['position'],axis=1)

# how='right' keeps every player-week row of def_off_merge.
model_df = player_attributes.merge(def_off_merge, how='right',on='id')

## clean and merge baseline projections

In [None]:
# clean Rotowire's weekly projections from ESPN
proj = pd.read_json("./ffb_proj/ffb_proj/spiders/firstrun.json")

# The scraped 'team' field is split into team and position: drop everything
# except letters and whitespace, upper-case, then split on whitespace.
# Explicit raw-string regex so behaviour does not depend on the pandas default.
clean_team_pos = (proj['team']
                  .str.replace(r'[^a-zA-Z\s]', '', regex=True)
                  .str.upper()
                  .str.split(expand=True))
clean_team_pos.columns= ['team', 'position']
proj = pd.concat([proj.drop(['team','position'],axis=1), clean_team_pos], axis=1)

# Rewrite two team abbreviations (literal substring swaps).
proj['team'] = proj['team'].str.replace('LAR','LA', regex=False)
proj['team'] = proj['team'].str.replace('WSH','WAS', regex=False)

proj = proj[['name','position','team','week','proj_pts']]
proj.columns = ['full_name','position','team','target_week','proj_pts']

# '--' marks a missing projection; drop those rows, then convert to numbers.
# .copy() so the assignments below act on an independent frame, not a slice.
proj = proj[proj.proj_pts != '--'].copy()
proj['proj_pts'] = pd.to_numeric(proj['proj_pts'])
proj = proj[~proj.proj_pts.isnull()].copy()

# Strip name suffixes. regex=False: 'Jr.' and 'Sr.' are literal text; as a
# regex the '.' would match any character. 'III' must be removed before 'II'.
for suffix in ('Jr.', 'Sr.', 'III', 'II'):
    proj['full_name'] = proj.full_name.str.replace(suffix, '', regex=False).str.strip()

# Whole-name replacements for spellings that differ between the two sources.
convert_names = {'Joshua Bellamy':'Josh Bellamy',
                 'TJ Jones':'T.J. Jones',
                 'Will Fuller V':'Will Fuller',
                 'Matthew Dayes':'Matt Dayes'}
proj['full_name'] = proj.full_name.replace(convert_names)

# merge Rotowire data to model df
merge_cols = ['full_name','position','team','target_week']
model_df = model_df.merge(proj, how='left', on=merge_cols)

# Rows without a next-opponent defensive figure cannot be modelled.
model_df = model_df.dropna(subset=['defensive_matchup_allowed'])
# NOTE(review): this also sets proj_pts to 0 for players with no matching
# projection, which feeds into the Rotowire baseline error -- confirm intent.
model_df = model_df.fillna(0)

In [None]:
# Replace +/-inf with 0, in place, across the whole frame.
model_df.replace([-np.inf,np.inf], 0, inplace=True)

In [None]:
# Drop rows whose season-to-date mean fantasy points is exactly 0.
model_df = model_df[model_df.fantasy_points_mean != 0]

In [None]:
# Drop any row that still has a missing value, then show (rows, columns).
# NOTE(review): in a top-to-bottom run the earlier fillna(0) should already have
# removed every NaN, so this is presumably a no-op here -- confirm.
model_df.dropna(inplace=True)
model_df.shape

## merge player status

In [None]:
injuries = pd.read_csv('injuries_data/injuries_weekly.csv')
injuries['full_name'] = injuries['firstName']+' '+injuries['lastName']

drops = ['Unnamed: 0','firstName','lastName', 'esbId','injury','player','position']
injuries = injuries.drop(drops, axis=1).dropna()

practice_status = injuries[['week','full_name','practiceStatus']]
game_status = injuries[['week','full_name','gameStatus']]
# one indicator column per distinct status value
psd = pd.get_dummies(practice_status['practiceStatus'])
gsd = pd.get_dummies(game_status['gameStatus'])
practice_status = pd.concat([practice_status, psd], axis=1)
# the '--' game status is dropped rather than counted
game_status = pd.concat([game_status, gsd], axis=1).drop('--',axis=1)

# NOTE: rebinds the notebook-level `weeks` to the weeks in the injury data.
weeks = sorted(injuries.week.unique())


def _cumulative_status_counts(status, indicator_cols, weeks):
    """Per player, count each status over all rows with ``week <= w`` for every ``w``.

    Only ``indicator_cols`` are summed, so the week number and the raw status
    text never reach the aggregation. Returns columns ``full_name``, the
    indicator columns, and ``target_week`` (the week the count is valid for).
    """
    frames = []
    for week in weeks:
        to_date = status[status.week <= week]
        counts = to_date.groupby('full_name')[indicator_cols].sum().reset_index()
        counts['target_week'] = week
        frames.append(counts)
    return pd.concat(frames)


game_status_counts = _cumulative_status_counts(
    game_status, [c for c in gsd.columns if c != '--'], weeks)
practice_status_counts = _cumulative_status_counts(
    practice_status, list(psd.columns), weeks)

In [None]:
# Union of the column names of both count frames. This includes the merge keys
# ('full_name', 'target_week') as well as the status count columns, and the
# order is arbitrary because it passes through a set.
status_cols = list(set(game_status_counts.columns.tolist() + practice_status_counts.columns.tolist()))

In [None]:
# Attach cumulative practice- and game-status counts for the target week.
model_df = model_df.merge(practice_status_counts,how='left',on=['full_name', 'target_week'])
model_df = model_df.merge(game_status_counts,how='left',on=['full_name', 'target_week'])

# Rows with no match in the injury data get NaN from the left merges; treat that
# as a count of 0. Assigning the filled block back (instead of calling
# fillna(inplace=True) on a column selection) updates model_df itself on every
# pandas version, including with copy-on-write enabled.
model_df[status_cols] = model_df[status_cols].fillna(0)

## explore interactions

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Numeric (and boolean) columns only. Selecting by dtype avoids computing a full
# correlation matrix just to read its labels, and avoids corr() raising on
# non-numeric columns in pandas 2.
numeric_df = model_df.select_dtypes(include=['number', 'bool'])
numeric_indices = numeric_df.columns.tolist()

In [None]:
# Columns fed to PolynomialFeatures when exploring interaction terms:
# player attributes, fantasy-point features, fumble aggregates, injury-status
# counts, and the regression target itself (kept so correlations with it can be
# computed afterwards).
inter_feats = ['height', 'weight', 'years_pro', 'age',
               'fantasy_points',
               'chg_fantasy_points',
               'per2_chg_fantasy_points',
               'per2_fantasy_points',
               'per3_chg_fantasy_points',
               'per3_fantasy_points',
               'trend_fantasy_points',
               'fantasy_points_histmean',
               'fantasy_points_histcount',
               'fantasy_points_mean',
               'fantasy_points_wgtmean',
               'defensive_matchup_allowed',
               'fumbles_lost_histmean',
               'fumbles_rcv_histmean',
               'fumbles_tot_histmean',
               'fumbles_trcv_histmean',
               'fumbles_yds_histmean',
               'fumbles_lost_mean',
               'fumbles_rcv_mean',
               'fumbles_tot_mean',
               'fumbles_trcv_mean',
               'fumbles_yds_mean',
               'fumbles_lost_wgtmean',
               'fumbles_rcv_wgtmean',
               'fumbles_tot_wgtmean',
               'fumbles_trcv_wgtmean',
               'fumbles_yds_wgtmean',
               'Limited Participation in Practice',
               'Questionable',
               'Out',
               'Did Not Participate In Practice',
               'Doubtful',
               'Full Participation in Practice',
               'target']

In [None]:
tmp = model_df[inter_feats]

# All polynomial terms up to degree 3 (squares, cubes, cross terms, bias column).
poly = PolynomialFeatures(3, interaction_only=False, include_bias=True)
interactions_arr = poly.fit_transform(tmp)
# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back for older installations.
if hasattr(poly, 'get_feature_names_out'):
    interactions_names = poly.get_feature_names_out(tmp.columns)
else:
    interactions_names = poly.get_feature_names(tmp.columns)
# NOTE: this frame gets a fresh 0..n-1 index, not model_df's index.
interactions = pd.DataFrame(interactions_arr, columns=interactions_names)

In [None]:
# Put the target first, followed by every generated term whose name does not
# contain "target" (this drops target powers and target cross terms).
non_target_names = [name for name in interactions.columns if "target" not in name]
target_col = interactions['target']
nontarget_interactions = interactions[non_target_names]
explore = pd.concat([target_col, nontarget_interactions], axis=1)

In [None]:
# Correlation of every term with the target, sorted ascending; the term names
# end up in a column called 'index'.
corr_interactions = explore.corr()[['target']].sort_values('target',ascending=True).reset_index()

In [None]:
# Overwrites the correlations computed above with a table read from disk.
# NOTE(review): nothing visible in this notebook writes 'corr_inters_deg3.csv' --
# presumably a cached copy of that result; confirm it is up to date.
corr_interactions = pd.read_csv('corr_inters_deg3.csv')

In [None]:
# Term names that do not involve 'fantasy_points_mean'.
no_mean = [x for x in corr_interactions['index'].values if 'fantasy_points_mean' not in x]

In [None]:
# Of those, the terms that involve 'defensive_matchup_allowed'.
def_allow = [x for x in no_mean if 'defensive_matchup_allowed' in x]

In [None]:
# Show the name of the term stored under row label 57.
# NOTE(review): 57 is a hard-coded row label tied to one particular CSV -- it
# raises KeyError if that row is not among the filtered rows.
corr_interactions[corr_interactions['index'].isin(def_allow)].loc[57]['index'] #.head(25) #

In [None]:
# Hand-built interaction feature: row-wise product of the three listed columns.
model_df['fumble_yds X fp_mean X fp_histmean'] = model_df[['fantasy_points_histmean',
                                                           'fantasy_points_mean', 
                                                           'fumbles_yds_histmean']].prod(axis=1)

In [None]:
# Hand-built interaction feature: row-wise product of the three listed columns.
model_df['def_match X fumble_tot X fp_histmean'] = model_df[['fantasy_points_histmean',
                                                           'defensive_matchup_allowed', 
                                                           'fumbles_tot_histmean']].prod(axis=1)

## model

In [None]:
import sklearn.metrics as metrics
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from pprint import pprint
import math

In [None]:
def weekly_regression_all(input_df, pos=None, features=None, skip_weeks=(2.0,)):
    """Walk-forward evaluation: for each target week, fit on all earlier weeks.

    For every distinct ``target_week`` (ascending) a fresh ``LassoCV`` is fitted
    on the rows with an earlier ``target_week`` and scored on the rows of that
    week. The Rotowire projection (``proj_pts``) is scored against the same
    test rows as a baseline. All errors are RMSE.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain ``target``, ``target_week``, ``proj_pts``, ``position``
        (when ``pos`` is given) and every feature column.
    pos : str, optional
        Restrict to one position (e.g. ``'QB'``). ``None`` uses all rows.
    features : list of str, optional
        Feature columns. Defaults to the hand-picked list below.
    skip_weeks : iterable, default ``(2.0,)``
        Target weeks never evaluated. Weeks listed here but absent from the
        data are ignored (the old ``weeks.remove(2.0)`` raised ValueError).

    Returns
    -------
    tuple
        ``(week_nums, test_rmse, train_rmse, y_preds, roto_rmse, ests, coefs)``
        -- parallel lists with one entry per evaluated week. ``ests`` holds
        ``(week, fitted_estimator)`` pairs. ``coefs`` holds, per week,
        ``(abs(coef), coef, feature)`` tuples for linear models or
        ``(importance, feature)`` tuples otherwise, sorted descending.

    Notes
    -----
    A week with no earlier rows to train on is skipped. The unused
    ``select_dtypes`` feature selection of the previous version was removed;
    it was overwritten immediately by the explicit list.
    """
    if pos is not None:
        input_df = input_df[input_df.position == pos]

    if features is None:
        features = ['height','weight','years_pro','age','target_week',
                    'fantasy_points',
                    'chg_fantasy_points',
                    'trend_fantasy_points',
                    'fantasy_points_histmean',
                    'fantasy_points_histcount',
                    'fantasy_points_mean',
                    'fantasy_points_wgtmean',
                    'defensive_matchup_allowed',
                    'fumble_yds X fp_mean X fp_histmean',
                    'def_match X fumble_tot X fp_histmean']
    else:
        features = list(features)

    skipped = tuple(skip_weeks) if skip_weeks is not None else ()

    # Lists keep their historical names; the values stored are RMSE, not MSE.
    train_mse = []
    test_mse = []
    week_nums = []
    y_preds = []
    roto_scores = []
    ests = []
    coefs = []

    weeks = [w for w in sorted(input_df.target_week.unique().tolist())
             if w not in skipped]
    for week in weeks:
        df_train = input_df[input_df.target_week < week]
        df_test = input_df[input_df.target_week == week]
        if df_train.empty:
            # nothing earlier to learn from
            continue

        # Other estimators tried here previously: GradientBoostingRegressor
        # (n_estimators=15) and RandomForestRegressor.
        est = LassoCV()

        X_train = df_train[features]
        y_train = df_train['target']
        X_test = df_test[features]
        y_test = df_test['target']
        rotowire_y_test = df_test.proj_pts

        est.fit(X_train, y_train)

        train_rmse = metrics.mean_squared_error(y_train, est.predict(X_train)) ** 0.5
        train_mse.append(train_rmse)

        y_pred = est.predict(X_test)
        test_rmse = metrics.mean_squared_error(y_test, y_pred) ** 0.5
        test_mse.append(test_rmse)
        y_preds.append(y_pred)

        roto_score = metrics.mean_squared_error(y_test, rotowire_y_test) ** 0.5
        roto_scores.append(roto_score)
        week_nums.append(week)
        ests.append((week, est))

        # Rank features by |coefficient| (linear models) or by importance.
        if 'sklearn.linear_model' in est.__module__:
            coef_ranks = list(zip(abs(est.coef_), est.coef_, features))
        else:
            coef_ranks = list(zip(est.feature_importances_, features))
        coefs.append(sorted(coef_ranks, key=lambda x: x[0], reverse=True))

    return week_nums, test_mse, train_mse, y_preds, roto_scores, ests, coefs

In [None]:
# Walk-forward results for all positions together, model vs. Rotowire baseline.
weeks, mses_test, mses_train, y_preds, roto_scores, ests, coefs = weekly_regression_all(model_df, pos=None)

plt.figure(figsize=(25,12))
curves = [
    (mses_test, {'label': "Test Error"}),
    (mses_train, {'label': "Train Error"}),
    (roto_scores, {'label': "Rotowire Error", 'linestyle': '--'}),
]
for values, line_kwargs in curves:
    plt.plot(weeks, values, **line_kwargs)
plt.xticks(weeks)
plt.ylabel("RMSE")
plt.legend()

In [None]:
# Per evaluated week: the test error and the four highest-ranked features.
for wk, ranked, rmse in zip(weeks, coefs, mses_test):
    print("WEEK:", wk, "ERROR:", rmse)
    top_four = ranked[:4]
    pprint(top_four)
    print('\n')

In [None]:
# NOTE(review): `pred` is not defined by any cell visible in this notebook; on a
# fresh kernel this cell raises NameError. Presumably leftover exploration.
pred

In [None]:
# One panel per position group (None = all positions together): walk-forward
# test/train RMSE against the Rotowire baseline.
coefs_all = {}
f, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5,figsize=(12,25), sharex=True)

for pos, ax in [(None,ax1),('QB',ax2),('RB',ax3),('WR',ax4),('TE',ax5)]:
    # Fixed: the function defined in this notebook is weekly_regression_all and
    # its keyword is `pos`; the old call raised NameError.
    weeks, mses_test, mses_train, y_preds, roto_scores, ests, coefs = weekly_regression_all(model_df, pos=pos)
    coefs_all[pos] = coefs
    ax.plot(weeks, mses_test, label="Test Error")
    ax.plot(weeks, mses_train, label="Train Error")
    ax.plot(weeks, roto_scores, label="Rotowire Error", linestyle='--')
    ax.set_xticks(weeks)
    ax.set_ylabel("RMSE")
    ax.set_title(pos if pos is not None else "All positions")
    ax.legend()

In [None]:
# NOTE(review): this cell repeats the per-position plot a second time; it is
# kept, with the same call fix, so any outputs that depend on it still run.
coefs_all = {}
f, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(5,figsize=(12,25), sharex=True)

for pos, ax in [(None,ax1),('QB',ax2),('RB',ax3),('WR',ax4),('TE',ax5)]:
    # Fixed: the function defined in this notebook is weekly_regression_all and
    # its keyword is `pos`; the old call raised NameError.
    weeks, mses_test, mses_train, y_preds, roto_scores, ests, coefs = weekly_regression_all(model_df, pos=pos)
    coefs_all[pos] = coefs
    ax.plot(weeks, mses_test, label="Test Error")
    ax.plot(weeks, mses_train, label="Train Error")
    ax.plot(weeks, roto_scores, label="Rotowire Error", linestyle='--')
    ax.set_xticks(weeks)
    ax.set_ylabel("RMSE")
    ax.set_title(pos if pos is not None else "All positions")
    ax.legend()

In [None]:
# Top four ranked features per week for one position group (None = all positions).
# `weeks` is whatever the last regression run left behind.
pos = None
for week, c in zip(weeks, coefs_all[pos]):
    print("\nWEEK:",week)
    pprint(c[:4])

In [None]:
# Persist the per-position coefficient rankings. The context manager closes
# (and flushes) the file; the previous version left the handle open.
with open('lasso_coefs.pkl', 'wb') as f:
    pickle.dump(coefs_all, f)

## interactions by position

In [None]:
# One model frame per position.
df_QB, df_RB, df_WR, df_TE = (
    model_df[model_df.position == code] for code in ('QB', 'RB', 'WR', 'TE')
)

In [None]:
def explore_interaction_corrs(pos, interactions):
    """Rank generated terms by their correlation with ``target``.

    ``interactions`` must have a ``target`` column. Every other column whose
    name contains "target" is left out. ``pos`` is accepted but not used.

    Returns a two-column frame: ``index`` (term name) and ``target`` (its
    correlation with the target), sorted from highest to lowest. The first
    row is the target itself.
    """
    feature_names = [name for name in interactions.columns if "target" not in name]
    ordered = pd.concat([interactions.target, interactions[feature_names]], axis=1)
    target_corr = ordered.corr().loc[:, ['target']]
    ranked = target_corr.sort_values(by='target', ascending=False)
    return ranked.reset_index()

In [None]:
# The per-position correlation tables are read from CSV instead of being
# recomputed. The commented calls below show how they would be derived;
# they need `interactions_positions`, which no active cell in the visible
# notebook defines.
# QB_inters = explore_interaction_corrs('QB', interactions_positions['QB'])
# WR_inters = explore_interaction_corrs('WR', interactions_positions['WR'])
# RB_inters = explore_interaction_corrs('RB', interactions_positions['RB'])
# TE_inters = explore_interaction_corrs('TE', interactions_positions['TE'])

QB_inters = pd.read_csv('interactions_QB.csv')
RB_inters = pd.read_csv('interactions_RB.csv')
WR_inters = pd.read_csv('interactions_WR.csv')
TE_inters = pd.read_csv('interactions_TE.csv')

In [None]:
# by position

# keep = ['target','age','height','weight','years_pro','proj_pts','defensive_matchup_allowed',
# 'fantasy_points','fantasy_points_mean','fantasy_points_histcount','fantasy_points_histmean',
# 'trend_fantasy_points','fantasy_points_wgtmean']

# passing_features = [c for c in numeric_df.columns if "passing" in c]
# rushing_features = [c for c in numeric_df.columns if "rushing" in c]
# receiving_features = [c for c in numeric_df.columns if "receiving" in c]

# QB_features = keep+passing_features+rushing_features
# RB_features = keep+rushing_features+receiving_features
# WR_features = keep+receiving_features
# TE_features = keep+receiving_features

# QB_df = model_df[model_df.position=='QB'][QB_features]
# RB_df = model_df[model_df.position=='RB'][RB_features]
# WR_df = model_df[model_df.position=='WR'][WR_features]
# TE_df = model_df[model_df.position=='TE'][TE_features]

# create_interactions = [('QB',QB_df), ('RB',RB_df), ('WR',WR_df), ('TE',TE_df)]

# interactions_positions = {}
# for pos, data in create_interactions:
#     poly = PolynomialFeatures(2, interaction_only=True, include_bias=True)
#     interactions_arr = poly.fit_transform(data)
#     interactions_names = poly.get_feature_names(data.columns)
#     interactions = pd.DataFrame(interactions_arr, columns=interactions_names)
#     interactions_positions[pos] = interactions

### QB

In [None]:
# Term names that do not involve the Rotowire projection.
no_roto = [v for v in QB_inters['index'].values if 'proj_pts' not in v]

QB_inters #[QB_inters.target < 0]

# Rows 1..9 of the projection-free terms (row 0 is skipped).
no_roto_qb = QB_inters[QB_inters['index'].isin(no_roto)].iloc[1:10]
no_roto_qb

# NOTE(review): .iloc[1:10] is applied a second time here, so one more leading
# row is dropped and 8 names remain; the RB section slices only once -- confirm.
QBIfields = no_roto_qb['index'].iloc[1:10].values.tolist()
# NOTE(review): `interactions_positions` is only assigned in commented-out code
# in the visible notebook; this line raises NameError on a fresh kernel.
QB_inters_df = interactions_positions['QB'][QBIfields]

# NOTE(review): `keep_model` is not defined anywhere in the visible notebook.
# pd.concat(axis=1) aligns on index labels -- verify both frames share an index.
model_df_QB = pd.concat([df_QB[keep_model], QB_inters_df],axis=1)

df_QB[keep_model].shape

QB_inters_df.shape

# Drop rows with any missing value, in place.
model_df_QB.dropna(inplace=True)

### RB

In [None]:
# Term names that do not involve the Rotowire projection (rebinds `no_roto`),
# then a look at the last few terms with a negative target correlation.
no_roto = [v for v in RB_inters['index'].values if 'proj_pts' not in v]
RB_inters[RB_inters.target < 0].tail()

In [None]:
# Rows 1..39 of the projection-free terms (row 0 is skipped).
no_roto_RB = RB_inters[RB_inters['index'].isin(no_roto)].iloc[1:40]
no_roto_RB

In [None]:
# Names of the selected terms, then those columns from the RB interaction frame.
# NOTE(review): `interactions_positions` is only assigned in commented-out code
# in the visible notebook; this raises NameError on a fresh kernel.
RBIfields = no_roto_RB['index'].values.tolist()
RB_inters_df = interactions_positions['RB'][RBIfields]

In [None]:
# Side-by-side join of the RB model columns and the selected interaction terms.
# NOTE(review): `keep_model` is not defined anywhere in the visible notebook.
# pd.concat(axis=1) aligns on index labels -- verify both frames share an index.
model_df_RB = pd.concat([df_RB[keep_model], RB_inters_df],axis=1)
model_df_RB.shape

In [None]:
# Drop rows with any missing value, in place.
model_df_RB.dropna(inplace=True)

### WR

### TE