In [315]:
%matplotlib inline
import numpy as np
import sklearn as sk
import scipy as sp
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sqlite3
from sqlite_api import *
from sklearn import linear_model
from math import sqrt
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [44]:
fdb = 'fantasy.db'

player_lbls = ['Player', 'Position', 'Team', 'Season', 'Plays', 'Games', 'RushAttempts', 
              'RushYrds', 'RushTDs', 'PassAttempts', 'Complete', 'PassYrds', 'PassTDs', 'Fumbles', 'Interceptions']

In [94]:
plyr = select_to_df(fdb, 'PlayerSeason', player_lbls, where='WHERE Season=2016')  

In [5]:
# create 2016 mock season database
rdb = 'rankings.db'
conn = sqlite3.connect(rdb)
c = conn.cursor()

In [96]:
for pos in ['Quarterbacks', 'RunningBacks', 'WideReceivers', 'TightEnds', 'Kickers']:
    c.execute('''CREATE Table ''' + pos +
                '''(Player VARCHAR(30),
                    Team VARCHAR(3),
                    Position CHARACTER(2),
                    PRIMARY KEY (Player))''')

In [97]:
c.execute('''CREATE Table Defenses
                (Team VARCHAR(3),
                Position CHARACTER(2),
                PRIMARY KEY (Team))''')

<sqlite3.Cursor at 0x21886351f10>

In [102]:
df = select_to_df(fdb, 'PlayerSeason', player_lbls, where='WHERE Season=2016')[['Player', 'Position', 'Team']]
QBs = df[df.Position == 'QB']
for idx in range(len(QBs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO Quarterbacks VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [103]:
RBs = df[df.Position == 'RB']
for idx in range(len(RBs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO RunningBacks VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [104]:
WRs = df[df.Position == 'WR']
for idx in range(len(WRs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO WideReceivers VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [105]:
TEs = df[df.Position == 'TE']
for idx in range(len(TEs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO TightEnds VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [107]:
df = select_to_df(fdb, 'KickerSeason', ['Player', 'Team', 'Position'], 'WHERE Season = 2016')
for idx in range(len(df)):
    c.execute('INSERT INTO Kickers VALUES (?, ?, ?)', df.iloc[idx])

In [108]:
df = select_to_df(fdb, 'DefenseSeason', ['Team', 'Position'], 'WHERE Season = 2016')
for idx in range(len(df)):
    c.execute('INSERT INTO Defenses VALUES (?, ?)', df.iloc[idx])

In [30]:
df = select_to_df(fdb, 'PlayerSeason', player_lbls, 'WHERE Season != 2016')
df['Score'] = ply.apply(score_row, args=(qb, wr, rb, te), axis=1)

## Offensive Features

In [45]:
def add_feature(df, name, func):
    df[name] = df.apply(func, axis=1)
    return df

In [258]:
offense_lbls = ['Team', 
                 'Season', 
                 'Yards',
                 'PassYrds',
                 'RushYrds',
                 'Points']

In [47]:
def pass_rush_rat(row):
    return row['PassYrds'] / row['RushYrds']

def yards_per_game(row):
    return row['Yards'] / 16

def pass_yrds_per_game(row):
    return row['PassYrds'] / 16

def rush_yrds_per_game(row):
    return row['RushYrds'] / 16

def points_per_game(row):
    return row['Points'] / 16

In [48]:
off = select_to_df('fantasy.db', 'OffenseSeason', offense_lbls)

In [49]:
off = add_feature(off, 'OffPassRushRatio', pass_rush_rat)
off = add_feature(off, 'OffYardsPerGame', yards_per_game)
off = add_feature(off, 'OffPassYrdsPerGame', pass_yrds_per_game)
off = add_feature(off, 'OffRushYrdsPerGame', rush_yrds_per_game)
off = add_feature(off, 'OffPointsPerGame', points_per_game)
off = off.drop(['Yards', 'PassYrds', 'RushYrds', 'Points'], axis=1)
off.head()

Unnamed: 0,Team,Season,OffPassRushRatio,OffYardsPerGame,OffPassYrdsPerGame,OffRushYrdsPerGame,OffPointsPerGame
0,NE,2007,2.55868,411.25,295.6875,115.5625,36.8125
1,DAL,2007,2.351088,365.6875,256.5625,109.125,28.4375
2,IND,2007,2.364009,358.6875,252.0625,106.625,28.125
3,JAX,2007,1.391886,357.4375,208.0,149.4375,25.6875
4,SEA,2007,2.448425,348.9375,247.75,101.1875,24.5625


## Additional Player Features

In [50]:
def pass_tdatt_rat(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['PassTDs'] / row['PassAttempts']

def rush_tdatt_rat(row):
    if row['RushAttempts'] == 0:
        return 0
    return row['RushTDs'] / row['RushAttempts']

def pass_rush_att_rat(row):
    if row['RushAttempts'] == 0:
        return 1
    return row['PassAttempts'] / row['RushAttempts']

def complete_perc(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['Complete'] / row['PassAttempts']

def avg_rush_yrds(row):
    if row['RushAttempts'] == 0:
        return 0
    return row['RushYrds'] / row['RushAttempts']

def avg_pass_yrds(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['PassYrds'] / row['PassAttempts']

def avg_plays(row):
    return row['Plays'] / row['Games']

def score_per_play(row):
    return row['Score'] / row['Plays']

def score_per_game(row):
    return row['Score'] / row['Games']

def games_perc(row):
    return row['Games'] / 16

def intercept_rat(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['Interceptions'] / row['PassAttempts']

def pass_att_per_game(row):
    return row['PassAttempts'] / row['Games']

def rush_att_per_game(row):
    return row['RushAttempts'] / row['Games']

def fumb_per_game(row):
    return row['Fumbles'] / row['Games']

In [51]:
df = select_to_df('fantasy.db', 'PlayerSeason', player_lbls)

def add_features(df):
    df['PassTDAttRatio'] = df.apply(pass_tdatt_rat, axis=1)
    df['RushTDAttRatio'] = df.apply(rush_tdatt_rat, axis=1)
    df['AvgPassYrds'] = df.apply(avg_pass_yrds, axis=1)
    df['AvgRushYrds'] = df.apply(avg_rush_yrds, axis=1)
    df['PercentComplete'] = df.apply(complete_perc, axis=1)
    df['PlaysPerGame'] = df.apply(avg_plays, axis=1)
    df['GamesPlayedPercent'] = df.apply(games_perc, axis=1)
    df['InterceptionRatio'] = df.apply(intercept_rat, axis=1)
    df['PassAttPerGame'] = df.apply(pass_att_per_game, axis=1)
    df['RushAttPerGame'] = df.apply(rush_att_per_game, axis=1)
    df['FumblesPerGame'] = df.apply(fumb_per_game, axis=1)
    df['PassRushAttRatio'] = df.apply(pass_rush_att_rat, axis=1)
    
    return df

df = add_features(df)

In [396]:
players = pd.merge(df, off, on=['Team', 'Season'])
players.tail()

Unnamed: 0,Player,Position,Team,Season,Plays,Games,RushAttempts,RushYrds,RushTDs,PassAttempts,...,InterceptionRatio,PassAttPerGame,RushAttPerGame,FumblesPerGame,PassRushAttRatio,OffPassRushRatio,OffYardsPerGame,OffPassYrdsPerGame,OffRushYrdsPerGame,OffPointsPerGame
5224,Danny Vitale,RB,CLE,2016,5.0,9.0,0.0,0.0,0.0,5.0,...,0.0,0.555556,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5
5225,Gary Barnidge,TE,CLE,2016,82.0,16.0,0.0,0.0,0.0,82.0,...,0.0,5.125,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5
5226,Seth DeValve,TE,CLE,2016,12.0,12.0,0.0,0.0,0.0,12.0,...,0.0,1.0,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5
5227,Connor Hamlett,TE,CLE,2016,1.0,3.0,0.0,0.0,0.0,1.0,...,0.0,0.333333,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5
5228,Randall Telfer,TE,CLE,2016,7.0,14.0,0.0,0.0,0.0,7.0,...,0.0,0.5,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5


## Creating the Models

In [399]:
class Model:
    def __init__(self, feature_list):
        self.features = feature_list
    
    def set_model(self, model):
        self.model = model
        
    def predict(self, feature_dict):
        parameters = list()
        for idx in range(len(self.features)):
            parameters.append(feature_dict[self.features[idx]])
        
        return self.model.predict(parameters)
        
class PositionModels:
    def __init__(self, position):
        self.models = dict()
        self.position = position
    
    def add_model(self, predict, model):
        self.models[predict] = model
        
    def predict_player(self, feature_dict):
        predictions = dict()
        for cur_predict in self.models:
            cur_model = self.models[cur_predict]
            
            # print(str(cur_predict)+': '+str(cur_model.predict(feature_dict)))
            predictions[cur_predict] = cur_model.predict(feature_dict)
        return predictions
        
    def predict_df(self, df):
        def predict_var(row, to_predict):
            #print(len(self.predict_player(row.to_dict())[to_predict]))
            return self.predict_player(row.to_dict())[to_predict][0]
        
        # add interactions
        interacts = set()
        for to_predict in self.models:
            for cur_feat in self.models[to_predict].features:
                if '*' in cur_feat:
                    interacts.add(cur_feat)
        
        for cur_interact in interacts:
            feat1 = cur_interact.split('*')[0][2:]
            feat2 = cur_interact.split('*')[1][2:]
            df[cur_interact] = df.apply(lambda r, f1, f2: float(r[f1])*float(r[f2]), args=(feat1, feat2), axis=1)
        
        for to_predict in self.models:
            print('Predicting '+to_predict+'...')
            new_col = 'Pred' + to_predict
            df[new_col] = df.apply(predict_var, args=(to_predict,), axis=1)
        
        return df
    
    def create_model(self, df, to_predict, features, game_limit=0, test_cutoff=2014, add_limits=dict(), interacts=list()):
        new_model = create_model(df, self.position, to_predict, features, game_limit=game_limit, 
                                 test_cutoff=test_cutoff, add_limits=add_limits, interacts=interacts)
        self.add_model(to_predict, new_model)

In [593]:
def create_model(df, pos, predict, features, reg_type='linear', game_limit=0, test_cutoff=2016, 
                 add_limits=dict(), interacts=list(), cat_features=list(), is_defense=False):
    if not is_defense:
        pkey = ['Player', 'Team', 'Season']
    else:
        pkey = ['Team', 'Season']

    df = df[df.Position == pos]
    left_vars = pkey + [predict]
    right_vars = pkey + features
    
    model_vars = left_vars.copy()
    orig_vars = model_vars.copy()
    for idx in range(len(features)):
        features[idx] = 'PS' + features[idx]
    
    for cat_var in cat_features:
        dummy_df = pd.get_dummies(df[cat_var])
        for var_name in dummy_df.columns:
            features.append(str(var_name))
    model_vars += features
        
    # build train dataframe
    model_df = pd.DataFrame(columns=model_vars)
    for season in range(2008, test_cutoff):
        prev_season = str(season - 1)
        season = str(season)
        left_df = df[df.GamesPlayedPercent >= (game_limit/16)]
        for limit in add_limits:
            left_df = left_df[left_df[limit] >= add_limits[limit]]
        left_df = left_df[left_vars][df.Season == season]
        right_df = df[right_vars][df.Season == prev_season]
        right_df['Season'] = str(season)
        # add categorical dummy variables
        for cat_var in cat_features:
            dummy_df = pd.get_dummies(df[cat_var])
            right_df = right_df.join(dummy_df)
        
        left_df = pd.merge(left_df, right_df, on=pkey)
        left_df.columns = model_vars
        model_df = model_df.append(left_df)
    
    # build interactions
    for cur_inter in interacts:
        feat1 = str(features[cur_inter[0]])
        feat2 = str(features[cur_inter[1]])
        model_df[feat1+'*'+feat2] = model_df.apply(lambda row, feat1, feat2: row[feat1] * row[feat2], 
                                                 args=(feat1, feat2), axis=1)
        features.append(feat1+'*'+feat2)
            
    
    # build test dataframe
    #model_vars = orig_vars
    test_df = pd.DataFrame(columns=model_vars)
    for season in range(test_cutoff, 2017):
        left_df = df[df.GamesPlayedPercent >= (game_limit/16)]
        for limit in add_limits:
            left_df = left_df[left_df[limit] >= add_limits[limit]]
        left_df = left_df[left_vars][df.Season == str(season)]
        right_df = df[right_vars][df.Season == str(season-1)]
        right_df['Season'] = str(season)
        # add categorical dummy variables
        for cat_var in cat_features:
            dummy_df = pd.get_dummies(df[cat_var])
            right_df = right_df.join(dummy_df)

        left_df = pd.merge(left_df, right_df, on=pkey)
        left_df.columns = model_vars
        test_df = test_df.append(left_df)
        
    # build interactions
    for cur_inter in interacts:
        feat1 = str(features[cur_inter[0]])
        feat2 = str(features[cur_inter[1]])
        test_df[feat1+'*'+feat2] = test_df.apply(lambda row, feat1, feat2: row[feat1] * row[feat2], 
                                                 args=(feat1, feat2), axis=1)     
    
    if reg_type == 'linear':
        model = linear_model.LinearRegression()
    elif reg_type == 'logistic':
        model = linear_model.LogisticRegression()
    model.fit(model_df[features], model_df[predict])
    print('|=== Predicting '+str(predict)+' ===|\n')
    print('|====== Model Coefficients ======|')
    for idx in range(len(model.coef_)):
        print(str(features[idx])+': '+str(model.coef_[idx]))
    print('Intercept: '+str(model.intercept_))
    print('\n|========== Performance ==========|')
    print('R-Sqr on Seasons '+str(test_cutoff)+' to 2016: '  + str(model.score(model_df[features], model_df[predict])))
    print('Train Data Size: '+str(len(model_df)))
    print('Test Data Size:'+str(len(test_df)))
    model_obj = Model(features)
    model_obj.set_model(model)
    
    return model_obj

## Quarterbacks

In [401]:
ModelsQB = PositionModels('QB')

In [402]:
AvgPassYrds = create_model(players, 'QB', 'AvgPassYrds', 
                           ['AvgPassYrds', 'PercentComplete', 'OffPassYrdsPerGame'], 
                   game_limit=10, test_cutoff=2014, interacts=[])
ModelsQB.add_model('AvgPassYrds', AvgPassYrds)

|=== Predicting AvgPassYrds ===|

PSAvgPassYrds: 0.281325343102
PSPercentComplete: 6.41163920165
PSOffPassYrdsPerGame: 0.000708867372633
Intercept: 1.13018502735

R-Sqr on Seasons 2014 to 2016: 0.52100841563
Train Data Size: 143
Test Data Size:70


In [403]:
PassAttPerGame = create_model(players, 'QB', 'PassAttPerGame', 
                           ['PassAttPerGame', 'AvgPassYrds', 'PassTDAttRatio', 'OffPointsPerGame', 'GamesPlayedPercent'], 
                   game_limit=6, test_cutoff=2014, interacts=[(0,4), (1,3)])
ModelsQB.add_model('PassAttPerGame', PassAttPerGame)

|=== Predicting PassAttPerGame ===|

PSPassAttPerGame: 0.585682389826
PSAvgPassYrds: 4.66416477994
PSPassTDAttRatio: -19.9779011058
PSOffPointsPerGame: 1.46935242751
PSGamesPlayedPercent: -7.49648014459
PSPassAttPerGame*PSGamesPlayedPercent: 0.230692346385
PSAvgPassYrds*PSOffPointsPerGame: -0.173339137249
Intercept: -23.8767212999

R-Sqr on Seasons 2014 to 2016: 0.595266049834
Train Data Size: 184
Test Data Size:91


In [404]:
PassTDAttRatio = create_model(players, 'QB', 'PassTDAttRatio', 
                           ['PassTDAttRatio', 'AvgPassYrds', 'PercentComplete', 'GamesPlayedPercent', 'RushTDAttRatio'], 
                   game_limit=8, test_cutoff=2014)
ModelsQB.add_model('PassTDAttRatio', PassTDAttRatio)

|=== Predicting PassTDAttRatio ===|

PSPassTDAttRatio: 0.118074081227
PSAvgPassYrds: 0.00221885064841
PSPercentComplete: 0.0325126213073
PSGamesPlayedPercent: 0.0075644469316
PSRushTDAttRatio: 0.0508856621922
Intercept: -0.00569435143606

R-Sqr on Seasons 2014 to 2016: 0.236048860829
Train Data Size: 161
Test Data Size:78


In [405]:
AvgRushYrds = create_model(players, 'QB', 'AvgRushYrds', 
                           ['AvgRushYrds', 'RushAttPerGame', 'PassAttPerGame', 'OffPointsPerGame'], 
                   game_limit=8, test_cutoff=2014)
ModelsQB.add_model('AvgRushYrds', AvgRushYrds)

|=== Predicting AvgRushYrds ===|

PSAvgRushYrds: 0.186097495049
PSRushAttPerGame: 0.606931980498
PSPassAttPerGame: -0.0429372088817
PSOffPointsPerGame: -0.0951528762998
Intercept: 4.5258439588

R-Sqr on Seasons 2014 to 2016: 0.36818219927
Train Data Size: 161
Test Data Size:78


In [406]:
RushAttPerGame = create_model(players, 'QB', 'RushAttPerGame', 
                           ['RushAttPerGame', 'RushTDAttRatio', 'PassRushAttRatio', 'GamesPlayedPercent'], 
                   game_limit=6, test_cutoff=2014, interacts=[(0,0)])
ModelsQB.add_model('RushAttPerGame', RushAttPerGame)

|=== Predicting RushAttPerGame ===|

PSRushAttPerGame: 0.320401292597
PSRushTDAttRatio: 4.6558133053
PSPassRushAttRatio: -0.0159485482807
PSGamesPlayedPercent: -0.225122925987
PSRushAttPerGame*PSRushAttPerGame: 0.0453150359155
Intercept: 1.53310205211

R-Sqr on Seasons 2014 to 2016: 0.510334569854
Train Data Size: 184
Test Data Size:91


In [407]:
FumblesPerGame = create_model(players, 'QB', 'FumblesPerGame', 
                           ['FumblesPerGame', 'OffPassYrdsPerGame', 'PassAttPerGame'], 
                   game_limit=8, test_cutoff=2014, interacts=[(0,1)])
ModelsQB.add_model('FumblesPerGame', FumblesPerGame)

|=== Predicting FumblesPerGame ===|

PSFumblesPerGame: 0.00225734741466
PSOffPassYrdsPerGame: -0.00064973396875
PSPassAttPerGame: 0.00243968422387
PSFumblesPerGame*PSOffPassYrdsPerGame: 0.000395880115763
Intercept: 0.275483838641

R-Sqr on Seasons 2014 to 2016: 0.0699783290304
Train Data Size: 161
Test Data Size:78


In [385]:
PredictQB = ModelsQB.predict_df(playersQB)
PredictQB[PredictQB.Player == 'Tom Brady'][['Player', 'Team', 'Season', 'AvgPassYrds', 'PredAvgPassYrds']].tail()

NameError: name 'playersQB' is not defined

In [None]:
QB2015 = PredictQB[PredictQB.Season == '2015'][['Player', 'Season', 'Team', 'AvgPassYrds', 
            'PredAvgPassYrds', 'PassAttPerGame', 'PredPassAttPerGame']]
QB2016 = PredictQB[PredictQB.Season == '2016'][['Player', 'Season', 'Team', 'AvgPassYrds', 'PassAttPerGame']]
QB2016.columns = ['Player', 'Season', 'Team', '2016AvgPassYrds', '2016PassAttPerGame']
PredictQB2016 = pd.merge(QB2015, QB2016, on=('Player', 'Team'))
PredictQB2016 = PredictQB2016[['Player', 'Team', 'PredAvgPassYrds', '2016AvgPassYrds', 'PredPassAttPerGame', '2016PassAttPerGame']].sort(columns=('2016AvgPassYrds'), ascending=False)
PredictQB2016.head()

In [None]:
def predict_total_pass(row):
    return row.PredAvgPassYrds * row.PredPassAttPerGame * 16

def actual_total_pass(row):
    return row['2016AvgPassYrds'] * row['2016PassAttPerGame'] * 16

PredictQB2016['PredPassYrds'] = PredictQB2016.apply(predict_total_pass, axis=1)
PredictQB2016['2016PassYrds'] = PredictQB2016.apply(actual_total_pass, axis=1)
PredictQB2016 = PredictQB2016.sort(columns=('2016PassYrds'), ascending=False)
print('Total Correlation:')
print(PredictQB2016[['Player', 'PredPassYrds', '2016PassYrds']].corr())
print('Top 15 Correlation:')
print(PredictQB2016[['Player', 'PredPassYrds', '2016PassYrds']][:15].corr())
PredictQB2016[['Player', 'PredPassYrds', '2016PassYrds']]

#### Current Metrics

Total: 0.682

Top 15: 0.618

## Running Backs

In [408]:
ModelsRB = PositionModels('RB')

#### Primary Stats

In [409]:
AvgRushYrds = create_model(players, 'RB', 'AvgRushYrds', 
                           ['AvgRushYrds', 'RushTDAttRatio', 'RushAttPerGame'], 
                   game_limit=14, test_cutoff=2014, add_limits={'RushAttPerGame': 8}, interacts=[])
ModelsRB.add_model('AvgRushYrds', AvgRushYrds)

|=== Predicting AvgRushYrds ===|

PSAvgRushYrds: 0.233007230324
PSRushTDAttRatio: -1.91252474272
PSRushAttPerGame: -0.0100144505186
Intercept: 3.50947407628

R-Sqr on Seasons 2014 to 2016: 0.112014723196
Train Data Size: 133
Test Data Size:51


In [410]:
RushAttPerGame = create_model(players, 'RB', 'RushAttPerGame', 
                           ['RushAttPerGame', 'AvgRushYrds'], 
                   game_limit=14, test_cutoff=2014)
ModelsRB.add_model('RushAttPerGame', RushAttPerGame)

|=== Predicting RushAttPerGame ===|

PSRushAttPerGame: 0.829612058324
PSAvgRushYrds: 0.350216329528
Intercept: 0.0965459334221

R-Sqr on Seasons 2014 to 2016: 0.742479651202
Train Data Size: 329
Test Data Size:149


In [411]:
RushTDAttRatio = create_model(players, 'RB', 'RushTDAttRatio', 
                           ['RushTDAttRatio', 'AvgRushYrds', 'OffRushYrdsPerGame'], 
                   game_limit=14, test_cutoff=2014, interacts=[(0,2)])
ModelsRB.add_model('RushAttPerGame', RushAttPerGame)

|=== Predicting RushTDAttRatio ===|

PSRushTDAttRatio: 3.35812749939
PSAvgRushYrds: -0.00274617361922
PSOffRushYrdsPerGame: 0.000452560830186
PSRushTDAttRatio*PSOffRushYrdsPerGame: -0.0269480824138
Intercept: -0.0172543176205

R-Sqr on Seasons 2014 to 2016: 0.217237107317
Train Data Size: 329
Test Data Size:149


#### Secondary Stats

In [412]:
# create boolean ispasser as addition feature for AvgPassYrds?
# maybe limit train set to only passers and only predict if predicted to be a passer/pass att > certain margin

In [413]:
AvgPassYrds = create_model(players, 'RB', 'AvgPassYrds', 
                           ['AvgPassYrds'], 
                   game_limit=14, test_cutoff=2014, interacts=[(0,0)])
ModelsRB.add_model('AvgPassYrds', AvgPassYrds)

|=== Predicting AvgPassYrds ===|

PSAvgPassYrds: 0.0355588811577
PSAvgPassYrds*PSAvgPassYrds: 0.00361711252065
Intercept: 5.18121883081

R-Sqr on Seasons 2014 to 2016: 0.00508318315398
Train Data Size: 329
Test Data Size:149


In [414]:
PassAttPerGame = create_model(players, 'RB', 'PassAttPerGame', 
                           ['PassAttPerGame'], 
                   game_limit=14, test_cutoff=2014, interacts=[(0,0)])
ModelsRB.add_model('PassAttPerGame', PassAttPerGame)

|=== Predicting PassAttPerGame ===|

PSPassAttPerGame: 1.02159768294
PSPassAttPerGame*PSPassAttPerGame: -0.0568342700929
Intercept: 0.389867015535

R-Sqr on Seasons 2014 to 2016: 0.545003585184
Train Data Size: 329
Test Data Size:149


In [415]:
FumblesPerGame = create_model(players, 'RB', 'FumblesPerGame', 
                           ['FumblesPerGame', 'RushAttPerGame'], 
                   game_limit=14, test_cutoff=2014, interacts=[])
ModelsRB.add_model('FumblesPerGame', FumblesPerGame)

|=== Predicting FumblesPerGame ===|

PSFumblesPerGame: 0.120723512678
PSRushAttPerGame: 0.00465479290049
Intercept: 0.0159142261803

R-Sqr on Seasons 2014 to 2016: 0.274732796335
Train Data Size: 329
Test Data Size:149


In [394]:
def predict_total_rush(row):
    return row.PredAvgRushYrds * row.PredRushAttPerGame * 16

def actual_total_rush(row):
    return row['2016AvgRushYrds'] * row['2016RushAttPerGame'] * 16

PredictRB = ModelsRB.predict_df(players[players.Position == 'RB'])
RB2015 = PredictRB[PredictRB.Season == '2015'][['Player', 'Season', 'Team', 'AvgRushYrds', 
            'PredAvgRushYrds', 'RushAttPerGame', 'PredRushAttPerGame']]
RB2016 = PredictRB[PredictRB.Season == '2016'][['Player', 'Season', 'Team', 'AvgRushYrds', 'RushAttPerGame']]
RB2016.columns = ['Player', 'Season', 'Team', '2016AvgRushYrds', '2016RushAttPerGame']
PredictRB2016 = pd.merge(RB2015, RB2016, on=('Player', 'Team'))
PredictRB2016 = PredictRB2016[['Player', 'Team', 'PredAvgRushYrds', '2016AvgRushYrds', 'PredRushAttPerGame', '2016RushAttPerGame']].sort(columns=('2016AvgRushYrds'), ascending=False)
PredictRB2016['PredRushYrds'] = PredictRB2016.apply(predict_total_rush, axis=1)
PredictRB2016['2016RushYrds'] = PredictRB2016.apply(actual_total_rush, axis=1)
PredictRB2016 = PredictRB2016.sort(columns=('2016RushYrds'), ascending=False)
print('Total Correlation:')
print(PredictRB2016[['Player', 'PredRushYrds', '2016RushYrds']].corr())
print('Top 15 Correlation:')
print(PredictRB2016[['Player', 'PredRushYrds', '2016RushYrds']][:15].corr())
PredictRB2016[['Player', 'PredRushYrds', '2016RushYrds']]

Predicting AvgRushYrds...


KeyError: ('PSAvgRushYrds', 'occurred at index 7')

#### Current Metrics
Total: 0.7

Top 15: 0.19

there is a bigger issue with unpredictable number of games played. most overpredictions
are related to not controlling for games played, but performance is still poor
especially for the top running backs. hopefully will improve with interactions

## Wide Receivers

In [416]:
ModelsWR = PositionModels('WR')

#### Primary Stats

In [417]:
AvgPassYrds = create_model(players, 'WR', 'AvgPassYrds', 
                           ['AvgPassYrds', 'OffPassYrdsPerGame'], 
                   game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4}, interacts=[(0,1)])
ModelsWR.add_model('AvgPassYrds', AvgPassYrds)

|=== Predicting AvgPassYrds ===|

PSAvgPassYrds: -0.423650868497
PSOffPassYrdsPerGame: -0.0163688784131
PSAvgPassYrds*PSOffPassYrdsPerGame: 0.00252492548065
Intercept: 10.3358299284

R-Sqr on Seasons 2014 to 2016: 0.0888486413396
Train Data Size: 322
Test Data Size:145


In [418]:
PassAttPerGame = create_model(players, 'WR', 'PassAttPerGame', 
                           ['PassAttPerGame'], 
                   game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 0})
ModelsWR.add_model('PassAttPerGame', PassAttPerGame)

|=== Predicting PassAttPerGame ===|

PSPassAttPerGame: 0.764821757917
Intercept: 1.47599181756

R-Sqr on Seasons 2014 to 2016: 0.613892036713
Train Data Size: 486
Test Data Size:228


In [419]:
PassTDAttRatio = create_model(players, 'WR', 'PassTDAttRatio', 
                           ['PassTDAttRatio', 'OffPassYrdsPerGame', 'OffPointsPerGame'], 
                   game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4}, interacts=[(0,0), (0,3), (0,1)])
ModelsWR.add_model('PassTDAttRatio', PassTDAttRatio)

|=== Predicting PassTDAttRatio ===|

PSPassTDAttRatio: -0.303232791479
PSOffPassYrdsPerGame: -5.0709442713e-05
PSOffPointsPerGame: 0.000265962867502
PSPassTDAttRatio*PSPassTDAttRatio: 1.27255018912
PSPassTDAttRatio*PSPassTDAttRatio*PSPassTDAttRatio: -10.9152195032
PSPassTDAttRatio*PSOffPassYrdsPerGame: 0.00202838561932
Intercept: 0.0433359431419

R-Sqr on Seasons 2014 to 2016: 0.0963872329005
Train Data Size: 322
Test Data Size:145


#### Secondary Stats

In [420]:
ModelsWR.create_model(players, 'AvgRushYrds', ['AvgRushYrds', 'RushAttPerGame', 'OffRushYrdsPerGame'], 
                      game_limit=12, test_cutoff=2014, add_limits={})

|=== Predicting AvgRushYrds ===|

PSAvgRushYrds: 0.147727772425
PSRushAttPerGame: 3.52344002527
PSOffRushYrdsPerGame: -0.0181129923799
Intercept: 4.03411535313

R-Sqr on Seasons 2014 to 2016: 0.0773486183827
Train Data Size: 486
Test Data Size:228


In [421]:
ModelsWR.create_model(players, 'RushAttPerGame', ['RushAttPerGame'], 
                      game_limit=12, test_cutoff=2014, add_limits={}, interacts=[(0,0)])

|=== Predicting RushAttPerGame ===|

PSRushAttPerGame: 0.983191135993
PSRushAttPerGame*PSRushAttPerGame: -0.105245457362
Intercept: 0.00850077169574

R-Sqr on Seasons 2014 to 2016: 0.480727395623
Train Data Size: 486
Test Data Size:228


In [None]:
# static RushTDAttRatio for WRs

In [None]:
PredictWR = ModelsWR.predict_df(players[players.Position == 'WR'])
WR2015 = PredictWR[PredictWR.Season == '2015'][['Player', 'Season', 'Team', 'AvgPassYrds', 
            'PredAvgPassYrds', 'PassAttPerGame', 'PredPassAttPerGame']]
WR2016 = PredictWR[PredictWR.Season == '2016'][['Player', 'Season', 'Team', 'AvgPassYrds', 'PassAttPerGame']]
WR2016.columns = ['Player', 'Season', 'Team', '2016AvgPassYrds', '2016PassAttPerGame']
PredictWR2016 = pd.merge(WR2015, WR2016, on=('Player', 'Team'))
PredictWR2016 = PredictWR2016[['Player', 'Team', 'PredAvgPassYrds', '2016AvgPassYrds', 'PredPassAttPerGame', '2016PassAttPerGame']].sort(columns=('2016AvgPassYrds'), ascending=False)
PredictWR2016['PredPassYrds'] = PredictWR2016.apply(predict_total_pass, axis=1)
PredictWR2016['2016PassYrds'] = PredictWR2016.apply(actual_total_pass, axis=1)
PredictWR2016 = PredictWR2016.sort(columns=('PredPassYrds'), ascending=False)
print('Total Correlation:')
print(PredictWR2016[['Player', 'PredPassYrds', '2016PassYrds']].corr())
print('Top 15 Correlation:')
print(PredictWR2016[['Player', 'PredPassYrds', '2016PassYrds']][:15].corr())
#PredictWR2016[['Player', 'PredPassYrds', '2016PassYrds']]

#### Current Metrics

Total: 0.739

Top 15: 0.48

## Tight Ends

In [422]:
ModelsTE = PositionModels('TE')

#### Primary Stats

In [423]:
ModelsTE.create_model(players, 'AvgPassYrds', ['AvgPassYrds'], 
                      game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4}, interacts=[])

|=== Predicting AvgPassYrds ===|

PSAvgPassYrds: 0.276619249957
Intercept: 5.42182785496

R-Sqr on Seasons 2014 to 2016: 0.101981319253
Train Data Size: 108
Test Data Size:53


In [424]:
ModelsTE.create_model(players, 'PassAttPerGame', ['PassAttPerGame', 'OffPassYrdsPerGame', 'GamesPlayedPercent'], 
                      game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 2}, interacts=[(0,2)])

|=== Predicting PassAttPerGame ===|

PSPassAttPerGame: 0.261182571605
PSOffPassYrdsPerGame: 0.00687205792516
PSGamesPlayedPercent: -0.307633412213
PSPassAttPerGame*PSGamesPlayedPercent: 0.29136443834
Intercept: 1.32734612382

R-Sqr on Seasons 2014 to 2016: 0.446777129779
Train Data Size: 178
Test Data Size:80


In [425]:
ModelsTE.create_model(players, 'PassTDAttRatio', ['PassTDAttRatio'], 
                      game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4}, interacts=[])

|=== Predicting PassTDAttRatio ===|

PSPassTDAttRatio: 0.21132866975
Intercept: 0.041303701398

R-Sqr on Seasons 2014 to 2016: 0.0761600670352
Train Data Size: 108
Test Data Size:53


In [426]:
ModelsTE.create_model(players, 'AvgRushYrds', ['AvgRushYrds'], 
                      game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4}, interacts=[])

|=== Predicting AvgRushYrds ===|

PSAvgRushYrds: 0.106086642053
Intercept: 0.353413684091

R-Sqr on Seasons 2014 to 2016: 0.0255345646289
Train Data Size: 108
Test Data Size:53


In [427]:
ModelsTE.create_model(players, 'RushAttPerGame', ['RushAttPerGame'], 
                      game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4}, interacts=[])

|=== Predicting RushAttPerGame ===|

PSRushAttPerGame: 0.607057579655
Intercept: 0.00724529774755

R-Sqr on Seasons 2014 to 2016: 0.105662979561
Train Data Size: 108
Test Data Size:53


In [395]:
PredictTE = ModelsTE.predict_df(players[players.Position == 'TE'])
TE2015 = PredictTE[PredictTE.Season == '2015'][['Player', 'Season', 'Team', 'AvgPassYrds', 
            'PredAvgPassYrds', 'PassAttPerGame', 'PredPassAttPerGame']]
TE2016 = PredictTE[PredictTE.Season == '2016'][['Player', 'Season', 'Team', 'AvgPassYrds', 'PassAttPerGame']]
TE2016.columns = ['Player', 'Season', 'Team', '2016AvgPassYrds', '2016PassAttPerGame']
PredictTE2016 = pd.merge(TE2015, TE2016, on=('Player', 'Team'))
PredictTE2016 = PredictTE2016[['Player', 'Team', 'PredAvgPassYrds', '2016AvgPassYrds', 'PredPassAttPerGame', '2016PassAttPerGame']].sort(columns=('2016AvgPassYrds'), ascending=False)
PredictTE2016['PredPassYrds'] = PredictTE2016.apply(predict_total_pass, axis=1)
PredictTE2016['2016PassYrds'] = PredictTE2016.apply(actual_total_pass, axis=1)
PredictTE2016 = PredictTE2016.sort(columns=('PredPassYrds'), ascending=False)
print('Total Correlation:')
print(PredictTE2016[['Player', 'PredPassYrds', '2016PassYrds']].corr())
print('Top 15 Correlation:')
print(PredictTE2016[['Player', 'PredPassYrds', '2016PassYrds']][:15].corr())
PredictTE2016[['Player', 'PredPassYrds', '2016PassYrds']]

NameError: name 'ModelsTE' is not defined

## Adding Depth Chart Info

In [2]:
conn = sqlite3.connect('fantasy.db')
c = conn.cursor()

In [3]:
c.execute('ALTER TABLE PlayerSeason ADD Rank INT')

OperationalError: duplicate column name: Rank

In [3]:
c.execute('ALTER TABLe PlayerSeason ADD PrevRank INT')

OperationalError: duplicate column name: PrevRank

In [4]:
c.execute('''
    UPDATE PlayerSeason 
    SET Rank = (SELECT Roster.Rank FROM Roster 
        WHERE REPLACE(PlayerSeason.Player, \'.\', \'\') = REPLACE(Roster.Player, \'.\', \'\') 
        COLLATE NOCASE AND PlayerSeason.Season = Roster.Season
        AND PlayerSeason.Team = Roster.Team AND PlayerSeason.Position = Roster.Position)
    ''')

<sqlite3.Cursor at 0x94d9b20>

In [19]:
n = 0
for row in c.execute('SELECT * FROM PlayerSeason WHERE Rank IS NOT NULL ORDER BY Games DESC, Season DESC'):
    print(row)
    n += 1
    if n == 100:
        break

('Joe Banyard', 'MIN', 14, 28, 8, 43, 0, 6, 6, 42, 0, 0, 0, 'RB', '2014', 3, None)
('James Hanna', 'DAL', 11, 18, 0, 0, 0, 11, 8, 86, 0, 0, 0, 'TE', '2012', 3, None)
('Chaz Schilens', 'OAK', 33, 18, 1, -2, 0, 32, 17, 236, 2, 0, 0, 'WR', '2008', 1, None)
('Zach Miller', 'OAK', 86, 18, 0, 0, 0, 86, 64, 902, 1, 0, 0, 'TE', '2008', 1, 1)
('Jerricho Cotchery', 'NYJ', 113, 17, 2, 8, 0, 111, 72, 858, 5, 1, 0, 'WR', '2008', 1, 1)
('Davone Bess', 'MIA', 76, 17, 1, 13, 0, 75, 57, 604, 1, 0, 0, 'WR', '2008', 1, None)
('Thomas Jones', 'NYJ', 356, 17, 314, 1471, 13, 42, 37, 207, 2, 1, 0, 'RB', '2008', 1, 1)
('Ronnie Brown', 'MIA', 276, 17, 230, 1017, 10, 43, 34, 265, 0, 2, 0, 'RB', '2008', 1, 4)
('Leon Washington', 'NYJ', 141, 17, 79, 467, 7, 62, 50, 376, 2, 2, 0, 'RB', '2008', 2, 2)
('Ricky Williams', 'MIA', 212, 17, 173, 708, 4, 39, 31, 225, 1, 2, 0, 'RB', '2008', 2, None)
('Patrick Cobbs', 'MIA', 42, 17, 15, 111, 2, 27, 22, 291, 2, 0, 0, 'RB', '2008', 3, 2)
('Anthony Fasano', 'MIA', 53, 17, 0, 0

In [13]:
for row in c.execute('SELECT * FROM Roster WHERE Player = \'David Johnson\''):
    print(row)

('David Johnson', 'ARI', 'RB', '2015', 2)
('David Johnson', 'ARI', 'RB', '2016', 1)
('David Johnson', 'LAC', 'TE', '2014', 2)
('David Johnson', 'LAC', 'TE', '2015', 1)
('David Johnson', 'PIT', 'TE', '2009', 3)
('David Johnson', 'PIT', 'TE', '2016', 2)


In [17]:
for row in c.execute('SELECT * FROM Roster WHERE Player LIKE \'%Tyreek Hill%\''):
    print(row)

('Tyreek Hill', 'KC', 'WR', '2016', 2)


In [6]:
conn.commit()

In [87]:
conn.close()

#### Rank Difference Feature

Adding a feature of player's rank change since his previous season

In [14]:
sql_input = '''UPDATE PlayerSeason
                SET PrevRank = 
                    (SELECT Rank FROM PlayerSeason p2 
                    WHERE p2.Player = PlayerSeason.Player AND 
                    p2.Team = PlayerSeason.Team AND
                    p2.Position = PlayerSeason.Position AND 
                    CAST(PlayerSeason.Season AS DECIMAL)-1 = CAST(p2.Season AS DECIMAL))'''

c.execute(sql_input)

<sqlite3.Cursor at 0x94d9b20>

In [11]:
conn.commit()

## Predicting Games Played

In [350]:
rank_lbls = player_lbls + ['Rank', 'PrevRank']
rank_df = select_to_df('fantasy.db', 'PlayerSeason', rank_lbls, where='WHERE Season > 2007')
rank_df = add_features(rank_df)

Because the number of fourth and fifth strings are so low, and because I'm planning to treat ranks and rank differences as categorical variables, I'm forcing every fourth and fifth string to be considered a third string.

In [351]:
def rank_fix(row, col):
    if row[col] == 5 or row[col] == 4:
        return 3
    
    if row[col] is None:
        return 'Bench'
    
    return row[col]

rank_df['Rank'] = rank_df.apply(rank_fix, args=('Rank',), axis=1)
rank_df['PrevRank'] = rank_df.apply(rank_fix, args=('PrevRank',), axis=1)

The first plan is to treat the combination of (Rank, PrevRank) as a categorical variable. This is because I wanted the best way of finding an interaction between the two, which is difficult to do in the standard mathematical way as the difference between the two does not include information about the player's current season rank. For example, if I just took the rank difference, then the model would see no difference from a 3rd string going to 2nd string and a 2nd string going to 1st string, which should be different behaviors. It seems above that I have just enough data available to where this might work.

In [352]:
def rank_change(row):
    return '('+str(row['PrevRank']).replace('.0', '')+' -> '+str(row['Rank']).replace('.0', '')+')'

rank_df['RankChange'] = rank_df.apply(rank_change, axis=1)

In [360]:
GamesModelQB = create_model(rank_df, 'QB', 'GamesPlayedPercent', 
                           ['GamesPlayedPercent'], 
                   game_limit=0, test_cutoff=2013, interacts=[(0,n) for n in range(1,len(rank_df['RankChange'].unique()))], 
             cat_features=['RankChange'])

|=== Predicting GamesPlayedPercent ===|

PSGamesPlayedPercent: 1.5
(1 -> 1): 0.377361898717
(1 -> 2): 0.286483455922
(1 -> 3): 0.00538524705777
(1 -> Bench): 1.16727049296
(2 -> 1): -0.334669162213
(2 -> 2): -0.144074691811
(2 -> 3): 0.0378062072451
(2 -> Bench): -0.103265221326
(3 -> 1): -0.292742947901
(3 -> 2): 0.247862598223
(3 -> 3): -0.170229507041
(3 -> Bench): -0.442183976613
(Bench -> 1): -0.0282767963747
(Bench -> 2): -0.148920451922
(Bench -> 3): -0.187577637882
(Bench -> Bench): -0.270229507041
PSGamesPlayedPercent*(1 -> 1): -1.31956293339
PSGamesPlayedPercent*(1 -> 2): -1.57407407407
PSGamesPlayedPercent*(1 -> 3): -0.327868852459
PSGamesPlayedPercent*(1 -> Bench): -9.0
PSGamesPlayedPercent*(2 -> 1): -0.575862068966
PSGamesPlayedPercent*(2 -> 2): -1.00683090705
PSGamesPlayedPercent*(2 -> 3): -1.59523809524
PSGamesPlayedPercent*(2 -> Bench): -0.471428571429
PSGamesPlayedPercent*(3 -> 1): -0.672043010753
PSGamesPlayedPercent*(3 -> 2): -1.86842105263
PSGamesPlayedPercent*(3 ->

In [361]:
GamesModelRB = create_model(rank_df, 'RB', 'GamesPlayedPercent', 
                           ['GamesPlayedPercent'], 
                   game_limit=0, test_cutoff=2013, interacts=[(0,n) for n in range(1,len(rank_df['RankChange'].unique()))], 
             cat_features=['RankChange'])

|=== Predicting GamesPlayedPercent ===|

PSGamesPlayedPercent: 0.150003835377
(1 -> 1): 0.254249846785
(1 -> 2): -0.22243351371
(1 -> 3): -0.870655198793
(1 -> Bench): 0.626055327523
(2 -> 1): 0.520792169628
(2 -> 2): -0.809801999295
(2 -> 3): 0.713215076121
(2 -> Bench): 0.423596626025
(3 -> 1): -0.818793496665
(3 -> 2): -0.114589797949
(3 -> 3): -0.0324656970136
(3 -> Bench): 0.094700232517
(Bench -> 1): -0.360851277224
(Bench -> 2): 0.375324924025
(Bench -> 3): 0.00429403176803
(Bench -> Bench): 0.217362746257
PSGamesPlayedPercent*(1 -> 1): -0.0594073217032
PSGamesPlayedPercent*(1 -> 2): 0.414913291695
PSGamesPlayedPercent*(1 -> 3): 1.84999616462
PSGamesPlayedPercent*(1 -> Bench): -1.83421436169
PSGamesPlayedPercent*(2 -> 1): -0.360530151166
PSGamesPlayedPercent*(2 -> 2): 1.03945664141
PSGamesPlayedPercent*(2 -> 3): -0.644849196201
PSGamesPlayedPercent*(2 -> Bench): -0.259492886472
PSGamesPlayedPercent*(3 -> 1): 0.991840136255
PSGamesPlayedPercent*(3 -> 2): 0.398519371374
PSGamesPla

In [362]:
GamesModelWR = create_model(rank_df, 'WR', 'GamesPlayedPercent', 
                           ['GamesPlayedPercent'], 
                   game_limit=0, test_cutoff=2013, interacts=[(0,n) for n in range(1,len(rank_df['RankChange'].unique()))], 
             cat_features=['RankChange'])

|=== Predicting GamesPlayedPercent ===|

PSGamesPlayedPercent: 0.211771827963
(1 -> 1): 0.129926339749
(1 -> 2): 0.131293961615
(1 -> 3): -1.53791261823
(1 -> Bench): 0.0236937929478
(2 -> 1): -0.43183551891
(2 -> 2): 0.0941709373365
(2 -> 3): -0.0425209230564
(2 -> Bench): 0.0438760750094
(3 -> 1): 0.303937625889
(3 -> 2): 0.353309913603
(3 -> 3): 0.327905334992
(3 -> Bench): -0.0240749703436
(Bench -> 1): 0.538292201052
(Bench -> 2): -0.292047885983
(Bench -> 3): 0.206701912238
(Bench -> Bench): 0.175283822086
PSGamesPlayedPercent*(1 -> 1): 0.0118306565091
PSGamesPlayedPercent*(1 -> 2): -0.11740563078
PSGamesPlayedPercent*(1 -> 3): 2.23401130457
PSGamesPlayedPercent*(1 -> Bench): 0.0400371445407
PSGamesPlayedPercent*(2 -> 1): 0.51197462426
PSGamesPlayedPercent*(2 -> 2): -0.0371974806303
PSGamesPlayedPercent*(2 -> 3): 0.0492838068445
PSGamesPlayedPercent*(2 -> Bench): 0.193232805957
PSGamesPlayedPercent*(3 -> 1): -0.228111697244
PSGamesPlayedPercent*(3 -> 2): -0.297045107315
PSGamesPl

In [363]:
GamesModelTE = create_model(rank_df, 'TE', 'GamesPlayedPercent', 
                           ['GamesPlayedPercent'], 
                   game_limit=0, test_cutoff=2013, interacts=[(0,n) for n in range(1,len(rank_df['RankChange'].unique()))], 
             cat_features=['RankChange'])

|=== Predicting GamesPlayedPercent ===|

PSGamesPlayedPercent: 0.453963472039
(1 -> 1): -0.336599432008
(1 -> 2): -1.22483708002
(1 -> 3): 0.126289001808
(1 -> Bench): 0.752297066324
(2 -> 1): -0.221863849611
(2 -> 2): -0.30009970733
(2 -> 3): 0.686324844102
(2 -> Bench): -0.2278479613
(3 -> 1): 1.05774061471
(3 -> 2): -0.814109183676
(3 -> 3): 0.243174395517
(3 -> Bench): 0.144091284011
(Bench -> 1): 0.309588732991
(Bench -> 2): 0.175457295969
(Bench -> 3): -0.110223319409
(Bench -> Bench): -0.259382702081
PSGamesPlayedPercent*(1 -> 1): 0.0378580302635
PSGamesPlayedPercent*(1 -> 2): 0.924085308449
PSGamesPlayedPercent*(1 -> 3): -0.389447343007
PSGamesPlayedPercent*(1 -> Bench): -2.95396347204
PSGamesPlayedPercent*(2 -> 1): -0.0988819162048
PSGamesPlayedPercent*(2 -> 2): -0.0166515362317
PSGamesPlayedPercent*(2 -> 3): -1.00951902759
PSGamesPlayedPercent*(2 -> Bench): 0.0708984064139
PSGamesPlayedPercent*(3 -> 1): -1.4700925043
PSGamesPlayedPercent*(3 -> 2): 0.440267297192
PSGamesPlayed

In [14]:
sim_player = {'PSGamesPlayedPercent': 0.8,
                '(1 -> 1)': 0,
                '(1 -> 2)': 0,
                '(1 -> 3)': 0,
                '(1 -> Bench)': 0,
                '(2 -> 1)': 1,
                '(2 -> 2)': 0,
                '(2 -> 3)': 0,
                '(2 -> Bench)': 0,
                '(3 -> 1)': 0,
                '(3 -> 2)': 0, 
                '(3 -> 3)': 0,
                '(3 -> Bench)': 0,
                '(Bench -> 1)': 0,
                '(Bench -> 2)': 0, 
                '(Bench -> 3)': 0, 
                '(Bench -> Bench)': 0,
                 'RankChange': '(2 -> 1)',
                 'Position': 'QB'}

sim_player2 = {'PSGamesPlayedPercent': 0.8,
                  '1': 1,
                  '2': 0, 
                  '3': 0, 
                  'Bench': 0}

for var in sim_player.copy():
    if var != 'PSGamesPlayedPercent':
        sim_player['PSGamesPlayedPercent*'+var] = sim_player['PSGamesPlayedPercent']*sim_player[var]

TypeError: can't multiply sequence by non-int of type 'float'

While R-squared scores increased greatly compared to the old strategy of treating ranks, it is still less than desirable. Not only that, but much of it seems unintuitive. For example, the current model for WR's predict someone going from 2nd string to 1st string to play less than they did the previous season where they were a lower rank. 

The main problem is simply not enough data (as you can see above). By making the categories unique to (Position, Rank, PreviousRank) it reduces the amount of data for many of the categories to be less than 50. Even with this issue, predicting based on this combination still performs much better than just using the Rank and PrevRanks as the categorical variables, so I'm going to stick with this general method.

My next plan is to use the means of each rank (displayed below) to help with this prediction. I want to make my end prediction calculated by some combination of my model's prediction and the means for the ranks. The idea is to weigh my model's prediction heavier when it has more data available to it. So predicting a QB going from 1st string to 1st string (207 data points) will rely more on the model, whereas a QB going from 1st string to 3rd string (12 data points) will rely more on the current rank's mean.

In [678]:
rank_df.groupby(['Position', 'Rank'])['GamesPlayedPercent'].mean()

Position  Rank 
QB        1.0      0.859486
          2.0      0.323123
          3.0      0.287500
          Bench    0.230682
RB        1.0      0.863308
          2.0      0.862284
          3.0      0.701027
          Bench    0.616969
TE        1.0      0.908008
          2.0      0.871336
          3.0      0.762041
          Bench    0.432842
WR        1.0      0.914356
          2.0      0.822476
          3.0      0.657328
          Bench    0.467201
Name: GamesPlayedPercent, dtype: float64

In [364]:
sizes = pd.DataFrame(rank_df.groupby(['Position', 'RankChange']).size())
means = pd.DataFrame(rank_df.groupby(['Position', 'Rank'])['GamesPlayedPercent'].mean())

In [365]:
def get_size(pos, rank_change, groupby):
    return int(groupby.loc[(pos, rank_change)])

In [366]:
def predict_games_played(player, model, sizes, means, pref_games=200):
    model_predict = model.predict(player)
    if model_predict < 0:
        model_predict = 0
    if model_predict > 1.0:
        model_predict = 1
        
    model_prop = sqrt(sizes.loc[(player['Position'], player['RankChange'])]) / sqrt(pref_games)
    if model_prop > 1:
        model_prop = 1
        
    mean_predict = means.loc[(player['Position'], player['Rank'])]
    
    return (model_predict * model_prop) + (mean_predict * (1 - model_prop))

In [471]:
sim_player = {'Position': 'RB',
                'Rank': 2.0,
                'RankChange': '(1 -> 2)',
                'PSGamesPlayedPercent': 0.9,
                '(1 -> 1)': 0,
                '(1 -> 2)': 1,
                '(1 -> 3)': 0,
                '(1 -> Bench)': 0,
                '(2 -> 1)': 0,
                '(2 -> 2)': 0,
                '(2 -> 3)': 0,
                '(2 -> Bench)': 0,
                '(3 -> 1)': 0,
                '(3 -> 2)': 0, 
                '(3 -> 3)': 0,
                '(3 -> Bench)': 0,
                '(Bench -> 1)': 0,
                '(Bench -> 2)': 0, 
                '(Bench -> 3)': 0, 
                '(Bench -> Bench)': 0,
             }

for var in sim_player.copy():
    if var not in ['Position', 'Rank', 'RankChange', 'PSGamesPlayedPercent']:
        sim_player['PSGamesPlayedPercent*'+var] = sim_player['PSGamesPlayedPercent']*sim_player[var]

In [367]:
del rank_df['PrevRank']

In [359]:
rank_df.tail()

Unnamed: 0,Player,Position,Team,Season,Plays,Games,RushAttempts,RushYrds,RushTDs,PassAttempts,...,AvgRushYrds,PercentComplete,PlaysPerGame,GamesPlayedPercent,InterceptionRatio,PassAttPerGame,RushAttPerGame,FumblesPerGame,PassRushAttRatio,RankChange
4876,Kellen Davis,TE,NYJ,2016,1.0,8.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.125,0.5,0.0,0.125,0.0,0.0,1.0,(2 -> 1)
4877,Troy Niklas,TE,ARI,2016,2.0,3.0,0.0,0.0,0.0,2.0,...,0.0,0.5,0.666667,0.1875,0.0,0.666667,0.0,0.0,1.0,(2 -> Bench)
4878,Hakeem Valles,TE,ARI,2016,2.0,11.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.181818,0.6875,0.0,0.181818,0.0,0.0,1.0,(Bench -> 2)
4879,Khari Lee,TE,DET,2016,1.0,8.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.125,0.5,0.0,0.125,0.0,0.0,1.0,(2 -> 3)
4880,James O'Shaughnessy,TE,KC,2016,3.0,16.0,0.0,0.0,0.0,3.0,...,0.0,0.666667,0.1875,1.0,0.0,0.1875,0.0,0.0,1.0,(2 -> 3)


In [368]:
def build_season_df(df, interacts=False, categorical=[], season_range=(2009, 2017)):
    primary_key = ['Player', 'Position', 'Team']
    left_vars = df.columns.copy()
    right_vars = ['PS'+str(var) for var in df.columns.copy() if var not in primary_key]
    season_vars = list(left_vars.copy()) + list(right_vars)
    
    season_df = pd.DataFrame(columns=(list(left_vars).extend(right_vars)))
    
    # build base dataframe
    for season in range(season_range[0], season_range[1]):
        prev_season = str(season - 1)
        season = str(season)
        
        left_df = df[left_vars][df.Season == season]
        right_df = df[left_vars][df.Season == prev_season]
        right_df.columns = primary_key + right_vars
        left_df = pd.merge(left_df, right_df, on=primary_key)
        season_df = season_df.append(left_df)

    # add categorical dummy variables
    for cur_cat_var in categorical:
        dummy_df = pd.get_dummies(season_df[cur_cat_var])
        season_vars.extend(dummy_df.columns)
        season_df = pd.concat([season_df, dummy_df], axis=1)
    
    # add interaction variables
    numerical_vars = []
    for cur_var in season_vars:
        if cur_var not in categorical and cur_var not in primary_key and cur_var != 'Season':
            numerical_vars.append(cur_var)
    
    def interact_apply(row, numerical_vars, outer_idx, inner_idx):
        return row[numerical_vars[outer_idx]] * row[numerical_vars[inner_idx]]
       
    return season_df.reindex(columns=season_vars)

In [821]:
season_df = build_season_df(rank_df, interacts=True, 
                            categorical=['RankChange', 'Rank'])
season_df.head()

Unnamed: 0,Player,Position,Team,Season,Plays,Games,RushAttempts,RushYrds,RushTDs,PassAttempts,...,(3 -> 3),(3 -> Bench),(Bench -> 1),(Bench -> 2),(Bench -> 3),(Bench -> Bench),1,2,3,Bench
0,Aaron Rodgers,QB,GB,2009,599.0,16.0,58.0,316.0,5.0,541.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,Drew Brees,QB,NO,2009,537.0,15.0,22.0,33.0,2.0,514.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,Matt Schaub,QB,HOU,2009,631.0,16.0,48.0,57.0,0.0,583.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,Peyton Manning,QB,IND,2009,590.0,16.0,19.0,-13.0,0.0,571.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,Tony Romo,QB,DAL,2009,585.0,16.0,35.0,105.0,1.0,550.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [822]:
def add_interacts(df, interacts):
    def apply_int(row, var1, var2):
        return row[var1] * row[var2]
    
    for tup in interacts:
        df[tup[0]+'*'+tup[1]] = df.apply(apply_int, args=(tup[0], tup[1],), axis=1)
    
    return df

In [823]:
for var in season_df['RankChange'].unique():
    season_df = add_interacts(season_df, [('PSGamesPlayedPercent', var)])

In [513]:
def apply_pred_games(row, games_mdl, sizes, means):
    return predict_games_played(row.to_dict(), games_mdl, sizes, means)

In [514]:
season_df['PredGamesPlayedPercent'] = season_df.apply(apply_pred_games, args=(qb_games_mdl, sizes, means,), axis=1)

In [819]:
season_df[['Player', 'Team', 'Season', 'Position', 'PredGamesPlayedPercent', 
           'GamesPlayedPercent', 'PSGamesPlayedPercent']].tail()

KeyError: "['PredGamesPlayedPercent'] not in index"

In [375]:
def extremify_col(df, var, mult_factor):
    mean = df[var].mean()
    def extremify_row(row, var, mean, mult_factor):
        new_val = (row[var] - mean) * mult_factor + mean
        
        if new_val < 0:
            return 0
        return new_val
    
    df[var] = df.apply(extremify_row, args=(var, mean, mult_factor,), axis=1)
    return df

## Final Predictions

In [428]:
def apply_pred_games(row, games_mdl, sizes, means):
    return predict_games_played(row.to_dict(), games_mdl, sizes, means)

In [429]:
def add_interacts(df, interacts):
    def apply_int(row, var1, var2):
        return row[var1] * row[var2]
    
    for tup in interacts:
        df[tup[0]+'*'+tup[1]] = df.apply(apply_int, args=(tup[0], tup[1],), axis=1)
    
    return df

In [430]:
off = select_to_df(fdb, 'OffenseSeason', offense_lbls)
off = add_feature(off, 'OffPassRushRatio', pass_rush_rat)
off = add_feature(off, 'OffYardsPerGame', yards_per_game)
off = add_feature(off, 'OffPassYrdsPerGame', pass_yrds_per_game)
off = add_feature(off, 'OffRushYrdsPerGame', rush_yrds_per_game)
off = add_feature(off, 'OffPointsPerGame', points_per_game)
off = off.drop(['Yards', 'PassYrds', 'RushYrds', 'Points'], axis=1)

In [431]:
season_df = pd.merge(rank_df, off, on=['Team', 'Season'])
season_df = build_season_df(season_df, interacts=True, categorical=['RankChange', 'Rank'])

for var in season_df['RankChange'].unique():
    season_df = add_interacts(season_df, [('PSGamesPlayedPercent', var)])

In [432]:
GamesPredictQB = season_df[season_df.Position == 'QB']
GamesPredictQB['PredGamesPlayedPercent'] = GamesPredictQB.apply(apply_pred_games, args=(GamesModelQB, sizes, means,), axis=1)
GamesPredictRB = season_df[season_df.Position == 'RB']
GamesPredictRB['PredGamesPlayedPercent'] = GamesPredictRB.apply(apply_pred_games, args=(GamesModelRB, sizes, means,), axis=1)
GamesPredictWR = season_df[season_df.Position == 'WR']
GamesPredictWR['PredGamesPlayedPercent'] = GamesPredictWR.apply(apply_pred_games, args=(GamesModelWR, sizes, means,), axis=1)
GamesPredictTE = season_df[season_df.Position == 'TE']
GamesPredictTE['PredGamesPlayedPercent'] = GamesPredictTE.apply(apply_pred_games, args=(GamesModelTE, sizes, means,), axis=1)

In [433]:
GamesPredictQB = extremify_col(GamesPredictQB, 'PredGamesPlayedPercent', 1.25)
GamesPredictRB = extremify_col(GamesPredictRB, 'PredGamesPlayedPercent', 1.6)
GamesPredictWR = extremify_col(GamesPredictWR, 'PredGamesPlayedPercent', 1.55)
GamesPredictTE = extremify_col(GamesPredictTE, 'PredGamesPlayedPercent', 1.5)

In [434]:
StatsPredictQB = ModelsQB.predict_df(season_df[season_df.Position == 'QB'])
StatsPredictRB = ModelsRB.predict_df(season_df[season_df.Position == 'RB'])
StatsPredictWR = ModelsWR.predict_df(season_df[season_df.Position == 'WR'])
StatsPredictTE = ModelsTE.predict_df(season_df[season_df.Position == 'TE'])

Predicting RushAttPerGame...
Predicting PassTDAttRatio...
Predicting AvgRushYrds...
Predicting PassAttPerGame...
Predicting AvgPassYrds...
Predicting FumblesPerGame...
Predicting AvgRushYrds...
Predicting AvgPassYrds...
Predicting FumblesPerGame...
Predicting PassAttPerGame...
Predicting RushAttPerGame...
Predicting PassAttPerGame...
Predicting AvgPassYrds...
Predicting RushAttPerGame...
Predicting PassTDAttRatio...
Predicting AvgRushYrds...
Predicting PassAttPerGame...
Predicting AvgPassYrds...
Predicting RushAttPerGame...
Predicting PassTDAttRatio...
Predicting AvgRushYrds...


In [435]:
#StatsPredictWR = extremify_col(StatsPredictWR, 'PredPassAttPerGame', 1.2)
StatsPredictTE = extremify_col(StatsPredictTE, 'PredPassAttPerGame', 1.5)

In [436]:
games_vars = ['Player', 'Team', 'Position', 'Season', 'PredGamesPlayedPercent']
FinalPredictQB = pd.merge(GamesPredictQB[games_vars], StatsPredictQB, on=['Player', 'Team', 'Position', 'Season'])
FinalPredictRB = pd.merge(GamesPredictRB[games_vars], StatsPredictRB, on=['Player', 'Team', 'Position', 'Season'])
FinalPredictWR = pd.merge(GamesPredictWR[games_vars], StatsPredictWR, on=['Player', 'Team', 'Position', 'Season'])
FinalPredictTE = pd.merge(GamesPredictTE[games_vars], StatsPredictTE, on=['Player', 'Team', 'Position', 'Season'])

In [437]:
def total_pass(row):
    return row['PredAvgPassYrds'] * row['PredPassAttPerGame'] * row['PredGamesPlayedPercent'] * 16

def total_rush(row):
    return row['PredAvgRushYrds'] * row['PredRushAttPerGame'] * row['PredGamesPlayedPercent'] * 16

In [438]:
FinalPredictQB['PredSeasonPassYrds'] = FinalPredictQB.apply(total_pass, axis=1)
FinalPredictQB['PredSeasonRushYrds'] = FinalPredictQB.apply(total_rush, axis=1)
FinalPredictRB['PredSeasonPassYrds'] = FinalPredictRB.apply(total_pass, axis=1)
FinalPredictRB['PredSeasonRushYrds'] = FinalPredictRB.apply(total_rush, axis=1)
FinalPredictWR['PredSeasonPassYrds'] = FinalPredictWR.apply(total_pass, axis=1)
FinalPredictWR['PredSeasonRushYrds'] = FinalPredictWR.apply(total_rush, axis=1)
FinalPredictTE['PredSeasonPassYrds'] = FinalPredictTE.apply(total_pass, axis=1)
FinalPredictTE['PredSeasonRushYrds'] = FinalPredictTE.apply(total_rush, axis=1)

In [439]:
def pass_error(row):
    return row['PredSeasonPassYrds'] - row['PassYrds']

def rush_error(row):
    return row['PredSeasonRushYrds'] - row['RushYrds']

In [440]:
FinalPredictQB['PassYrdsError'] = FinalPredictQB.apply(pass_error, axis=1)
FinalPredictQB['RushYrdsError'] = FinalPredictQB.apply(rush_error, axis=1)
FinalPredictRB['PassYrdsError'] = FinalPredictRB.apply(pass_error, axis=1)
FinalPredictRB['RushYrdsError'] = FinalPredictRB.apply(rush_error, axis=1)
FinalPredictWR['PassYrdsError'] = FinalPredictWR.apply(pass_error, axis=1)
FinalPredictWR['RushYrdsError'] = FinalPredictWR.apply(rush_error, axis=1)
FinalPredictTE['PassYrdsError'] = FinalPredictTE.apply(pass_error, axis=1)
FinalPredictTE['RushYrdsError'] = FinalPredictTE.apply(rush_error, axis=1)

In [441]:
rush_vars = ['Player', 'Team', 'Season', 'RankChange', 'PredSeasonRushYrds', 'RushYrds', 'PSRushYrds', 'RushYrdsError'
                'PredGamesPlayedPercent', 'GamesPlayedPercent', 'PSGamesPlayedPercent', 'PredAvgRushYrds',
                'AvgRushYrds', 'PSAvgRushYrds', 'PredRushAttPerGame', 'RushAttPerGame']

pass_vars = ['Player', 'Team', 'Season', 'RankChange', 'PredSeasonPassYrds', 'PassYrds', 'PSPassYrds', 'PassYrdsError',
                'PredGamesPlayedPercent', 'GamesPlayedPercent', 'PSGamesPlayedPercent', 'PredAvgPassYrds',
                'AvgPassYrds', 'PSAvgPassYrds', 'PredPassAttPerGame', 'PassAttPerGame']

In [442]:
FinalPredictQB['PassYrdsError'].mean()

17.44127258950062

In [443]:
sp.stats.pearsonr(FinalPredictQB['PredSeasonRushYrds'], 
                  FinalPredictQB['RushYrds'])

(0.86842556143948169, 7.1560561913620034e-103)

In [444]:
def extremify_col(df, var, mult_factor):
    mean = df[var].mean()
    def extremify_row(row, var, mean, mult_factor):
        new_val = (row[var] - mean) * mult_factor + mean
        
        if new_val < 0:
            return 0
        return new_val
    
    df[var] = df.apply(extremify_row, args=(var, mean, mult_factor,), axis=1)
    return df

In [445]:
FinalPredictQB = extremify_col(FinalPredictQB, 'PredSeasonPassYrds', 1.25)
FinakPredictQB = extremify_col(FinalPredictQB, 'PredSeasonRushYrds', 1.6)

FinalPredictRB = extremify_col(FinalPredictRB, 'PredSeasonRushYrds', 1.4)
FinalPredictRB = extremify_col(FinalPredictRB, 'PredSeasonPassYrds', 1.5)

FinalPredictWR = extremify_col(FinalPredictWR, 'PredSeasonPassYrds', 1.6)

FinalPredictTE = extremify_col(FinalPredictTE, 'PredSeasonPassYrds', 1.55)

## Fantasy Point Scoring

First I need to fill in the rest of the dataframe with the statistics I didn't use models to predict (due to no correlation with any of the predictor variables).

In [447]:
def add_unpredicted(FinalPredictQB, FinalPredictRB, FinalPredictTE, FinalPredictWR):
    FinalPredictQB['PredRushTDAttRatio'] = FinalPredictQB['PSRushTDAttRatio'].mean()
    FinalPredictQB['PredInterceptionRatio'] = FinalPredictQB['PSInterceptionRatio'].mean()

    FinalPredictRB['PredPassTDAttRatio'] = FinalPredictRB['PSPassTDAttRatio'].mean()
    FinalPredictRB['PredInterceptionRatio'] = FinalPredictRB['PSInterceptionRatio'].mean()
    FinalPredictRB['PredRushTDAttRatio'] = FinalPredictRB['PSRushTDAttRatio'].mean()

    FinalPredictWR['PredFumblesPerGame'] = FinalPredictWR['PSFumblesPerGame'].mean()
    FinalPredictWR['PredRushTDAttRatio'] = FinalPredictWR['PSRushTDAttRatio'].mean()

    FinalPredictTE['PredFumblesPerGame'] = FinalPredictTE['PSFumblesPerGame'].mean()
    
    return FinalPredictQB, FinalPredictRB, FinalPredictTE, FinalPredictWR

FinalPredictQB, FinalPredictRB, FinalPredictTE, FinalPredictWR = \
    add_unpredicted(FinalPredictQB, FinalPredictRB, FinalPredictTE, FinalPredictWR)

In [448]:
def calc_final(FinalPredictQB, FinalPredictRB, FinalPredictTE, FinalPredictWR):
    FinalPredictQB['PredSeasonFumbles'] = FinalPredictQB['PredFumblesPerGame'] * FinalPredictQB['PredGamesPlayedPercent'] * 16
    FinalPredictRB['PredSeasonFumbles'] = FinalPredictRB['PredFumblesPerGame'] * FinalPredictRB['PredGamesPlayedPercent'] * 16
    FinalPredictWR['PredSeasonFumbles'] = FinalPredictWR['PredFumblesPerGame'] * FinalPredictWR['PredGamesPlayedPercent'] * 16
    FinalPredictTE['PredSeasonFumbles'] = FinalPredictTE['PredFumblesPerGame'] * FinalPredictTE['PredGamesPlayedPercent'] * 16

    FinalPredictQB['PredSeasonPassTDs'] = FinalPredictQB['PredPassTDAttRatio'] * \
        FinalPredictQB['PredPassAttPerGame'] * FinalPredictQB['PredGamesPlayedPercent'] * 16
    FinalPredictRB['PredSeasonPassTDs'] = FinalPredictRB['PredPassTDAttRatio'] * \
        FinalPredictRB['PredPassAttPerGame'] * FinalPredictRB['PredGamesPlayedPercent'] * 16
    FinalPredictWR['PredSeasonPassTDs'] = FinalPredictWR['PredPassTDAttRatio'] * \
        FinalPredictWR['PredPassAttPerGame'] * FinalPredictWR['PredGamesPlayedPercent'] * 16
    FinalPredictTE['PredSeasonPassTDs'] = FinalPredictTE['PredPassTDAttRatio'] * \
        FinalPredictTE['PredPassAttPerGame'] * FinalPredictTE['PredGamesPlayedPercent'] * 16

    FinalPredictQB['PredSeasonInterceptions'] = FinalPredictQB['PredInterceptionRatio'] * FinalPredictQB['PredPassAttPerGame'] * \
        FinalPredictQB['PredGamesPlayedPercent'] * 16
    FinalPredictRB['PredSeasonInterceptions'] = FinalPredictRB['PredInterceptionRatio'] * FinalPredictRB['PredPassAttPerGame'] * \
        FinalPredictRB['PredGamesPlayedPercent'] * 16

    FinalPredictQB['PredSeasonRushTDs'] = FinalPredictQB['PredRushTDAttRatio'] * \
        FinalPredictQB['PredRushAttPerGame'] * FinalPredictQB['PredGamesPlayedPercent'] * 16
    FinalPredictRB['PredSeasonRushTDs'] = FinalPredictRB['PredRushTDAttRatio'] * \
        FinalPredictRB['PredRushAttPerGame'] * FinalPredictRB['PredGamesPlayedPercent'] * 16
    FinalPredictWR['PredSeasonRushTDs'] = FinalPredictWR['PredRushTDAttRatio'] * \
        FinalPredictWR['PredRushAttPerGame'] * FinalPredictWR['PredGamesPlayedPercent'] * 16
        
    return FinalPredictQB, FinalPredictRB, FinalPredictTE, FinalPredictWR

FinalPredictQB, FinalPredictRB, FinalPredictTE, FinalPredictWR = \
    calc_final(FinalPredictQB, FinalPredictRB, FinalPredictTE, FinalPredictWR)

In [450]:
ScoreQB = {
    'PredSeasonPassYrds': 1/25,
    'PredSeasonPassTDs': 4,
    'PredSeasonInterceptions': -1,
    'PredSeasonFumbles': -2,
    'PredSeasonRushYrds': 1/10,
    'PredSeasonRushTDs': 6
}

ScoreRB = {
    'PredSeasonRushYrds': 1/10,
    'PredSeasonRushTDs': 6,
    'PredSeasonFumbles': -2,
    'PredSeasonPassYrds': 1/25,
    'PredSeasonPassTDs': 4,
    'PredSeasonInterceptions': -1
}

ScoreWR = {
    'PredSeasonPassYrds': 1/10,
    'PredSeasonPassTDs': 6,
    'PredSeasonFumbles': -2,
    'PredSeasonRushYrds': 1/10,
    'PredSeasonRushTDs': 6,
}

ScoreTE = {
    'PredSeasonPassYrds': 1/10,
    'PredSeasonPassTDs': 6,
    'PredSeasonFumbles': -2,
    'PredSeasonRushYrds': 1/10
}

ScoreK = {
    'PredSeasonFGoals': 3.25,
    'PredSeasonExtraPoints': 1
}

In [451]:
def score_player(row, score):
    total = 0
    for var in score:
        total += score[var]*row[var]
    
    return total

In [452]:
FinalPredictQB['PredFantasyPoints'] = FinalPredictQB.apply(score_player, args=(ScoreQB,), axis=1)

In [453]:
FinalPredictRB['PredFantasyPoints'] = FinalPredictRB.apply(score_player, args=(ScoreRB,), axis=1)
FinalPredictRB[['Player', 'Team', 'Season', 'PredFantasyPoints']][FinalPredictRB.Season == '2016'].sort('PredFantasyPoints', ascending=False)[:10]

Unnamed: 0,Player,Team,Season,PredFantasyPoints
632,Devonta Freeman,ATL,2016,186.97339
647,Doug Martin,TB,2016,183.289475
662,Jonathan Stewart,CAR,2016,167.147556
710,Todd Gurley,LAR,2016,165.165888
666,Latavius Murray,OAK,2016,163.689417
669,Le'Veon Bell,PIT,2016,162.415139
636,Frank Gore,IND,2016,159.238016
629,Mark Ingram,NO,2016,148.185092
677,LeSean McCoy,BUF,2016,139.01444
670,DeAngelo Williams,PIT,2016,136.244036


In [454]:
FinalPredictWR['PredFantasyPoints'] = FinalPredictWR.apply(score_player, args=(ScoreWR,), axis=1)
FinalPredictWR[['Player', 'Team', 'Season', 'PredFantasyPoints']][FinalPredictWR.Season == '2016'].sort('PredFantasyPoints', ascending=False)[:10]

Unnamed: 0,Player,Team,Season,PredFantasyPoints
736,Julio Jones,ATL,2016,272.708065
812,Jarvis Landry,MIA,2016,198.517016
834,Alshon Jeffery,CHI,2016,182.365645
815,Demaryius Thomas,DEN,2016,177.208777
739,T.Y. Hilton,IND,2016,166.320903
730,Randall Cobb,GB,2016,157.479877
783,Antonio Brown,PIT,2016,157.007314
733,Brandin Cooks,NO,2016,156.596619
822,DeAndre Hopkins,HOU,2016,151.157305
744,Pierre Garcon,WAS,2016,151.052634


In [455]:
FinalPredictTE['PredFantasyPoints'] = FinalPredictTE.apply(score_player, args=(ScoreTE,), axis=1)
FinalPredictTE[['Player', 'Team', 'Season', 'PredFantasyPoints']][FinalPredictTE.Season == '2016'].sort('PredFantasyPoints', ascending=False)[:10]

Unnamed: 0,Player,Team,Season,PredFantasyPoints
461,Greg Olsen,CAR,2016,176.767974
439,Jordan Reed,WAS,2016,163.813161
455,Delanie Walker,TEN,2016,154.701268
496,Gary Barnidge,CLE,2016,148.762246
482,Travis Kelce,KC,2016,148.00026
476,Zach Ertz,PHI,2016,144.807932
475,Rob Gronkowski,NE,2016,135.886649
479,Kyle Rudolph,MIN,2016,130.170642
443,Jason Witten,DAL,2016,127.410111
441,Antonio Gates,LAC,2016,120.079257


## 2017 Season Fantasy Predictions

In [456]:
season_2016 = pd.merge(rank_df[rank_df.Season == '2016'], off[off.Season == '2016'], on=['Team', 'Season'])

In [457]:
season_2017 = season_2016.copy()
season_2017['Season'] = '2017'
season_2017  = pd.concat([season_2017, season_2016])
season_2017 = build_season_df(season_2017, interacts=True, categorical=['RankChange', 'Rank'], season_range=(2016, 2018))

In [458]:
for var in season_2017['RankChange'].unique():
    season_2017 = add_interacts(season_2017, [('PSGamesPlayedPercent', var)])

In [459]:
QB2017 = season_2017[season_2017.Position == 'QB']
RB2017 = season_2017[season_2017.Position == 'RB']
WR2017 = season_2017[season_2017.Position == 'WR']
TE2017 = season_2017[season_2017.Position == 'TE']

In [460]:
QB2017 = QB2017.set_value(QB2017[QB2017.Player == 'Tom Brady'].index[0], 'PSGamesPlayedPercent', 1.0)
TE2017 = TE2017.set_value(TE2017[TE2017.Player == 'Rob Gronkowski'].index[0], 'PSGamesPlayedPercent', 1.0)

In [461]:
QB2017['PredGamesPlayedPercent'] = QB2017.apply(apply_pred_games, args=(GamesModelQB, sizes, means,), axis=1)
RB2017['PredGamesPlayedPercent'] = RB2017.apply(apply_pred_games, args=(GamesModelRB, sizes, means,), axis=1)
TE2017['PredGamesPlayedPercent'] = TE2017.apply(apply_pred_games, args=(GamesModelTE, sizes, means,), axis=1)
WR2017['PredGamesPlayedPercent'] = WR2017.apply(apply_pred_games, args=(GamesModelWR, sizes, means,), axis=1)

In [462]:
QB2017 = extremify_col(QB2017, 'PredGamesPlayedPercent', 1.25)
RB2017 = extremify_col(RB2017, 'PredGamesPlayedPercent', 1.6)
WR2017 = extremify_col(WR2017, 'PredGamesPlayedPercent', 1.55)
TE2017 = extremify_col(TE2017, 'PredGamesPlayedPercent', 1.5)

In [463]:
def round_down(row):
    if row['PredGamesPlayedPercent'] > 1.0:
        return 1.0
    return row['PredGamesPlayedPercent']

In [464]:
QB2017['PredGamesPlayedPercent'] = QB2017.apply(round_down, axis=1)

In [465]:
StatsPredictQB = ModelsQB.predict_df(season_2017[season_2017.Position == 'QB'])
StatsPredictRB = ModelsRB.predict_df(season_2017[season_2017.Position == 'RB'])
StatsPredictWR = ModelsWR.predict_df(season_2017[season_2017.Position == 'WR'])
StatsPredictTE = ModelsTE.predict_df(season_2017[season_2017.Position == 'TE'])

Predicting RushAttPerGame...
Predicting PassTDAttRatio...
Predicting AvgRushYrds...
Predicting PassAttPerGame...
Predicting AvgPassYrds...
Predicting FumblesPerGame...
Predicting AvgRushYrds...
Predicting AvgPassYrds...
Predicting FumblesPerGame...
Predicting PassAttPerGame...
Predicting RushAttPerGame...
Predicting PassAttPerGame...
Predicting AvgPassYrds...
Predicting RushAttPerGame...
Predicting PassTDAttRatio...
Predicting AvgRushYrds...
Predicting PassAttPerGame...
Predicting AvgPassYrds...
Predicting RushAttPerGame...
Predicting PassTDAttRatio...
Predicting AvgRushYrds...


In [466]:
games_vars = ['Player', 'Team', 'Position', 'Season', 'PredGamesPlayedPercent']
QB2017 = pd.merge(QB2017[games_vars], StatsPredictQB, on=['Player', 'Team', 'Position', 'Season'])
RB2017 = pd.merge(RB2017[games_vars], StatsPredictRB, on=['Player', 'Team', 'Position', 'Season'])
TE2017 = pd.merge(TE2017[games_vars], StatsPredictTE, on=['Player', 'Team', 'Position', 'Season'])
WR2017 = pd.merge(WR2017[games_vars], StatsPredictWR, on=['Player', 'Team', 'Position', 'Season'])

In [467]:
QB2017['PredSeasonPassYrds'] = QB2017.apply(total_pass, axis=1)
QB2017['PredSeasonRushYrds'] = QB2017.apply(total_rush, axis=1)
RB2017['PredSeasonPassYrds'] = RB2017.apply(total_pass, axis=1)
RB2017['PredSeasonRushYrds'] = RB2017.apply(total_rush, axis=1)
TE2017['PredSeasonPassYrds'] = TE2017.apply(total_pass, axis=1)
TE2017['PredSeasonRushYrds'] = TE2017.apply(total_rush, axis=1)
WR2017['PredSeasonPassYrds'] = WR2017.apply(total_pass, axis=1)
WR2017['PredSeasonRushYrds'] = WR2017.apply(total_rush, axis=1)

In [468]:
QB2017, RB2017, TE2017, WR2017 = add_unpredicted(QB2017, RB2017, TE2017, WR2017)
QB2017, RB2017, TE2017, WR2017 = calc_final(QB2017, RB2017, TE2017, WR2017)

In [469]:
QB2017['PredFantasyPoints'] = QB2017.apply(score_player, args=(ScoreQB,), axis=1)
RB2017['PredFantasyPoints'] = RB2017.apply(score_player, args=(ScoreRB,), axis=1)
TE2017['PredFantasyPoints'] = TE2017.apply(score_player, args=(ScoreTE,), axis=1)
WR2017['PredFantasyPoints'] = WR2017.apply(score_player, args=(ScoreWR,), axis=1)

#### Quarterbacks

In [470]:
QB2017[['Player', 'Team', 'PredFantasyPoints', 'PredSeasonPassYrds', 'PSPassYrds', 'PredGamesPlayedPercent']] \
    .sort('PredFantasyPoints', ascending=False)[:10]

Unnamed: 0,Player,Team,PredFantasyPoints,PredSeasonPassYrds,PSPassYrds,PredGamesPlayedPercent
2,Drew Brees,NO,312.455247,5002.296167,5208.0,0.934669
7,Kirk Cousins,WAS,290.544271,4515.005492,4917.0,0.934669
0,Aaron Rodgers,GB,290.000147,4303.418304,4428.0,0.934669
37,Tom Brady,NE,282.234888,4592.517635,3554.0,1.0
5,Andrew Luck,IND,270.317899,4071.172931,4240.0,0.920572
31,Joe Flacco,BAL,265.932029,4344.493486,4317.0,0.934669
3,Matt Ryan,ATL,261.249318,4164.390822,4944.0,0.934669
16,Russell Wilson,SEA,260.825714,3952.764759,4219.0,0.934669
10,Matthew Stafford,DET,260.317427,4129.19579,4327.0,0.934669
18,Andy Dalton,CIN,256.679792,3964.181054,4206.0,0.934669


In [1232]:
RB2017[['Player', 'Team', 'PredFantasyPoints', 'PredSeasonRushYrds', 'PSRushYrds']] \
    .sort('PredFantasyPoints', ascending=False)[:10]

Unnamed: 0,Player,Team,PredFantasyPoints,PredSeasonRushYrds,PSRushYrds
81,Le'Veon Bell,PIT,176.624349,1175.106612,1268.0
36,Ezekiel Elliott,DAL,169.256531,1183.908293,1631.0
63,DeMarco Murray,TEN,151.50395,1016.222521,1287.0
67,David Johnson,ARI,151.073639,974.787584,1239.0
25,Melvin Gordon,LAC,151.02601,1004.314506,997.0
154,Jordan Howard,CHI,150.324069,1032.856791,1313.0
138,Lamar Miller,HOU,147.549497,1011.645771,1073.0
124,Jay Ajayi,MIA,140.731293,982.818537,1272.0
102,LeGarrette Blount,NE,136.763012,981.536129,1161.0
150,Todd Gurley,LAR,134.730751,887.629871,885.0


In [473]:
TE2017[['Player', 'Team', 'PredFantasyPoints', 'PredSeasonPassYrds', 'PSPassYrds']] \
    .sort('PredFantasyPoints', ascending=False)[:10]

Unnamed: 0,Player,Team,PredFantasyPoints,PredSeasonPassYrds,PSPassYrds
77,Kyle Rudolph,MIN,119.209178,830.546932,840.0
49,Greg Olsen,CAR,118.264892,872.238356,1073.0
14,Jordan Reed,WAS,118.074548,821.230517,686.0
81,Travis Kelce,KC,116.992157,862.234081,1125.0
33,Jimmy Graham,SEA,110.404713,788.000164,923.0
58,Dennis Pitta,BAL,106.392367,773.401977,729.0
40,Delanie Walker,TEN,103.288708,717.748842,800.0
74,Zach Ertz,PHI,101.520197,731.417073,816.0
69,Martellus Bennett,NE,101.40096,696.956654,701.0
3,Coby Fleener,NO,100.754783,727.090528,631.0


In [1234]:
WR2017[['Player', 'Team', 'PredFantasyPoints', 'PredSeasonPassYrds', 'PSPassYrds']] \
    .sort('PredFantasyPoints', ascending=False)[:10]

Unnamed: 0,Player,Team,PredFantasyPoints,PredSeasonPassYrds,PSPassYrds
117,Julian Edelman,NE,145.503579,1076.482035,1106.0
18,T.Y. Hilton,IND,138.147442,1130.808226,1448.0
12,Julio Jones,ATL,135.057889,1129.12017,1409.0
189,Terrelle Pryor Sr.,CLE,130.622312,1013.67786,1007.0
111,Odell Beckham Jr,NYG,127.862909,1159.847754,1367.0
160,DeAndre Hopkins,HOU,127.830272,1013.208969,954.0
73,Larry Fitzgerald,ARI,126.5011,1019.608253,1023.0
39,Golden Tate,DET,126.07797,966.67667,1077.0
146,Demaryius Thomas,DEN,123.223822,996.927987,1083.0
175,Tavon Austin,LAR,121.324331,777.833582,509.0


## Kicker Predictions

In [229]:
kicker_vars = ['Player', 'Position', 'Team', 'Season', 'Games', 'FGoals', 'FGoalsMissed',
                'ExtraPoints', 'ExtraPointsMissed']

In [230]:
kick = select_to_df('fantasy.db', 'KickerSeason', kicker_vars)
kick.head()

Unnamed: 0,Player,Position,Team,Season,Games,FGoals,FGoalsMissed,ExtraPoints,ExtraPointsMissed
0,Robbie Gould,PK,CHI,2006,16.0,32.0,4.0,47.0,0.0
1,Nate Kaeding,PK,LAC,2006,16.0,26.0,3.0,58.0,0.0
2,Jeff Wilkins,PK,LAR,2006,16.0,32.0,5.0,35.0,0.0
3,Matt Stover,PK,BAL,2006,16.0,28.0,2.0,37.0,0.0
4,Josh Scobee,PK,JAC,2006,16.0,26.0,6.0,41.0,0.0


In [231]:
kick['FGoalAtt'] = kick['FGoals'] + kick['FGoalsMissed']
kick['FGoalsPerGame'] = kick['FGoals'] / kick['Games']
kick['FGoalAttPerGame'] = kick['FGoalAtt'] / kick['Games']

kick['ExtraPointsAtt'] = kick['ExtraPoints'] + kick['ExtraPointsMissed']
kick['ExtraPointsPerGame'] = kick['ExtraPoints'] / kick['Games']
kick['ExtraPointAttPerGame'] = kick['ExtraPointsAtt'] / kick['Games']

def agp(row):
    if row['Games'] == 16:
        return 1
    else:
        return 0
    
kick['AllGamesPlayed'] = kick.apply(agp, axis=1)
kick['GamesPlayedPercent'] = kick['Games'] / 16

In [232]:
ModelsK = PositionModels('K')

In [233]:
kick_rank = select_to_df('fantasy.db', 'Roster', ['Player', 'Position', 'Team', 'Season', 'Rank'], ' WHERE Position = \'K\'')

In [234]:
kickers = pd.merge(kick, kick_rank, on=['Player', 'Team', 'Season'], how='left')

In [235]:
kickers = kickers.rename(columns={'Position_x': 'Position'})

In [236]:
def is_starter(row):
    if row['Rank'] == 1:
        return 1
    return 0

kickers['Starter'] = kickers.apply(is_starter, axis=1)

In [237]:
FGoalPerGame = create_model(kickers, 'PK', 'FGoalsPerGame', 
                           ['FGoalsPerGame', 'FGoalAttPerGame', 'Starter'], 
                   game_limit=0, test_cutoff=2014, interacts=[])
ModelsK.add_model('FGoalsPerGame', FGoalPerGame)

|=== Predicting FGoalsPerGame ===|

PSFGoalsPerGame: -0.0132445898665
PSFGoalAttPerGame: 0.220436397198
PSStarter: 0.188892829401
Intercept: 1.00005986582

R-Sqr on Seasons 2014 to 2016: 0.105224264787
Train Data Size: 165
Test Data Size:80


In [241]:
ExtraPointsPerGame = create_model(kickers, 'PK', 'ExtraPointsPerGame', 
                           ['ExtraPointsPerGame', 'ExtraPointAttPerGame'], 
                   game_limit=0, test_cutoff=2014, interacts=[])
ModelsK.add_model('ExtraPointsPerGame', ExtraPointsPerGame)

|=== Predicting ExtraPointsPerGame ===|

PSExtraPointsPerGame: 2.77429163058
PSExtraPointAttPerGame: -2.41087028702
Intercept: 1.49945400562

R-Sqr on Seasons 2014 to 2016: 0.157240100978
Train Data Size: 165
Test Data Size:80


In [242]:
kick_2016 = kickers[kickers.Season == '2016']
kick_2017 = kick_2016.copy()
kick_2017['Season'] = '2017'
kick_2017  = pd.concat([kick_2017, kick_2016])
kick_2017 = build_season_df(kick_2017, interacts=True, season_range=(2017, 2018))

In [243]:
PredictK = ModelsK.predict_df(kick_2017)

Predicting FGoalsPerGame...
Predicting ExtraPointsPerGame...


In [244]:
kick_means = kickers.groupby('Starter')['GamesPlayedPercent'].mean()

In [245]:
def gp_kick(row, means):
    if row['Starter'] == 1:
        return means[1]
    return means[0]

PredictK['PredGamesPlayedPercent'] = PredictK.apply(gp_kick, args=(kick_means,), axis=1)

In [251]:
PredictK['PredSeasonFGoals'] = PredictK['PredFGoalsPerGame'] * PredictK['PredGamesPlayedPercent'] * 16
PredictK['PredSeasonExtraPoints'] = PredictK['PredExtraPointsPerGame'] * PredictK['PredGamesPlayedPercent'] * 16
PredictK['PredFantasyPoints'] = PredictK.apply(score_player, args=(ScoreK,), axis=1)

In [253]:
PredictK[['Player', 'Team', 'Season', 'PredFantasyPoints', 
          'PredSeasonFGoals', 'PredSeasonExtraPoints']].sort('PredFantasyPoints', ascending=False)[:10]

Unnamed: 0,Player,Team,Season,PredFantasyPoints,PredSeasonFGoals,PredSeasonExtraPoints
0,Matt Bryant,ATL,2017,123.062296,25.53807,40.063568
4,Wil Lutz,NO,2017,118.824975,24.982154,37.632973
7,Dan Bailey,DAL,2017,118.758872,24.573581,38.894735
10,Adam Vinatieri,IND,2017,117.379921,24.362966,38.20028
3,Caleb Sturgis,PHI,2017,116.731228,26.367872,31.035643
1,Justin Tucker,BAL,2017,116.500617,25.908681,32.297406
2,Dustin Hopkins,WAS,2017,114.933326,26.591141,28.512119
12,Sebastian Janikowski,OAK,2017,112.998163,25.180114,31.162793
16,Brandon McManus,DEN,2017,112.880974,24.9695,31.730099
21,Chris Boswell,PIT,2017,112.658479,23.508516,36.255803


## Defense Predictions

In [254]:
def_vars = ['City', 'Position',
            'Season', 'Team', 'Sacks',
            'FRecoveries',
            'Interceptions',
            'TDs',
            'Safeties',
            'RushYrdsAllowed',
            'PassYrdsAllowed',
            'TotalYrdsAllowed']

In [255]:
defense = select_to_df('fantasy.db', 'DefenseSeason', def_vars)

Unnamed: 0,City,Position,Season,Team,Sacks,FRecoveries,Interceptions,TDs,Safeties,RushYrdsAllowed,PassYrdsAllowed,TotalYrdsAllowed
0,Chicago,DF,2006,CHI,40.0,20.0,24.0,8.0,1.0,1590.0,3388.0,4978.0
1,Baltimore,DF,2006,BAL,60.0,9.0,28.0,6.0,2.0,1214.0,3429.0,4643.0
2,Green Bay,DF,2006,GB,46.0,10.0,23.0,5.0,0.0,1825.0,3646.0,5471.0
3,Minnesota,DF,2006,MIN,30.0,15.0,21.0,6.0,0.0,985.0,4015.0,5000.0
4,Tennessee,DF,2006,TEN,26.0,10.0,17.0,8.0,2.0,2313.0,3750.0,6063.0


In [256]:
defense['SafetiesPerGame'] = defense['Safeties'] / 16
defense['SacksPerGame'] = defense['Sacks'] / 16
defense['TDsPerGame'] = defense['TDs'] / 16
defense['InterceptionsPerGame'] = defense['Interceptions'] / 16
defense['YrdsAllowedPerGame'] = defense['TotalYrdsAllowed'] / 16

In [262]:
off = select_to_df('fantasy.db', 'OffenseSeason', offense_lbls)
yards_per_point = off.Yards.sum() / off.Points.sum()

In [265]:
defense['EstPointsAllowed'] = defense['TotalYrdsAllowed'] / yards_per_point
defense['EstPointsAllowedPerGame'] = defense['EstPointsAllowed'] / 16

In [266]:
defense.tail()

Unnamed: 0,City,Position,Season,Team,Sacks,FRecoveries,Interceptions,TDs,Safeties,RushYrdsAllowed,PassYrdsAllowed,TotalYrdsAllowed,SafetiesPerGame,SacksPerGame,TDsPerGame,InterceptionsPerGame,YrdsAllowedPerGame,EstPointsAllowed,EstPointsAllowedPerGame
347,San Francisco,DF,2016,SF,31.0,10.0,10.0,0.0,1.0,2654.0,4013.0,6667.0,0.0625,1.9375,0.0,0.625,416.6875,436.499392,27.281212
348,Los Angeles,DF,2016,LAR,31.0,8.0,10.0,1.0,0.0,1660.0,3928.0,5588.0,0.0,1.9375,0.0625,0.625,349.25,365.855498,22.865969
349,Jacksonville,DF,2016,JAC,33.0,6.0,7.0,2.0,0.0,1703.0,3627.0,5330.0,0.0,2.0625,0.125,0.4375,333.125,348.963816,21.810238
350,Cleveland,DF,2016,CLE,26.0,3.0,10.0,1.0,0.0,2283.0,4173.0,6456.0,0.0,1.625,0.0625,0.625,403.5,422.684877,26.417805
351,New York,DF,2016,NYJ,27.0,4.0,8.0,0.0,0.0,1581.0,4077.0,5658.0,0.0,1.6875,0.0,0.5,353.625,370.438512,23.152407


#### Points Allowed Line

In [541]:
yahoo_points = [(0,10), (6,7), (13,4), (20,1), (27,0), (34,-1), (40,-4)]
# ie yahoo awards 10 points if a team gives up 0 yards, 7 points if they give up 6 yards, and so on

In [701]:
def predict_yahoo_points(row, yahoo_points):
    points_allowed = row['PredPointsAllowedPerGame']
    if points_allowed > yahoo_points[len(yahoo_points)-1][0]:
        return -4
    
    for idx in range(len(yahoo_points)):
        left = yahoo_points[idx]
        right = yahoo_points[idx+1]
        
        if points_allowed >= left[0] and points_allowed < right[0]:
            x_prop = (points_allowed - left[0]) / (right[0] - left[0])
            y_extra = x_prop * (right[1] - left[1])
            
            return left[1] + y_extra
    
    return yahoo_points[len(yahoo_points)][0]
            

In [582]:
predict_yahoo_points(10, yahoo_points)

0.5714285714285714


5.285714285714286

In [586]:
defense['GamesPlayedPercent'] = 1.0

In [587]:
defense.head()

Unnamed: 0,City,Position,Season,Team,Sacks,FRecoveries,Interceptions,TDs,Safeties,RushYrdsAllowed,PassYrdsAllowed,TotalYrdsAllowed,SafetiesPerGame,SacksPerGame,TDsPerGame,InterceptionsPerGame,YrdsAllowedPerGame,EstPointsAllowed,EstPointsAllowedPerGame,GamesPlayedPercent
0,Chicago,DF,2006,CHI,40.0,20.0,24.0,8.0,1.0,1590.0,3388.0,4978.0,0.0625,2.5,0.5,1.5,311.125,325.9178,20.369863,1.0
1,Baltimore,DF,2006,BAL,60.0,9.0,28.0,6.0,2.0,1214.0,3429.0,4643.0,0.125,3.75,0.375,1.75,290.1875,303.984802,18.99905,1.0
2,Green Bay,DF,2006,GB,46.0,10.0,23.0,5.0,0.0,1825.0,3646.0,5471.0,0.0,2.875,0.3125,1.4375,341.9375,358.195316,22.387207,1.0
3,Minnesota,DF,2006,MIN,30.0,15.0,21.0,6.0,0.0,985.0,4015.0,5000.0,0.0,1.875,0.375,1.3125,312.5,327.358176,20.459886,1.0
4,Tennessee,DF,2006,TEN,26.0,10.0,17.0,8.0,2.0,2313.0,3750.0,6063.0,0.125,1.625,0.5,1.0625,378.9375,396.954524,24.809658,1.0


### Predicting

In [603]:
defense['FRecPerGame'] = defense['FRecoveries'] / 16

In [635]:
defense.columns

Index(['City', 'Position', 'Season', 'Team', 'Sacks', 'FRecoveries',
       'Interceptions', 'TDs', 'Safeties', 'RushYrdsAllowed',
       'PassYrdsAllowed', 'TotalYrdsAllowed', 'SafetiesPerGame',
       'SacksPerGame', 'TDsPerGame', 'InterceptionsPerGame',
       'YrdsAllowedPerGame', 'EstPointsAllowed', 'EstPointsAllowedPerGame',
       'GamesPlayedPercent', 'FRecPerGame'],
      dtype='object')

In [615]:
ModelsDF = PositionModels('DF')

In [699]:
PointsAllowedPerGame = create_model(defense, 'DF', 'EstPointsAllowedPerGame',
                                   ['EstPointsAllowedPerGame'], test_cutoff=2013, is_defense=True)
ModelsDF.add_model('PointsAllowedPerGame', PointsAllowedPerGame)

|=== Predicting EstPointsAllowedPerGame ===|

PSEstPointsAllowedPerGame: 0.392952217226
Intercept: 14.1996561579

R-Sqr on Seasons 2013 to 2016: 0.148502330319
Train Data Size: 160
Test Data Size:128


In [616]:
TDsPerGame = create_model(defense, 'DF', 'TDsPerGame',
                                   ['TDsPerGame', 'FRecPerGame'], test_cutoff=2013, is_defense=True)
ModelsDF.add_model('TDsPerGame', TDsPerGame)

|=== Predicting TDsPerGame ===|

PSTDsPerGame: 0.113660926302
PSFRecPerGame: -0.0432802527297
Intercept: 0.222608071975

R-Sqr on Seasons 2013 to 2016: 0.014219837389
Train Data Size: 160
Test Data Size:128


In [617]:
SacksPerGame = create_model(defense, 'DF', 'SacksPerGame', 
                           ['SacksPerGame'], 
                   game_limit=0, test_cutoff=2014, interacts=[], is_defense=True)
ModelsDF.add_model('SacksPerGame', SacksPerGame)

|=== Predicting SacksPerGame ===|

PSSacksPerGame: 0.27434980678
Intercept: 1.65428445757

R-Sqr on Seasons 2014 to 2016: 0.071237055612
Train Data Size: 192
Test Data Size:96


In [618]:
InterceptionsPerGame = create_model(defense, 'DF', 'InterceptionsPerGame',
                                   ['InterceptionsPerGame', 'EstPointsAllowedPerGame'], test_cutoff=2014, is_defense=True)
ModelsDF.add_model('InterceptionsPerGame', InterceptionsPerGame)

|=== Predicting InterceptionsPerGame ===|

PSInterceptionsPerGame: 0.110940031178
PSEstPointsAllowedPerGame: -0.0191720465898
Intercept: 1.30461350464

R-Sqr on Seasons 2014 to 2016: 0.033735530925
Train Data Size: 192
Test Data Size:96


In [619]:
FRecPerGame = create_model(defense, 'DF', 'FRecPerGame',
                                   ['FRecPerGame'], test_cutoff=2014, is_defense=True)
ModelsDF.add_model('FRecPerGame', FRecPerGame)

|=== Predicting FRecPerGame ===|

PSFRecPerGame: 0.042929782346
Intercept: 0.611899110438

R-Sqr on Seasons 2014 to 2016: 0.00199004347569
Train Data Size: 192
Test Data Size:96


In [624]:
SafetiesPerGame = create_model(defense, 'DF', 'SafetiesPerGame',
                                   ['SafetiesPerGame'], test_cutoff=2014, is_defense=True)
ModelsDF.add_model('SafetiesPerGame', SafetiesPerGame)

|=== Predicting SafetiesPerGame ===|

PSSafetiesPerGame: -0.0109603592294
Intercept: 0.0338889961856

R-Sqr on Seasons 2014 to 2016: 0.000110054476272
Train Data Size: 192
Test Data Size:96


In [679]:
def_2016 = defense[defense.Season == '2016']
def_2017 = def_2016.copy()
def_2017['Season'] = '2017'
pkey = ['City', 'Position', 'Team', 'Season']

right_cols = dict()
for col in def_2017.columns:
    if str(col) not in pkey:
        right_cols[col] = 'PS'+col
    else:
        right_cols[col] = col

def_2016 = def_2016.rename(columns=right_cols)
def_2016['Season'] = '2017'
DF2017 = pd.merge(def_2017, def_2016, on=pkey)

In [700]:
PredictDF = ModelsDF.predict_df(DF2017)

Predicting InterceptionsPerGame...
Predicting PointsAllowedPerGame...
Predicting SacksPerGame...
Predicting SafetiesPerGame...
Predicting FRecPerGame...
Predicting TDsPerGame...


In [706]:
PredictDF['PredYahooPointsPerGame'] = PredictDF.apply(predict_yahoo_points, 
                                                           args=(yahoo_points,), axis=1)
PredictDF['PredSeasonSacks'] = PredictDF['PredSacksPerGame'] * 16
PredictDF['PredSeasonInterceptions'] = PredictDF['PredInterceptionsPerGame'] * 16
PredictDF['PredSeasonTDs'] = PredictDF['PredTDsPerGame'] * 16
PredictDF['PredSeasonSafeties'] = PredictDF['PredSafetiesPerGame'] * 16
PredictDF['PredSeasonFRecoveries'] = PredictDF['PredFRecPerGame'] * 16
PredictDF['PredSeasonPointsAllowed'] = PredictDF['PredPointsAllowedPerGame'] * 16
PredictDF['PredSeasonYahooPoints'] = PredictDF['PredYahooPointsPerGame'] * 16

In [703]:
ScoreDF = {
    'PredSeasonYahooPoints': 1,
    'PredSeasonFRecoveries': 2,
    'PredSeasonSacks': 1,
    'PredSeasonTDs': 6,
    'PredSeasonSafeties': 2,
    'PredSeasonInterceptions': 2
}

In [704]:
PredictDF['PredFantasyPoints'] = PredictDF.apply(score_player, args=(ScoreDF,), axis=1)

In [710]:
PredictDF[['Team', 'PredFantasyPoints', 'PredSeasonPointsAllowed', 'PredSeasonSacks', 'PredSeasonSafeties', 
           'PredSeasonTDs', 'PredSeasonInterceptions', 
           'PredSeasonFRecoveries']].sort('PredFantasyPoints', ascending=False).to_json('DF2017.json', orient='index')

## Exporting

In [499]:
core_vars = ['Player', 'Team', 'Season', 'Position']
pred_vars = [var for var in QB2017.columns if 'Pred' in str(var)]

In [500]:
QB2017[core_vars + pred_vars].to_json('QB2017.json', orient='index')
RB2017[core_vars + pred_vars].to_json('RB2017.json', orient='index')

pred_vars.remove('PredInterceptionRatio')
pred_vars.remove('PredSeasonInterceptions')
WR2017[core_vars + pred_vars].to_json('WR2017.json', orient='index')

pred_vars.remove('PredRushTDAttRatio')
pred_vars.remove('PredSeasonRushTDs')
TE2017[core_vars + pred_vars].to_json('TE2017.json', orient='index')

In [672]:
PredictK[core_vars+['PredFantasyPoints', 'PredSeasonFGoals', 'PredSeasonExtraPoints', 'PredFGoalsPerGame', 
                    'PredExtraPointsPerGame']].sort('PredFantasyPoints', ascending=False).to_json('K2017.json', orient='index')