In [165]:
import numpy as np
import sklearn as sk
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sqlite3
from sqlite_api import *
from sklearn import linear_model

In [6]:
fdb = 'fantasy.db'

player_lbls = ['Player', 'Position', 'Team', 'Season', 'Plays', 'Games', 'RushAttempts', 
              'RushYrds', 'RushTDs', 'PassAttempts', 'Complete', 'PassYrds', 'PassTDs', 'Fumbles', 'Interceptions']

In [7]:
## Scoring metrics

qb = {'PassYrds': 0.04, # QBs get 0.04 points per passing yard 
        'PassTDs': 4, 
        'Interceptions': -2,
        'RushYrds': 0.1,
        'RushTDs': 6,
        'Fumbles': -2}

wr = {'PassYrds': 0.01,
         'PassTDs': 6,
         'Interceptions': -2,
         'RushYrds': 0.1,
         'RushTDs': 6,
         'Fumbles': -2}

rb = wr.copy()

te = wr.copy()

scores = {'QB': qb,
             'RB': rb,
             'WR': wr,
             'TE': te}

# need 2 points for 2point conversions

In [3]:
def score_row(row, qb_scores, wr_scores, rb_scores, te_scores):
    score = 0
    if row['Position'] == 'QB':
        metrics = qb_scores
    elif row['Position'] == 'WR':
        metrics = wr_scores
    elif row['Position'] == 'RB':
        metrics = rb_scores
    elif row['Position'] == 'TE':
        metrics = te_scores
    
    for var in metrics:
        score += row[var] * metrics[var]
        
    return score

In [12]:
select_to_df(fdb, 'OffenseSeason', ['Team', 'Season', 'Yards']).tail()

Unnamed: 0,Team,Season,Yards
91,SEA,2014,6414.0
92,SF,2014,5771.0
93,TB,2014,5215.0
94,TEN,2014,5159.0
95,WAS,2014,6177.0


In [6]:
plyr = select_to_df(fdb, 'PlayerSeason', player_lbls, where='WHERE Season=2016')  

In [8]:
# create 2016 mock season database
rdb = 'rankings.db'
conn = sqlite3.connect(rdb)
c = conn.cursor()

In [95]:
c.execute('DROP TABLE Kickers')

OperationalError: no such table: Kickers

In [96]:
for pos in ['Quarterbacks', 'RunningBacks', 'WideReceivers', 'TightEnds', 'Kickers']:
    c.execute('''CREATE Table ''' + pos +
                '''(Player VARCHAR(30),
                    Team VARCHAR(3),
                    Position CHARACTER(2),
                    PRIMARY KEY (Player))''')

In [97]:
c.execute('''CREATE Table Defenses
                (Team VARCHAR(3),
                Position CHARACTER(2),
                PRIMARY KEY (Team))''')

<sqlite3.Cursor at 0x21886351f10>

In [102]:
df = select_to_df(fdb, 'PlayerSeason', player_lbls, where='WHERE Season=2016')[['Player', 'Position', 'Team']]
QBs = df[df.Position == 'QB']
for idx in range(len(QBs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO Quarterbacks VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [103]:
RBs = df[df.Position == 'RB']
for idx in range(len(RBs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO RunningBacks VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [104]:
WRs = df[df.Position == 'WR']
for idx in range(len(WRs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO WideReceivers VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [105]:
TEs = df[df.Position == 'TE']
for idx in range(len(TEs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO TightEnds VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [107]:
df = select_to_df(fdb, 'KickerSeason', ['Player', 'Team', 'Position'], 'WHERE Season = 2016')
for idx in range(len(df)):
    c.execute('INSERT INTO Kickers VALUES (?, ?, ?)', df.iloc[idx])

In [108]:
df = select_to_df(fdb, 'DefenseSeason', ['Team', 'Position'], 'WHERE Season = 2016')
for idx in range(len(df)):
    c.execute('INSERT INTO Defenses VALUES (?, ?)', df.iloc[idx])

In [30]:
df = select_to_df(fdb, 'PlayerSeason', player_lbls, 'WHERE Season != 2016')
df['Score'] = ply.apply(score_row, args=(qb, wr, rb, te), axis=1)

In [64]:
for i in ply.Team.unique:
    print(i)

TypeError: 'method' object is not iterable

## Offensive Features

In [9]:
def add_feature(df, name, func):
    df[name] = df.apply(func, axis=1)
    return df

In [10]:
offense_lbls = ['Team', 
                 'Season', 
                 'Yards',
                 'PassYrds',
                 'RushYrds',
                 'Points']

In [11]:
def pass_rush_rat(row):
    return row['PassYrds'] / row['RushYrds']

def yards_per_game(row):
    return row['Yards'] / 16

def pass_yrds_per_game(row):
    return row['PassYrds'] / 16

def rush_yrds_per_game(row):
    return row['RushYrds'] / 16

def points_per_game(row):
    return row['Points'] / 16

In [12]:
off = select_to_df(fdb, 'OffenseSeason', offense_lbls)

In [13]:
off = add_feature(off, 'OffPassRushRatio', pass_rush_rat)
off = add_feature(off, 'OffYardsPerGame', yards_per_game)
off = add_feature(off, 'OffPassYrdsPerGame', pass_yrds_per_game)
off = add_feature(off, 'OffRushYrdsPerGame', rush_yrds_per_game)
off = add_feature(off, 'OffPointsPerGame', points_per_game)
off = off.drop(['Yards', 'PassYrds', 'RushYrds', 'Points'], axis=1)
off.head()

Unnamed: 0,Team,Season,OffPassRushRatio,OffYardsPerGame,OffPassYrdsPerGame,OffRushYrdsPerGame,OffPointsPerGame
0,NE,2007,2.55868,411.25,295.6875,115.5625,36.8125
1,DAL,2007,2.351088,365.6875,256.5625,109.125,28.4375
2,IND,2007,2.364009,358.6875,252.0625,106.625,28.125
3,JAX,2007,1.391886,357.4375,208.0,149.4375,25.6875
4,SEA,2007,2.448425,348.9375,247.75,101.1875,24.5625


## Additional Player Features

In [14]:
def pass_tdatt_rat(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['PassTDs'] / row['PassAttempts']

def rush_tdatt_rat(row):
    if row['RushAttempts'] == 0:
        return 0
    return row['RushTDs'] / row['RushAttempts']

def pass_rush_att_rat(row):
    if row['RushAttempts'] == 0:
        return 1
    return row['PassAttempts'] / row['RushAttempts']

def complete_perc(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['Complete'] / row['PassAttempts']

def avg_rush_yrds(row):
    if row['RushAttempts'] == 0:
        return 0
    return row['RushYrds'] / row['RushAttempts']

def avg_pass_yrds(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['PassYrds'] / row['PassAttempts']

def avg_plays(row):
    return row['Plays'] / row['Games']

def score_per_play(row):
    return row['Score'] / row['Plays']

def score_per_game(row):
    return row['Score'] / row['Games']

def games_perc(row):
    return row['Games'] / 16

def intercept_rat(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['Interceptions'] / row['PassAttempts']

def pass_att_per_game(row):
    return row['PassAttempts'] / row['Games']

def rush_att_per_game(row):
    return row['RushAttempts'] / row['Games']

def fumb_per_game(row):
    return row['Fumbles'] / row['Games']

In [15]:
df = select_to_df(fdb, 'PlayerSeason', player_lbls)
df['PassTDAttRatio'] = df.apply(pass_tdatt_rat, axis=1)
df['RushTDAttRatio'] = df.apply(rush_tdatt_rat, axis=1)
df['AvgPassYrds'] = df.apply(avg_pass_yrds, axis=1)
df['AvgRushYrds'] = df.apply(avg_rush_yrds, axis=1)
df['PercentComplete'] = df.apply(complete_perc, axis=1)
df['PlaysPerGame'] = df.apply(avg_plays, axis=1)
df['GamesPlayedPercent'] = df.apply(games_perc, axis=1)
df['InterceptionRatio'] = df.apply(intercept_rat, axis=1)
df['PassAttPerGame'] = df.apply(pass_att_per_game, axis=1)
df['RushAttPerGame'] = df.apply(rush_att_per_game, axis=1)
df['FumblesPerGame'] = df.apply(fumb_per_game, axis=1)
df['PassRushAttRatio'] = df.apply(pass_rush_att_rat, axis=1)
#df['ScorePerPlay'] = df.apply(score_per_play, axis=1)
#df['ScorePerGame'] = df.apply(score_per_game, axis=1)

In [16]:
players = pd.merge(df, off, on=['Team', 'Season'])
players.tail()

Unnamed: 0,Player,Position,Team,Season,Plays,Games,RushAttempts,RushYrds,RushTDs,PassAttempts,...,InterceptionRatio,PassAttPerGame,RushAttPerGame,FumblesPerGame,PassRushAttRatio,OffPassRushRatio,OffYardsPerGame,OffPassYrdsPerGame,OffRushYrdsPerGame,OffPointsPerGame
5224,Danny Vitale,RB,CLE,2016,5.0,9.0,0.0,0.0,0.0,5.0,...,0.0,0.555556,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5
5225,Gary Barnidge,TE,CLE,2016,82.0,16.0,0.0,0.0,0.0,82.0,...,0.0,5.125,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5
5226,Seth DeValve,TE,CLE,2016,12.0,12.0,0.0,0.0,0.0,12.0,...,0.0,1.0,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5
5227,Connor Hamlett,TE,CLE,2016,1.0,3.0,0.0,0.0,0.0,1.0,...,0.0,0.333333,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5
5228,Randall Telfer,TE,CLE,2016,7.0,14.0,0.0,0.0,0.0,7.0,...,0.0,0.5,0.0,0.0,1.0,1.906542,311.0,204.0,107.0,16.5


## Creating the Models

In [560]:
class Model:
    def __init__(self, feature_list):
        self.features = feature_list
    
    def set_model(self, model):
        self.model = model
        
    def predict(self, feature_dict):
        parameters = list()
        for idx in range(len(self.features)):
            if self.features[idx][2:] in feature_dict:
                parameters.append(feature_dict[self.features[idx][2:]])
            else:
                parameters.append(feature_dict[self.features[idx]])
            
        return self.model.predict(parameters)
        
class PositionModels:
    def __init__(self, position):
        self.models = dict()
        self.position = position
    
    def add_model(self, predict, model):
        self.models[predict] = model
        
    def predict_player(self, feature_dict):
        predictions = dict()
        for cur_predict in self.models:
            if cur_predict[2:] in self.models:
                cur_model = self.models[cur_predict[2:]]
            else:
                cur_model = self.models[cur_predict]
            
            #print(str(cur_predict)+': '+str(cur_model.predict(feature_dict)))
            predictions[cur_predict] = cur_model.predict(feature_dict)
        
        return predictions
        
    def predict_df(self, df):
        def predict_var(row, to_predict):
            #print(len(self.predict_player(row.to_dict())[to_predict]))
            return self.predict_player(row.to_dict())[to_predict][0]
        
        for to_predict in self.models:
            print('Predicting '+to_predict+'...')
            new_col = 'Pred' + to_predict
            df[new_col] = df.apply(predict_var, args=(to_predict,), axis=1)
        
        return df
    
    def create_model(self, df, to_predict, features, game_limit=0, test_cutoff=2014, add_limits=dict()):
        new_model = create_model(df, self.position, to_predict, features, game_limit=game_limit, 
                                 test_cutoff=test_cutoff, add_limits=add_limits)
        self.add_model(to_predict, new_model)

In [561]:
# todo: enable interactions
# get rid of zeros
# maybe get dont train on low games played
# maybe binary predict upward trend/downward trend

def create_model(df, pos, predict, features, reg_type='linear', game_limit=0, test_cutoff=2016, add_limits=dict()):
    df = df[df.Position == pos]
    left_vars = ['Player', 'Team', 'Season'] + [predict]
    right_vars = ['Player', 'Team', 'Season'] + features
    model_vars = left_vars.copy()
    for idx in range(len(features)):
        features[idx] = 'PS' + features[idx]
    model_vars += features
        
    # build train dataframe
    model_df = pd.DataFrame(columns=model_vars)
    for season in range(2008, test_cutoff):
        prev_season = str(season - 1)
        season = str(season)
        left_df = df[df.GamesPlayedPercent >= (game_limit/16)]
        for limit in add_limits:
            left_df = left_df[left_df[limit] >= add_limits[limit]]
        left_df = left_df[left_vars][df.Season == season]
        right_df = df[right_vars][df.Season == prev_season]
        right_df['Season'] = str(season)
        left_df = pd.merge(left_df, right_df, on=['Player', 'Team', 'Season'])
        left_df.columns = model_vars
        model_df = model_df.append(left_df)
        
    # build test dataframe
    test_df = pd.DataFrame(columns=model_vars)
    for season in range(test_cutoff, 2017):
        left_df = df[df.GamesPlayedPercent >= (game_limit/16)]
        for limit in add_limits:
            left_df = left_df[left_df[limit] >= add_limits[limit]]
        left_df = left_df[left_vars][df.Season == str(season)]
        right_df = df[right_vars][df.Season == str(season-1)]
        right_df['Season'] = str(season)
        left_df = pd.merge(left_df, right_df, on=['Player', 'Team', 'Season'])
        left_df.columns = model_vars
        test_df = test_df.append(left_df)
  
    if reg_type == 'linear':
        model = linear_model.LinearRegression()
    elif reg_type == 'logistic':
        model = linear_model.LogisticRegression()
    model.fit(model_df[features], model_df[predict])
    print('|=== Predicting '+str(predict)+' ===|\n')
    print('|====== Model Coefficients ======|')
    for idx in range(len(model.coef_)):
        print(features[idx]+': '+str(model.coef_[idx]))
    print('Intercept: '+str(model.intercept_))
    print('\n|========== Performance ==========|')
    print('R-Sqr on Seasons '+str(test_cutoff)+' to 2016: '  + str(model.score(model_df[features], model_df[predict])))
    print('Train Data Size: '+str(len(model_df)))
    print('Test Data Size:'+str(len(test_df)))
    #print(model_df[['Player', 'AvgRushYrds', 'PSAvgRushYrds']])
    model_obj = Model(features)
    model_obj.set_model(model)
    return model_obj

## Quarterbacks

In [562]:
ModelsQB = PositionModels('QB')

In [563]:
AvgPassYrds = create_model(players, 'QB', 'AvgPassYrds', 
                           ['AvgPassYrds', 'PercentComplete', 'OffPassYrdsPerGame'], 
                   game_limit=10, test_cutoff=2014)
ModelsQB.add_model('AvgPassYrds', AvgPassYrds)

|=== Predicting AvgPassYrds ===|

PSAvgPassYrds: 0.281325343102
PSPercentComplete: 6.41163920165
PSOffPassYrdsPerGame: 0.000708867372633
Intercept: 1.13018502735

R-Sqr on Seasons 2014 to 2016: 0.52100841563
Train Data Size: 143
Test Data Size:70


In [564]:
PassAttPerGame = create_model(players, 'QB', 'PassAttPerGame', 
                           ['PassAttPerGame', 'AvgPassYrds', 'PassTDAttRatio', 'OffPointsPerGame', 'GamesPlayedPercent'], 
                   game_limit=6, test_cutoff=2014)
ModelsQB.add_model('PassAttPerGame', PassAttPerGame)

|=== Predicting PassAttPerGame ===|

PSPassAttPerGame: 0.704405713448
PSAvgPassYrds: 0.731371374953
PSPassTDAttRatio: -31.0583377968
PSOffPointsPerGame: 0.309706790621
PSGamesPlayedPercent: -1.81155497123
Intercept: 0.26164063658

R-Sqr on Seasons 2014 to 2016: 0.5826327795
Train Data Size: 184
Test Data Size:91


In [565]:
PassTDAttRatio = create_model(players, 'QB', 'PassTDAttRatio', 
                           ['PassTDAttRatio', 'AvgPassYrds', 'PercentComplete', 'GamesPlayedPercent', 'RushTDAttRatio'], 
                   game_limit=8, test_cutoff=2014)
ModelsQB.add_model('PassTDAttRatio', PassTDAttRatio)

|=== Predicting PassTDAttRatio ===|

PSPassTDAttRatio: 0.118074081227
PSAvgPassYrds: 0.00221885064841
PSPercentComplete: 0.0325126213073
PSGamesPlayedPercent: 0.0075644469316
PSRushTDAttRatio: 0.0508856621922
Intercept: -0.00569435143606

R-Sqr on Seasons 2014 to 2016: 0.236048860829
Train Data Size: 161
Test Data Size:78


In [566]:
AvgRushYrds = create_model(players, 'QB', 'AvgRushYrds', 
                           ['AvgRushYrds', 'RushAttPerGame', 'PassAttPerGame', 'OffPointsPerGame'], 
                   game_limit=8, test_cutoff=2014)
ModelsQB.add_model('AvgRushYrds', AvgRushYrds)

|=== Predicting AvgRushYrds ===|

PSAvgRushYrds: 0.186097495049
PSRushAttPerGame: 0.606931980498
PSPassAttPerGame: -0.0429372088817
PSOffPointsPerGame: -0.0951528762998
Intercept: 4.5258439588

R-Sqr on Seasons 2014 to 2016: 0.36818219927
Train Data Size: 161
Test Data Size:78


In [567]:
RushAttPerGame = create_model(players, 'QB', 'RushAttPerGame', 
                           ['RushAttPerGame', 'RushTDAttRatio', 'PassRushAttRatio', 'GamesPlayedPercent'], 
                   game_limit=6, test_cutoff=2014)
ModelsQB.add_model('RushAttPerGame', RushAttPerGame)

|=== Predicting RushAttPerGame ===|

PSRushAttPerGame: 0.637812988382
PSRushTDAttRatio: 4.63612815001
PSPassRushAttRatio: -0.0147349685176
PSGamesPlayedPercent: -0.330490929254
Intercept: 1.20301224329

R-Sqr on Seasons 2014 to 2016: 0.495878521917
Train Data Size: 184
Test Data Size:91


In [568]:
FumblesPerGame = create_model(players, 'QB', 'FumblesPerGame', 
                           ['FumblesPerGame', 'OffPassYrdsPerGame', 'PassAttPerGame'], 
                   game_limit=8, test_cutoff=2014)
ModelsQB.add_model('FumblesPerGame', FumblesPerGame)

|=== Predicting FumblesPerGame ===|

PSFumblesPerGame: 0.0882807434402
PSOffPassYrdsPerGame: -0.000586994925773
PSPassAttPerGame: 0.00247949140662
Intercept: 0.260515590688

R-Sqr on Seasons 2014 to 2016: 0.0696590095292
Train Data Size: 161
Test Data Size:78


In [569]:
playersQB = players[players.Position == 'QB']

In [570]:
PredictQB = ModelsQB.predict_df(playersQB)

Predicting FumblesPerGame...
Predicting PassAttPerGame...
Predicting RushAttPerGame...
Predicting AvgRushYrds...
Predicting AvgPassYrds...
Predicting PassTDAttRatio...


In [571]:
QB2015 = PredictQB[PredictQB.Season == '2015'][['Player', 'Season', 'Team', 'AvgPassYrds', 
            'PredAvgPassYrds', 'PassAttPerGame', 'PredPassAttPerGame']]
QB2016 = PredictQB[PredictQB.Season == '2016'][['Player', 'Season', 'Team', 'AvgPassYrds', 'PassAttPerGame']]
QB2016.columns = ['Player', 'Season', 'Team', '2016AvgPassYrds', '2016PassAttPerGame']
PredictQB2016 = pd.merge(QB2015, QB2016, on=('Player', 'Team'))
PredictQB2016 = Predict2016[['Player', 'Team', 'PredAvgPassYrds', '2016AvgPassYrds', 'PredPassAttPerGame', '2016PassAttPerGame']].sort(columns=('2016AvgPassYrds'), ascending=False)
PredictQB2016.head()

Unnamed: 0,Player,Team,PredAvgPassYrds,2016AvgPassYrds,PredPassAttPerGame,2016PassAttPerGame
22,Matt Ryan,ATL,7.677779,9.258427,36.449921,33.375
15,Geno Smith,NYJ,7.206724,9.0,40.360128,4.666667
1,Derek Anderson,CAR,7.251571,8.54717,15.397351,10.6
21,Matt Moore,MIA,11.649222,8.287356,17.092592,17.4
3,Tom Brady,NE,7.6145,8.226852,38.721706,36.0


In [572]:
def predict_total_pass(row):
    return row.PredAvgPassYrds * row.PredPassAttPerGame * 16

def actual_total_pass(row):
    return row['2016AvgPassYrds'] * row['2016PassAttPerGame'] * 16

PredictQB2016['PredPassYrds'] = PredictQB2016.apply(predict_total_pass, axis=1)
PredictQB2016['2016PassYrds'] = PredictQB2016.apply(actual_total_pass, axis=1)
PredictQB2016 = PredictQB2016.sort(columns=('2016PassYrds'), ascending=False)
print('Total Correlation:')
print(PredictQB2016[['Player', 'PredPassYrds', '2016PassYrds']].corr())
print('Top 15 Correlation:')
print(PredictQB2016[['Player', 'PredPassYrds', '2016PassYrds']][:15].corr())
PredictQB2016[['Player', 'PredPassYrds', '2016PassYrds']]

Total Correlation:
              PredPassYrds  2016PassYrds
PredPassYrds      1.000000      0.694407
2016PassYrds      0.694407      1.000000
Top 15 Correlation:
              PredPassYrds  2016PassYrds
PredPassYrds      1.000000      0.571532
2016PassYrds      0.571532      1.000000


Unnamed: 0,Player,PredPassYrds,2016PassYrds
8,Drew Brees,5063.84912,5208.0
22,Matt Ryan,4477.67108,4944.0
11,Kirk Cousins,4299.119325,4917.0
3,Tom Brady,4717.54266,4738.666667
33,Andrew Luck,4094.008997,4522.666667
6,Carson Palmer,4517.050167,4515.2
16,Aaron Rodgers,3832.788392,4428.0
12,Philip Rivers,4589.642609,4386.0
28,Ben Roethlisberger,5075.675846,4364.571429
10,Matthew Stafford,4289.089819,4327.0


## Running Backs

In [573]:
ModelsRB = PositionModels('RB')

#### Primary Stats

In [574]:
AvgRushYrds = create_model(players, 'RB', 'AvgRushYrds', 
                           ['AvgRushYrds', 'RushTDAttRatio', 'RushAttPerGame'], 
                   game_limit=14, test_cutoff=2014, add_limits={'RushAttPerGame': 8})
ModelsRB.add_model('AvgRushYrds', AvgRushYrds)

|=== Predicting AvgRushYrds ===|

PSAvgRushYrds: 0.233007230324
PSRushTDAttRatio: -1.91252474272
PSRushAttPerGame: -0.0100144505186
Intercept: 3.50947407628

R-Sqr on Seasons 2014 to 2016: 0.112014723196
Train Data Size: 133
Test Data Size:51


In [575]:
RushAttPerGame = create_model(players, 'RB', 'RushAttPerGame', 
                           ['RushAttPerGame', 'AvgRushYrds'], 
                   game_limit=14, test_cutoff=2014)
ModelsRB.add_model('RushAttPerGame', RushAttPerGame)

|=== Predicting RushAttPerGame ===|

PSRushAttPerGame: 0.829612058324
PSAvgRushYrds: 0.350216329528
Intercept: 0.0965459334221

R-Sqr on Seasons 2014 to 2016: 0.742479651202
Train Data Size: 329
Test Data Size:149


In [576]:
RushTDAttRatio = create_model(players, 'RB', 'RushTDAttRatio', 
                           ['RushTDAttRatio', 'AvgRushYrds', 'OffRushYrdsPerGame'], 
                   game_limit=14, test_cutoff=2014)
ModelsRB.add_model('RushAttPerGame', RushAttPerGame)

|=== Predicting RushTDAttRatio ===|

PSRushTDAttRatio: 0.243710221671
PSAvgRushYrds: -0.00328397686789
PSOffRushYrdsPerGame: -0.00024019440338
Intercept: 0.0645768996243

R-Sqr on Seasons 2014 to 2016: 0.0904204171458
Train Data Size: 329
Test Data Size:149


#### Secondary Stats

In [577]:
# create boolean ispasser as addition feature for AvgPassYrds?
# maybe limit train set to only passers and only predict if predicted to be a passer/pass att > certain margin

In [578]:
PassAttPerGame = create_model(players, 'RB', 'PassAttPerGame', 
                           ['PassAttPerGame'], 
                   game_limit=14, test_cutoff=2014)
ModelsRB.add_model('PassAttPerGame', PassAttPerGame)

|=== Predicting PassAttPerGame ===|

PSPassAttPerGame: 0.694162899449
Intercept: 0.66801412628

R-Sqr on Seasons 2014 to 2016: 0.52915422721
Train Data Size: 329
Test Data Size:149


In [579]:
FumblesPerGame = create_model(players, 'RB', 'FumblesPerGame', 
                           ['FumblesPerGame', 'RushAttPerGame'], 
                   game_limit=14, test_cutoff=2014)
ModelsRB.add_model('FumblesPerGame', FumblesPerGame)

|=== Predicting FumblesPerGame ===|

PSFumblesPerGame: 0.120723512678
PSRushAttPerGame: 0.00465479290049
Intercept: 0.0159142261803

R-Sqr on Seasons 2014 to 2016: 0.274732796335
Train Data Size: 329
Test Data Size:149


In [580]:
def predict_total_rush(row):
    return row.PredAvgRushYrds * row.PredRushAttPerGame * 16

def actual_total_rush(row):
    return row['2016AvgRushYrds'] * row['2016RushAttPerGame'] * 16

PredictRB = ModelsRB.predict_df(players[players.Position == 'RB'])
RB2015 = PredictRB[PredictRB.Season == '2015'][['Player', 'Season', 'Team', 'AvgRushYrds', 
            'PredAvgRushYrds', 'RushAttPerGame', 'PredRushAttPerGame']]
RB2016 = PredictRB[PredictRB.Season == '2016'][['Player', 'Season', 'Team', 'AvgRushYrds', 'RushAttPerGame']]
RB2016.columns = ['Player', 'Season', 'Team', '2016AvgRushYrds', '2016RushAttPerGame']
PredictRB2016 = pd.merge(RB2015, RB2016, on=('Player', 'Team'))
PredictRB2016 = PredictRB2016[['Player', 'Team', 'PredAvgRushYrds', '2016AvgRushYrds', 'PredRushAttPerGame', '2016RushAttPerGame']].sort(columns=('2016AvgRushYrds'), ascending=False)
PredictRB2016['PredRushYrds'] = PredictRB2016.apply(predict_total_rush, axis=1)
PredictRB2016['2016RushYrds'] = PredictRB2016.apply(actual_total_rush, axis=1)
PredictRB2016 = PredictRB2016.sort(columns=('PredRushYrds'), ascending=False)
print('Total Correlation:')
print(PredictRB2016[['Player', 'PredRushYrds', '2016RushYrds']].corr())
print('Top 15 Correlation:')
print(PredictRB2016[['Player', 'PredRushYrds', '2016RushYrds']][:15].corr())
PredictRB2016[['Player', 'PredRushYrds', '2016RushYrds']]

Predicting FumblesPerGame...
Predicting PassAttPerGame...
Predicting AvgRushYrds...
Predicting RushAttPerGame...
Total Correlation:
              PredRushYrds  2016RushYrds
PredRushYrds      1.000000      0.699992
2016RushYrds      0.699992      1.000000
Top 15 Correlation:
              PredRushYrds  2016RushYrds
PredRushYrds      1.000000      0.188889
2016RushYrds      0.188889      1.000000


Unnamed: 0,Player,PredRushYrds,2016RushYrds
66,Adrian Peterson,1282.170020,384.000000
60,Le'Veon Bell,1232.689505,1690.666667
34,Doug Martin,1184.439743,842.000000
88,Todd Gurley,1148.108894,885.000000
0,Jonathan Stewart,1148.045757,1014.153846
11,Chris Johnson,1115.741338,380.000000
50,LeSean McCoy,1088.236749,1351.466667
41,Devonta Freeman,1080.451754,1079.000000
36,Latavius Murray,1035.872869,900.571429
84,Carlos Hyde,1030.047191,1216.000000


In [581]:
# there is a bigger issue with unpredictable number of games played. most overpredictions
# are related to not controlling for games played, but performance is still poor
# especially for the top running backs. hopefully will improve with interactions

## Wide Receivers

In [582]:
ModelsWR = PositionModels('WR')

#### Primary Stats

In [583]:
AvgPassYrds = create_model(players, 'WR', 'AvgPassYrds', 
                           ['AvgPassYrds', 'OffPassYrdsPerGame'], 
                   game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4})
ModelsWR.add_model('AvgPassYrds', AvgPassYrds)

|=== Predicting AvgPassYrds ===|

PSAvgPassYrds: 0.149691847418
PSOffPassYrdsPerGame: 0.00454528980174
Intercept: 5.67258462413

R-Sqr on Seasons 2014 to 2016: 0.0695903038881
Train Data Size: 322
Test Data Size:145


In [584]:
PassAttPerGame = create_model(players, 'WR', 'PassAttPerGame', 
                           ['PassAttPerGame'], 
                   game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 0})
ModelsWR.add_model('PassAttPerGame', PassAttPerGame)

|=== Predicting PassAttPerGame ===|

PSPassAttPerGame: 0.764821757917
Intercept: 1.47599181756

R-Sqr on Seasons 2014 to 2016: 0.613892036713
Train Data Size: 486
Test Data Size:228


In [585]:
PassTDAttRatio = create_model(players, 'WR', 'PassTDAttRatio', 
                           ['PassTDAttRatio', 'OffPassYrdsPerGame', 'OffPointsPerGame'], 
                   game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4})
ModelsWR.add_model('PassTDAttRatio', PassTDAttRatio)

|=== Predicting PassTDAttRatio ===|

PSPassTDAttRatio: 0.142111115
PSOffPassYrdsPerGame: 6.44317035492e-05
PSOffPointsPerGame: 0.000371665416427
Intercept: 0.0175440603805

R-Sqr on Seasons 2014 to 2016: 0.0776709687707
Train Data Size: 322
Test Data Size:145


#### Secondary Stats

In [592]:
ModelsWR.create_model(players, 'AvgRushYrds', ['AvgRushYrds', 'RushAttPerGame', 'OffRushYrdsPerGame'], 
                      game_limit=12, test_cutoff=2014, add_limits={})

|=== Predicting AvgRushYrds ===|

PSAvgRushYrds: 0.147727772425
PSRushAttPerGame: 3.52344002527
PSOffRushYrdsPerGame: -0.0181129923799
Intercept: 4.03411535313

R-Sqr on Seasons 2014 to 2016: 0.0773486183827
Train Data Size: 486
Test Data Size:228


In [596]:
ModelsWR.create_model(players, 'RushAttPerGame', ['RushAttPerGame'], 
                      game_limit=12, test_cutoff=2014, add_limits={})

|=== Predicting RushAttPerGame ===|

PSRushAttPerGame: 0.790620376438
Intercept: 0.0252573150424

R-Sqr on Seasons 2014 to 2016: 0.467599039223
Train Data Size: 486
Test Data Size:228


In [598]:
# static RushTDAttRatio for WRs

## Tight Ends

In [599]:
ModelsTE = PositionModels('TE')

#### Primary Stats

In [618]:
ModelsTE.create_model(players, 'AvgPassYrds', ['AvgPassYrds'], 
                      game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4})

|=== Predicting AvgPassYrds ===|

PSAvgPassYrds: 0.276619249957
Intercept: 5.42182785496

R-Sqr on Seasons 2014 to 2016: 0.101981319253
Train Data Size: 108
Test Data Size:53


In [628]:
ModelsTE.create_model(players, 'PassAttPerGame', ['PassAttPerGame', 'OffPassYrdsPerGame'], 
                      game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 2})

|=== Predicting PassAttPerGame ===|

PSPassAttPerGame: 0.537619693874
PSOffPassYrdsPerGame: 0.00685887073055
Intercept: 1.02018376937

R-Sqr on Seasons 2014 to 2016: 0.440298185625
Train Data Size: 178
Test Data Size:80


In [624]:
ModelsTE.create_model(players, 'PassTDAttRatio', ['PassTDAttRatio'], 
                      game_limit=12, test_cutoff=2014, add_limits={'PassAttPerGame': 4})

|=== Predicting PassTDAttRatio ===|

PSPassTDAttRatio: 0.21132866975
Intercept: 0.041303701398

R-Sqr on Seasons 2014 to 2016: 0.0761600670352
Train Data Size: 108
Test Data Size:53


In [527]:
players.columns

Index(['Player', 'Position', 'Team', 'Season', 'Plays', 'Games',
       'RushAttempts', 'RushYrds', 'RushTDs', 'PassAttempts', 'Complete',
       'PassYrds', 'PassTDs', 'Fumbles', 'Interceptions', 'PassTDAttRatio',
       'RushTDAttRatio', 'AvgPassYrds', 'AvgRushYrds', 'PercentComplete',
       'PlaysPerGame', 'GamesPlayedPercent', 'InterceptionRatio',
       'PassAttPerGame', 'RushAttPerGame', 'FumblesPerGame',
       'PassRushAttRatio', 'OffPassRushRatio', 'OffYardsPerGame',
       'OffPassYrdsPerGame', 'OffRushYrdsPerGame', 'OffPointsPerGame'],
      dtype='object')