In [42]:
import numpy as np
import sklearn as sk
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sqlite3
from sqlite_api import *
from sklearn import linear_model

In [3]:
fdb = 'fantasy.db'

player_lbls = ['Player', 'Position', 'Team', 'Season', 'Plays', 'Games', 'RushAttempts', 
              'RushYrds', 'RushTDs', 'PassAttempts', 'Complete', 'PassYrds', 'PassTDs', 'Fumbles', 'Interceptions']

In [4]:
## Scoring metrics

qb = {'PassYrds': 0.04, # QBs get 0.04 points per passing yard 
        'PassTDs': 4, 
        'Interceptions': -2,
        'RushYrds': 0.1,
        'RushTDs': 6,
        'Fumbles': -2}

wr = {'PassYrds': 0.01,
         'PassTDs': 6,
         'Interceptions': -2,
         'RushYrds': 0.1,
         'RushTDs': 6,
         'Fumbles': -2}

rb = wr.copy()

te = wr.copy()

scores = {'QB': qb,
             'RB': rb,
             'WR': wr,
             'TE': te}

# need 2 points for 2point conversions

In [11]:
def score_row(row, qb_scores, wr_scores, rb_scores, te_scores):
    score = 0
    if row['Position'] == 'QB':
        metrics = qb_scores
    elif row['Position'] == 'WR':
        metrics = wr_scores
    elif row['Position'] == 'RB':
        metrics = rb_scores
    elif row['Position'] == 'TE':
        metrics = te_scores
    
    for var in metrics:
        score += row[var] * metrics[var]
        
    return score

In [12]:
select_to_df(fdb, 'OffenseSeason', ['Team', 'Season', 'Yards']).tail()

Unnamed: 0,Team,Season,Yards
91,SEA,2014,6414.0
92,SF,2014,5771.0
93,TB,2014,5215.0
94,TEN,2014,5159.0
95,WAS,2014,6177.0


In [25]:
plyr = select_to_df(fdb, 'PlayerSeason', player_lbls, where='WHERE Season=2016')  

In [5]:
# create 2016 mock season database
rdb = 'rankings.db'
conn = sqlite3.connect(rdb)
c = conn.cursor()

In [95]:
c.execute('DROP TABLE Kickers')

OperationalError: no such table: Kickers

In [96]:
for pos in ['Quarterbacks', 'RunningBacks', 'WideReceivers', 'TightEnds', 'Kickers']:
    c.execute('''CREATE Table ''' + pos +
                '''(Player VARCHAR(30),
                    Team VARCHAR(3),
                    Position CHARACTER(2),
                    PRIMARY KEY (Player))''')

In [97]:
c.execute('''CREATE Table Defenses
                (Team VARCHAR(3),
                Position CHARACTER(2),
                PRIMARY KEY (Team))''')

<sqlite3.Cursor at 0x21886351f10>

In [102]:
df = select_to_df(fdb, 'PlayerSeason', player_lbls, where='WHERE Season=2016')[['Player', 'Position', 'Team']]
QBs = df[df.Position == 'QB']
for idx in range(len(QBs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO Quarterbacks VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [103]:
RBs = df[df.Position == 'RB']
for idx in range(len(RBs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO RunningBacks VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [104]:
WRs = df[df.Position == 'WR']
for idx in range(len(WRs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO WideReceivers VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [105]:
TEs = df[df.Position == 'TE']
for idx in range(len(TEs)):
    row = df.iloc[idx]
    c.execute('INSERT INTO TightEnds VALUES (?, ?, ?)', (row[0], row[2], row[1]))

In [107]:
df = select_to_df(fdb, 'KickerSeason', ['Player', 'Team', 'Position'], 'WHERE Season = 2016')
for idx in range(len(df)):
    c.execute('INSERT INTO Kickers VALUES (?, ?, ?)', df.iloc[idx])

In [108]:
df = select_to_df(fdb, 'DefenseSeason', ['Team', 'Position'], 'WHERE Season = 2016')
for idx in range(len(df)):
    c.execute('INSERT INTO Defenses VALUES (?, ?)', df.iloc[idx])

In [30]:
df = select_to_df(fdb, 'PlayerSeason', player_lbls, 'WHERE Season != 2016')
df['Score'] = ply.apply(score_row, args=(qb, wr, rb, te), axis=1)

In [64]:
for i in ply.Team.unique:
    print(i)

TypeError: 'method' object is not iterable

## Offensive Features

In [6]:
def add_feature(df, name, func):
    df[name] = df.apply(func, axis=1)
    return df

In [7]:
offense_lbls = ['Team', 
                 'Season', 
                 'Yards',
                 'PassYrds',
                 'RushYrds',
                 'Points']

In [8]:
def pass_rush_rat(row):
    return row['PassYrds'] / row['RushYrds']

def yards_per_game(row):
    return row['Yards'] / 16

def pass_yrds_per_game(row):
    return row['PassYrds'] / 16

def rush_yrds_per_game(row):
    return row['RushYrds'] / 16

def points_per_game(row):
    return row['Points'] / 16

In [57]:
off = select_to_df(fdb, 'OffenseSeason', offense_lbls)

In [58]:
off = add_feature(off, 'OffPassRushRatio', pass_rush_rat)
off = add_feature(off, 'OffYardsPerGame', yards_per_game)
off = add_feature(off, 'OffPassYrdsPerGame', pass_yrds_per_game)
off = add_feature(off, 'OffRushYrdsPerGame', rush_yrds_per_game)
off = add_feature(off, 'OffPointsPerGame', points_per_game)
off = off.drop(['Yards', 'PassYrds', 'RushYrds', 'Points'], axis=1)
off.head()

Unnamed: 0,Team,Season,OffPassRushRatio,OffYardsPerGame,OffPassYrdsPerGame,OffRushYrdsPerGame,OffPointsPerGame
0,NE,2007,2.55868,411.25,295.6875,115.5625,36.8125
1,DAL,2007,2.351088,365.6875,256.5625,109.125,28.4375
2,IND,2007,2.364009,358.6875,252.0625,106.625,28.125
3,JAX,2007,1.391886,357.4375,208.0,149.4375,25.6875
4,SEA,2007,2.448425,348.9375,247.75,101.1875,24.5625


## Additional Player Features

In [59]:
def pass_tdatt_rat(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['PassTDs'] / row['PassAttempts']

def rush_tdatt_rat(row):
    if row['RushAttempts'] == 0:
        return 0
    return row['RushTDs'] / row['RushAttempts']

def complete_perc(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['Complete'] / row['PassAttempts']

def avg_rush_yrds(row):
    if row['RushAttempts'] == 0:
        return 0
    return row['RushYrds'] / row['RushAttempts']

def avg_pass_yrds(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['PassYrds'] / row['PassAttempts']

def avg_plays(row):
    return row['Plays'] / row['Games']

def score_per_play(row):
    return row['Score'] / row['Plays']

def score_per_game(row):
    return row['Score'] / row['Games']

def games_perc(row):
    return row['Games'] / 16

def intercept_rat(row):
    if row['PassAttempts'] == 0:
        return 0
    return row['Interceptions'] / row['PassAttempts']

def pass_att_per_game(row):
    return row['PassAttempts'] / row['Games']

def rush_att_per_game(row):
    return row['RushAttempts'] / row['Games']

def fumb_per_game(row):
    return row['Fumbles'] / row['Games']

In [62]:
df = select_to_df(fdb, 'PlayerSeason', player_lbls)
df['PassTDAttRatio'] = df.apply(pass_tdatt_rat, axis=1)
df['RushTDAttRatio'] = df.apply(rush_tdatt_rat, axis=1)
df['AvgPassYrds'] = df.apply(avg_pass_yrds, axis=1)
df['AvgRushYrds'] = df.apply(avg_rush_yrds, axis=1)
df['PercentComplete'] = df.apply(complete_perc, axis=1)
df['PlaysPerGame'] = df.apply(avg_plays, axis=1)
df['GamesPlayedPercent'] = df.apply(games_perc, axis=1)
df['InterceptionRatio'] = df.apply(intercept_rat, axis=1)
df['PassAttPerGame'] = df.apply(pass_att_per_game, axis=1)
df['RushAttPerGame'] = df.apply(rush_att_per_game, axis=1)
df['FumblesPerGame'] = df.apply(fumb_per_game, axis=1)
#df['ScorePerPlay'] = df.apply(score_per_play, axis=1)
#df['ScorePerGame'] = df.apply(score_per_game, axis=1)

In [24]:
df[(df.RushAttempts > 0) & (df.Position == 'TE')].sort_values('RushAttempts', ascending=False)

Unnamed: 0,Player,Position,Team,Season,Plays,Games,RushAttempts,RushYrds,RushTDs,PassAttempts,...,Interceptions,Score,PassTDAttRatio,RushTDAttRatio,AvgPassYrds,AvgRushYrds,PercentComplete,PlaysPerGame,ScorePerPlay,ScorePerGame
4163,Charles Clay,TE,MIA,2013,107.0,16.0,7.0,15.0,1.0,100.0,...,0.0,51.09,0.060000,0.142857,7.590000,2.142857,0.690000,6.687500,0.477477,3.193125
4241,MarQueis Gray,TE,CLE,2013,15.0,12.0,6.0,43.0,0.0,9.0,...,0.0,4.38,0.000000,0.000000,0.888889,7.166667,0.222222,1.250000,0.292000,0.365000
4804,Trey Burton,TE,PHI,2014,5.0,15.0,5.0,10.0,0.0,0.0,...,0.0,1.00,0.000000,0.000000,0.000000,2.000000,0.000000,0.333333,0.200000,0.066667
3076,Aaron Hernandez,TE,NE,2011,118.0,14.0,5.0,45.0,0.0,113.0,...,0.0,53.60,0.061947,0.000000,8.053097,9.000000,0.699115,8.428571,0.454237,3.828571
1569,Jeff Dugan,TE,MIN,2008,6.0,9.0,4.0,7.0,0.0,2.0,...,0.0,0.82,0.000000,0.000000,6.000000,1.750000,1.000000,0.666667,0.136667,0.091111
2042,Delanie Walker,TE,SF,2009,36.0,16.0,3.0,34.0,0.0,33.0,...,0.0,3.73,0.000000,0.000000,7.060606,11.333333,0.636364,2.250000,0.103611,0.233125
2547,Aaron Hernandez,TE,NE,2010,67.0,14.0,3.0,47.0,0.0,64.0,...,0.0,46.33,0.093750,0.000000,8.796875,15.666667,0.703125,4.785714,0.691493,3.309286
2573,Delanie Walker,TE,SF,2010,48.0,14.0,3.0,18.0,0.0,45.0,...,0.0,3.11,0.000000,0.000000,7.355556,6.000000,0.644444,3.428571,0.064792,0.222143
988,Kris Wilson,TE,KC,2007,53.0,16.0,3.0,7.0,0.0,50.0,...,0.0,6.50,0.020000,0.000000,3.600000,2.333333,0.480000,3.312500,0.122642,0.406250
3644,Dwayne Allen,TE,IND,2012,69.0,16.0,3.0,5.0,0.0,66.0,...,0.0,23.71,0.045455,0.000000,7.893939,1.666667,0.681818,4.312500,0.343623,1.481875


In [65]:
players = pd.merge(df, off, on=['Team', 'Season'])
players.tail()

Unnamed: 0,Player,Position,Team,Season,Plays,Games,RushAttempts,RushYrds,RushTDs,PassAttempts,...,GamesPlayedPercent,InterceptionRatio,PassAttPerGame,RushAttPerGame,FumblesPerGame,OffPassRushRatio,OffYardsPerGame,OffPassYrdsPerGame,OffRushYrdsPerGame,OffPointsPerGame
5224,Danny Vitale,RB,CLE,2016,5.0,9.0,0.0,0.0,0.0,5.0,...,0.5625,0.0,0.555556,0.0,0.0,1.906542,311.0,204.0,107.0,16.5
5225,Gary Barnidge,TE,CLE,2016,82.0,16.0,0.0,0.0,0.0,82.0,...,1.0,0.0,5.125,0.0,0.0,1.906542,311.0,204.0,107.0,16.5
5226,Seth DeValve,TE,CLE,2016,12.0,12.0,0.0,0.0,0.0,12.0,...,0.75,0.0,1.0,0.0,0.0,1.906542,311.0,204.0,107.0,16.5
5227,Connor Hamlett,TE,CLE,2016,1.0,3.0,0.0,0.0,0.0,1.0,...,0.1875,0.0,0.333333,0.0,0.0,1.906542,311.0,204.0,107.0,16.5
5228,Randall Telfer,TE,CLE,2016,7.0,14.0,0.0,0.0,0.0,7.0,...,0.875,0.0,0.5,0.0,0.0,1.906542,311.0,204.0,107.0,16.5


## Creating the Models

In [121]:
# todo: enable interactions
# get rid of zeros
# maybe get dont train on low games played
# maybe binary predict upward trend/downward trend

def create_model(df, pos, predict, features, reg_type='linear'):
    df = df[df.Position == pos]
    left_vars = ['Player', 'Team', 'Season'] + [predict]
    right_vars = ['Player', 'Team', 'Season'] + features
    model_vars = left_vars.copy()
    for var in features:
        model_vars.append('PS' + var)
        
    # build train dataframe
    model_df = pd.DataFrame(columns=model_vars)
    for season in range(2008, 2016):
        prev_season = str(season - 1)
        season = str(season)
        left_df = df[left_vars][df.Season == season]
        right_df = df[right_vars][df.Season == prev_season]
        right_df['Season'] = season
        left_df = pd.merge(left_df, right_df, on=['Player', 'Team', 'Season'])
        left_df.columns = model_vars
        model_df = model_df.append(left_df)
        
    # build test dataframe
    left_df = df[left_vars][df.Season == '2016']
    right_df = df[right_vars][df.Season == '2015']
    right_df['Season'] = '2016'
    test_df = pd.merge(left_df, right_df, on=['Player', 'Team', 'Season'])
    test_df.columns = model_vars
  
    if reg_type == 'linear':
        model = linear_model.LinearRegression()
    elif reg_type == 'logistic':
        model = linear_model.LogisticRegression()
    print(test_df)   
    model.fit(model_df[['PS'+v for v in features]], model_df[predict])
    print(model.coef_)    
    print(model.score(model_df[['PS'+v for v in features]], model_df[predict]))
    return model

mdl = create_model(players, 'QB', 'GamesPlayedPercent', ['GamesPlayedPercent', 'AvgPassYrds', 'PassTDAttRatio', 'PercentComplete', 'OffPassYrdsPerGame'])

                Player Team Season  GamesPlayedPercent  PSGamesPlayedPercent  \
0        Aaron Rodgers   GB   2016              1.0000                1.0000   
1           Drew Brees   NO   2016              1.0000                0.9375   
2            Matt Ryan  ATL   2016              1.0000                1.0000   
3          Andrew Luck  IND   2016              0.9375                0.4375   
4         Kirk Cousins  WAS   2016              1.0000                1.0000   
5        Philip Rivers  LAC   2016              1.0000                1.0000   
6       Kellen Clemens  LAC   2016              0.7500                0.1250   
7     Matthew Stafford  DET   2016              1.0000                1.0000   
8            Tony Romo  DAL   2016              0.0625                0.2500   
9       Jameis Winston   TB   2016              1.0000                1.0000   
10      Russell Wilson  SEA   2016              1.0000                1.0000   
11         Andy Dalton  CIN   2016      

In [80]:
players[['Player', 'GamesPlayedPercent', 'AvgPassYrds', 'PassTDAttRatio']][players.Player == 'Tom Brady'].tail()

Unnamed: 0,Player,GamesPlayedPercent,AvgPassYrds,PassTDAttRatio
2609,Tom Brady,1.0,7.577708,0.053375
3312,Tom Brady,1.0,6.915605,0.039809
3785,Tom Brady,1.0,7.060137,0.056701
4164,Tom Brady,1.0,7.644231,0.057692
5012,Tom Brady,0.75,8.226852,0.064815


In [123]:
mdl = create_model(players, 'QB', 'AvgPassYrds', ['AvgPassYrds', 'GamesPlayedPercent'])

                Player Team Season  AvgPassYrds  PSAvgPassYrds  \
0        Aaron Rodgers   GB   2016     7.259016       6.680070   
1           Drew Brees   NO   2016     7.738484       7.767145   
2            Matt Ryan  ATL   2016     9.258427       7.477199   
3          Andrew Luck  IND   2016     7.779817       6.419795   
4         Kirk Cousins  WAS   2016     8.113861       7.672192   
5        Philip Rivers  LAC   2016     7.588235       7.249622   
6       Kellen Clemens  LAC   2016     0.000000      10.500000   
7     Matthew Stafford  DET   2016     7.284512       7.199324   
8            Tony Romo  DAL   2016     7.250000       7.305785   
9       Jameis Winston   TB   2016     7.213404       7.555140   
10      Russell Wilson  SEA   2016     7.727106       8.331263   
11         Andy Dalton  CIN   2016     7.470693       8.419689   
12      Marcus Mariota  TEN   2016     7.596452       7.616216   
13       Carson Palmer  ARI   2016     7.090452       8.698324   
14        

In [111]:
players.columns

Index(['Player', 'Position', 'Team', 'Season', 'Plays', 'Games',
       'RushAttempts', 'RushYrds', 'RushTDs', 'PassAttempts', 'Complete',
       'PassYrds', 'PassTDs', 'Fumbles', 'Interceptions', 'PassTDAttRatio',
       'RushTDAttRatio', 'AvgPassYrds', 'AvgRushYrds', 'PercentComplete',
       'PlaysPerGame', 'GamesPlayedPercent', 'InterceptionRatio',
       'PassAttPerGame', 'RushAttPerGame', 'FumblesPerGame',
       'OffPassRushRatio', 'OffYardsPerGame', 'OffPassYrdsPerGame',
       'OffRushYrdsPerGame', 'OffPointsPerGame'],
      dtype='object')