# Dependencies

In [1]:
import pandas as pd
import sqlite3 as sql
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error


# Getting Data

In [2]:
stats_cnx = sql.connect('../../Data/Combined Clean Historical Data/Historical_Fantasy_Stats.db')
stats = pd.read_sql_query('SELECT * FROM Historical_Fantasy_Points ', stats_cnx)
stats_cnx.close()

In [3]:
stats['season'] = pd.to_datetime(stats['season'], format='%Y')

In [74]:
stats_prep = stats.copy()

# Manipulating Data

In [75]:
stats_prep['season'] = pd.to_datetime(stats_prep['season'], format='%Y')
stats_prep['nfanpts'] = stats_prep.groupby(['Name','player_id']).shift(-1)['fantast_pts']
stats_prep = stats_prep[stats_prep['nfanpts'].notna()]


In [76]:
stats_prep.rename(columns = {'nfanpts':'target'}, inplace = True)

In [78]:
stats_prep.drop(['Name', 'Position', 'player_id','fantast_pts'], axis=1, inplace=True)
predictors = list(stats_prep)
unwanted = {'season', 'target'}
predictors = [e for e in predictors if e not in unwanted]

In [None]:
stats_prep = stats_prep.set_index(['season'])
stats_prep = stats_prep.sort_index()

In [81]:
stats_prep.apply(pd.isnull).sum()

completions                     0
attempts                        0
passing_yards                   0
passing_tds                     0
interceptions                   0
sacks                           0
sack_yards                      0
sack_fumbles                    0
sack_fumbles_lost               0
passing_air_yards               0
passing_yards_after_catch       0
passing_first_downs             0
passing_epa                     0
passing_2pt_conversions         0
carries                         0
rushing_yards                   0
rushing_tds                     0
rushing_fumbles                 0
rushing_fumbles_lost            0
rushing_first_downs             0
rushing_epa                     0
rushing_2pt_conversions         0
receptions                      0
targets                         0
receiving_yards                 0
receiving_tds                   0
receiving_fumbles               0
receiving_fumbles_lost          0
receiving_air_yards             0
receiving_yard

In [82]:
stats_prep['rtd_sh'] = stats_prep['rtd_sh'].fillna(stats_prep['rtd_sh'].mean())
stats_prep['dom'] = stats_prep['dom'].fillna(stats_prep['dom'].mean())
stats_prep['w8dom'] = stats_prep['w8dom'].fillna(stats_prep['w8dom'].mean())

In [83]:
stats_prep.apply(pd.isnull).sum()

completions                    0
attempts                       0
passing_yards                  0
passing_tds                    0
interceptions                  0
sacks                          0
sack_yards                     0
sack_fumbles                   0
sack_fumbles_lost              0
passing_air_yards              0
passing_yards_after_catch      0
passing_first_downs            0
passing_epa                    0
passing_2pt_conversions        0
carries                        0
rushing_yards                  0
rushing_tds                    0
rushing_fumbles                0
rushing_fumbles_lost           0
rushing_first_downs            0
rushing_epa                    0
rushing_2pt_conversions        0
receptions                     0
targets                        0
receiving_yards                0
receiving_tds                  0
receiving_fumbles              0
receiving_fumbles_lost         0
receiving_air_yards            0
receiving_yards_after_catch    0
receiving_

In [84]:
train = stats_prep.loc[:'2018-01-01']
test = stats_prep.loc['2019-01-01':]

In [85]:
train

Unnamed: 0_level_0,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,sack_fumbles,sack_fumbles_lost,passing_air_yards,...,yac_sh,wopr,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,dom,w8dom,yptmpa,target
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-01,29,49,352,3,3,4,21,0,0,541,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,6.12
2010-01-01,312,475,3922,28,11,31,193,2,0,4419,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,397.42
2010-01-01,115,189,1197,7,7,15,99,5,2,1572,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,98.70
2010-01-01,0,0,0,0,0,0,0,0,0,0,...,0.108815,0.096468,0.044634,0.038462,0.043689,0.043103,0.041548,0.043399,0.301695,161.90
2010-01-01,0,0,0,0,0,0,0,0,0,0,...,0.214770,0.548342,0.264327,0.350000,0.233918,0.246073,0.307164,0.281462,1.721905,33.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-01,2,3,14,0,0,0,0,0,0,12,...,0.069782,0.058600,0.030415,0.000000,0.025641,0.023256,0.015207,0.024332,0.226545,294.60
2018-01-01,0,0,0,0,0,0,0,0,0,0,...,0.020915,0.157524,0.041797,0.250000,0.040000,0.050633,0.145898,0.083437,0.335000,32.50
2018-01-01,0,0,0,0,0,0,0,0,0,0,...,0.297065,0.244861,0.135135,0.148148,0.122642,0.125523,0.141642,0.137738,1.126214,219.40
2018-01-01,0,0,0,0,0,0,0,0,0,0,...,0.220828,0.712276,0.309998,0.520000,0.315271,0.337719,0.414999,0.351998,2.310000,212.70


In [91]:
test

Unnamed: 0_level_0,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,sack_fumbles,sack_fumbles_lost,passing_air_yards,...,yac_sh,wopr,ry_sh,rtd_sh,rfd_sh,rtdfd_sh,dom,w8dom,yptmpa,target
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-01,0,0,0,0,0,0,0,0,0,0,...,0.191954,0.320160,0.159401,0.192308,0.161765,0.165217,0.175854,0.165983,1.074336,116.4
2019-01-01,0,0,0,0,0,0,0,0,0,0,...,0.129032,0.431005,0.139187,0.157895,0.125000,0.129032,0.148541,0.142929,0.918987,185.6
2019-01-01,0,0,0,0,0,0,0,0,0,0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.4
2019-01-01,1,1,38,0,0,0,0,0,0,37,...,0.206937,0.695226,0.326963,0.375000,0.308642,0.314607,0.350981,0.336570,2.206349,9.6
2019-01-01,0,0,0,0,0,0,0,0,0,0,...,0.065781,0.094184,0.061009,0.062500,0.073826,0.072727,0.061754,0.061307,0.454955,24.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-01-01,0,0,0,0,0,0,0,0,0,0,...,0.006246,0.086417,0.021317,0.000000,0.021277,0.018182,0.010659,0.017054,0.180108,22.2
2019-01-01,0,0,0,0,0,0,0,0,0,0,...,0.126214,0.194912,0.112557,0.333333,0.139706,0.158940,0.222945,0.156712,0.749415,17.6
2019-01-01,0,0,0,0,0,0,0,0,0,0,...,0.050279,0.067521,0.019452,0.000000,0.018519,0.017544,0.009726,0.015561,0.117647,206.5
2019-01-01,0,0,0,0,0,0,0,0,0,0,...,0.137056,0.240914,0.112999,0.200000,0.114754,0.126761,0.156499,0.130399,0.884393,54.2


# Modelling 

In [133]:
reg = Ridge(alpha=10000)

In [134]:
reg.fit(train[predictors], train["target"])

In [135]:
predictions = reg.predict(test[predictors])

In [136]:
mean_squared_error(test["target"], predictions)

4267.974937157736

In [137]:
combined = pd.concat([test["target"], pd.Series(predictions, index=test.index)], axis=1)
combined.columns = ["actual", "predictions"]

In [138]:
combined

Unnamed: 0_level_0,actual,predictions
season,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-01,116.4,112.371680
2019-01-01,185.6,91.585634
2019-01-01,0.4,29.249628
2019-01-01,9.6,192.285444
2019-01-01,24.2,45.080734
...,...,...
2019-01-01,22.2,30.666277
2019-01-01,17.6,77.864335
2019-01-01,206.5,35.658749
2019-01-01,54.2,54.590262


In [139]:
reg.coef_

array([ 0.21195908, -0.13122676,  0.04524518,  0.84977073,  0.07289661,
       -0.02066974, -0.01350119,  0.11916698,  0.21316686,  0.00822335,
       -0.00945997, -0.24703986,  0.32560928,  0.04857584, -0.09235594,
        0.1122125 ,  0.45144328, -0.30681761, -0.16397466,  0.08666155,
        0.07734413,  0.15825084,  0.71758026,  0.08081005,  0.02146203,
        0.71821854,  0.06767071,  0.13494086,  0.0331231 ,  0.03898627,
       -0.11206933,  0.51403663, -0.08155408,  0.06571333, -1.18108213,
        0.08621393,  0.08865738,  0.07345774,  0.20024679,  0.07800517,
        0.09063423,  0.07718695,  0.07770186,  0.08067055,  0.07469234,
        0.65002827])

In [140]:
stats_prep.corr()["target"]

completions                    0.351609
attempts                       0.346053
passing_yards                  0.356939
passing_tds                    0.369981
interceptions                  0.292147
sacks                          0.313668
sack_yards                     0.310612
sack_fumbles                   0.272743
sack_fumbles_lost              0.252454
passing_air_yards              0.347867
passing_yards_after_catch      0.354208
passing_first_downs            0.356015
passing_epa                    0.298888
passing_2pt_conversions        0.203320
carries                        0.245557
rushing_yards                  0.257700
rushing_tds                    0.264237
rushing_fumbles                0.274330
rushing_fumbles_lost           0.198516
rushing_first_downs            0.280724
rushing_epa                    0.010458
rushing_2pt_conversions        0.108693
receptions                     0.415887
targets                        0.401934
receiving_yards                0.413781


In [141]:
import pickle

In [143]:
filename = "ridge_model.pkl"
pickle.dump(reg, open(filename, 'wb'))


# load the model from disk
#loaded_model = pickle.load(open(filename, 'rb'))
#result = loaded_model.score(X_test, Y_test)