In [17]:
import os
import sys
src_dir = os.path.join(os.getcwd())
abs_path = os.path.abspath(os.path.join(src_dir, os.pardir, 'src'))
sys.path.append(abs_path)

from utils import GLOBAL, functions

In [18]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [19]:
data_df = pd.DataFrame()

yearStart = 2014
yearEnd = 2021

WEEKLY_BASE_URL = "https://raw.githubusercontent.com/fantasydatapros/data/master/weekly/{year}/week{week}.csv"

for year in range(yearStart, yearEnd):
    for week in range(1, 18):
        weekly_df = pd.read_csv(WEEKLY_BASE_URL.format(year=year, week=week))
        weekly_df['Year'] = year
        weekly_df['Week'] = week
        weekly_df['Usage'] = (weekly_df['PassingAtt'] + weekly_df['RushingAtt'] + weekly_df['Tgt'])
        data_df = pd.concat([data_df, weekly_df])

In [20]:
#Cleaning up TM values
data_df.loc[(data_df['Tm'] == 'OTI'), 'Tm'] = 'TEN'
data_df.loc[(data_df['Tm'] == 'RAM'), 'Tm'] = 'LAR'
data_df.loc[(data_df['Tm'] == 'STL'), 'Tm'] = 'LAR'
data_df.loc[(data_df['Tm'] == 'HTX'), 'Tm'] = 'HOU'
data_df.loc[(data_df['Tm'] == 'SDG'), 'Tm'] = 'LAC'
data_df.loc[(data_df['Tm'] == 'OAK'), 'Tm'] = 'LV'
data_df.loc[(data_df['Tm'] == 'RAI'), 'Tm'] = 'LV'
data_df.loc[(data_df['Tm'] == 'CLT'), 'Tm'] = 'IND'
data_df.loc[(data_df['Tm'] == 'RAV'), 'Tm'] = 'BAL'
data_df.loc[(data_df['Tm'] == 'CRD'), 'Tm'] = 'ARI'
data_df.loc[(data_df['Tm'] == 'NOR'), 'Tm'] = 'NO'

In [21]:
data_df = data_df.groupby(['Player', 'Pos', 'Tm', 'Year'], as_index=False)\
    .agg({
        'Usage': np.sum,
        'PassingYds': np.sum,
        'PassingTD': np.sum,
        'PassingAtt': np.sum,
        'RushingAtt': np.sum,
        'RushingYds': np.sum,
        'RushingTD': np.sum,
        'Rec': np.sum,
        'Tgt': np.sum,
        'ReceivingYds': np.sum,
        'ReceivingTD': np.sum,
        'PPRFantasyPoints': np.sum,
        'StandardFantasyPoints': np.sum,
        'HalfPPRFantasyPoints': np.sum
    })

In [22]:
#Set Scoring format that will be used below ('HalfPPR', 'PPR', or 'Standard')
scoring_format = 'HalfPPR'

In [23]:
pd.set_option('chained_assignment', None)

lag_features = [
    'RushingAtt',
    'Tgt',
    'Usage', 
    f'{scoring_format}FantasyPoints', 
    'PassingAtt', 
    'PassingTD'
]

for lag in range(1, 7):
    shifted = data_df.groupby('Player').shift(lag)

    for column in lag_features:
        data_df[f'lag_{column}_{lag}'] = shifted[column]
        
data_df = data_df.fillna(-1)

In [24]:
#Separate by pos
wr_df = data_df.loc[data_df['Pos'] == 'WR']
rb_df = data_df.loc[data_df['Pos'] == 'RB']
te_df = data_df.loc[data_df['Pos'] == 'TE']
qb_df = data_df.loc[data_df['Pos'] == 'QB']

WRs

In [25]:
X = wr_df[['lag_Tgt_1', 'lag_RushingAtt_1', 'lag_PassingAtt_1', 'lag_Usage_1', f'lag_{scoring_format}FantasyPoints_1']]
y = wr_df[f'{scoring_format}FantasyPoints'].values

WR_X_train, WR_X_test, WR_y_train, WR_y_test = train_test_split(X, y, test_size=0.2, random_state=10)

lr = LinearRegression()

lr.fit(WR_X_train, WR_y_train)

WR_y_predict = lr.predict(WR_X_test)

mean_absolute_error(WR_y_test, WR_y_predict)

40.9348815558926

In [26]:
wr_df_pred = wr_df.loc[
    (wr_df['Usage'] > 50) & (wr_df['Year'] == 2020),
     ['Player', 'Tgt', 'RushingAtt', 'PassingAtt', 'Usage', f'{scoring_format}FantasyPoints']
]

wr_df_pred['Predicted Points'] = lr.predict(
    wr_df_pred[['Tgt', 'RushingAtt', 'PassingAtt', 'Usage', f'{scoring_format}FantasyPoints']].values
)

wr_df_pred.sort_values(by='Predicted Points', ascending=False).head(100)



Unnamed: 0,Player,Tgt,RushingAtt,PassingAtt,Usage,HalfPPRFantasyPoints,Predicted Points
1631,Davante Adams,149.0,0.0,0.0,149.0,300.9,215.804287
5966,Tyreek Hill,134.0,13.0,0.0,147.0,285.4,200.408831
5413,Stefon Diggs,168.0,1.0,0.0,169.0,265.1,196.994447
891,Calvin Ridley,143.0,5.0,0.0,148.0,234.5,175.141103
1404,D.K. Metcalf,129.0,0.0,0.0,129.0,229.8,172.728209
...,...,...,...,...,...,...,...
2665,Jalen Reagor,54.0,4.0,0.0,58.0,63.7,67.453211
1777,Deebo Samuel,44.0,8.0,0.0,52.0,64.2,65.140812
4554,N'Keal Harry,58.0,2.0,0.0,60.0,57.4,65.022471
392,Antonio Gibson,17.0,55.0,0.0,72.0,57.0,40.577694


RBs

In [27]:
X = rb_df[['lag_Tgt_1', 'lag_RushingAtt_1', 'lag_PassingAtt_1', 'lag_Usage_1', f'lag_{scoring_format}FantasyPoints_1']]
y = rb_df[f'{scoring_format}FantasyPoints'].values

RB_X_train, RB_X_test, RB_y_train, RB_y_test = train_test_split(X, y, test_size=0.2, random_state=10)

lr = LinearRegression()

lr.fit(RB_X_train, RB_y_train)

RB_y_predict = lr.predict(RB_X_test)

mean_absolute_error(RB_y_test, RB_y_predict)

50.84410790183181

In [28]:
rb_df_pred = rb_df.loc[
    (rb_df['Usage'] > 50) & (rb_df['Year'] == 2020),
     ['Player', 'Tgt', 'RushingAtt', 'PassingAtt', 'Usage', f'{scoring_format}FantasyPoints']
]

rb_df_pred['Predicted Points'] = lr.predict(
    rb_df_pred[['Tgt', 'RushingAtt', 'PassingAtt', 'Usage', f'{scoring_format}FantasyPoints']].values
)

rb_df_pred.sort_values(by='Predicted Points', ascending=False).head(100)



Unnamed: 0,Player,Tgt,RushingAtt,PassingAtt,Usage,HalfPPRFantasyPoints,Predicted Points
225,Alvin Kamara,107.0,187.0,0.0,294.0,336.3,201.821134
1892,Derrick Henry,29.0,359.0,0.0,388.0,310.2,194.214865
1436,Dalvin Cook,54.0,312.0,0.0,366.0,309.8,191.768844
3231,Jonathan Taylor,41.0,232.0,0.0,273.0,234.8,154.138902
41,Aaron Jones,63.0,201.0,0.0,264.0,235.4,152.323022
...,...,...,...,...,...,...,...
4236,Matt Breida,10.0,59.0,0.0,69.0,37.5,54.112460
2986,Jeremy McNichols,16.0,47.0,0.0,63.0,37.9,53.806261
1944,Devonta Freeman,10.0,54.0,0.0,64.0,32.5,51.492671
5095,Rodney Smith,11.0,41.0,0.0,52.0,32.0,51.324594


TEs

In [29]:
X = te_df[['lag_Tgt_1', 'lag_RushingAtt_1', 'lag_PassingAtt_1', 'lag_Usage_1', f'lag_{scoring_format}FantasyPoints_1']]
y = te_df[f'{scoring_format}FantasyPoints'].values

TE_X_train, TE_X_test, TE_y_train, TE_y_test = train_test_split(X, y, test_size=0.2, random_state=10)

lr = LinearRegression()

lr.fit(TE_X_train, TE_y_train)

TE_y_predict = lr.predict(TE_X_test)

mean_absolute_error(TE_y_test, TE_y_predict)

30.509183392063285

In [30]:
te_df_pred = te_df.loc[
    (te_df['Usage'] > 50) & (te_df['Year'] == 2020),
     ['Player', 'Tgt', 'RushingAtt', 'PassingAtt', 'Usage', f'{scoring_format}FantasyPoints']
]

te_df_pred['Predicted Points'] = lr.predict(
    te_df_pred[['Tgt', 'RushingAtt', 'PassingAtt', 'Usage', f'{scoring_format}FantasyPoints']].values
)

te_df_pred.sort_values(by='Predicted Points', ascending=False).head(100)



Unnamed: 0,Player,Tgt,RushingAtt,PassingAtt,Usage,HalfPPRFantasyPoints,Predicted Points
5772,Travis Kelce,145.0,0.0,2.0,147.0,258.26,186.401845
1598,Darren Waller,146.0,0.0,0.0,146.0,223.0,168.400695
5473,T.J. Hockenson,101.0,1.0,0.0,102.0,139.8,114.378264
4128,Mark Andrews,89.0,0.0,0.0,89.0,141.1,110.539785
4452,Mike Gesicki,85.0,0.0,0.0,85.0,132.8,105.427124
3981,Logan Thomas,89.0,3.0,1.0,93.0,123.02,102.933376
4680,Noah Fant,93.0,0.0,0.0,93.0,116.3,102.007956
2246,Evan Engram,109.0,6.0,0.0,115.0,109.5,101.773783
5061,Robert Tonyan,59.0,0.0,0.0,59.0,150.6,101.771628
2450,Hayden Hurst,87.0,0.0,0.0,87.0,121.1,101.449764


QBs

In [31]:
X = qb_df[['lag_Tgt_1', 'lag_RushingAtt_1', 'lag_PassingAtt_1', 'lag_Usage_1', f'lag_{scoring_format}FantasyPoints_1']]
y = qb_df[f'{scoring_format}FantasyPoints'].values

QB_X_train, QB_X_test, QB_y_train, QB_y_test = train_test_split(X, y, test_size=0.2, random_state=10)

lr = LinearRegression()

lr.fit(QB_X_train, QB_y_train)

QB_y_predict = lr.predict(QB_X_test)

mean_absolute_error(QB_y_test, QB_y_predict)

64.0487593381011

In [32]:
qb_df_pred = qb_df.loc[
    (qb_df['Usage'] > 50) & (qb_df['Year'] == 2020),
     ['Player', 'Tgt', 'RushingAtt', 'PassingAtt', 'Usage', f'{scoring_format}FantasyPoints']
]

qb_df_pred['Predicted Points'] = lr.predict(
    qb_df_pred[['Tgt', 'RushingAtt', 'PassingAtt', 'Usage', f'{scoring_format}FantasyPoints']].values
)

qb_df_pred.sort_values(by='Predicted Points', ascending=False).head(100)



Unnamed: 0,Player,Tgt,RushingAtt,PassingAtt,Usage,HalfPPRFantasyPoints,Predicted Points
52,Aaron Rodgers,1.0,38.0,526.0,565.0,382.76,327.760404
3823,Kyler Murray,0.0,133.0,558.0,691.0,378.74,326.568468
1901,Deshaun Watson,0.0,90.0,544.0,634.0,367.32,315.418874
5159,Russell Wilson,0.0,83.0,558.0,641.0,357.78,304.34127
5238,Ryan Tannehill,1.0,43.0,481.0,525.0,337.78,292.859128
4755,Patrick Mahomes,0.0,61.0,543.0,604.0,336.92,285.909844
3844,Lamar Jackson,0.0,137.0,325.0,462.0,298.0,284.280253
3337,Josh Allen,0.0,80.0,458.0,538.0,317.6,280.834664
5699,Tom Brady,0.0,30.0,610.0,640.0,337.92,275.889083
3479,Justin Herbert,0.0,55.0,595.0,650.0,328.84,271.075985
