In [1]:
import os
import sys
# Put the sibling ``src`` directory (one level above the current working
# directory) on the import path so the ``utils`` import below can resolve.
notebook_dir = os.getcwd()
src_dir = os.path.join(notebook_dir, '..', 'src')
sys.path.append(src_dir)

from utils import GLOBAL, functions

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [21]:
yearStart = 2014
yearEnd = 2021

WEEKLY_BASE_URL = "https://raw.githubusercontent.com/fantasydatapros/data/master/weekly/{year}/week{week}.csv"

# Download one CSV per (year, week) and tag each row with its season and week.
# range() excludes its end value, so `yearEnd` itself is NOT loaded, and
# weeks run 1 through 17 inclusive.
#
# The weekly frames are collected in a list and concatenated once at the end:
# calling pd.concat inside the loop re-copies every previously loaded row on
# each iteration, which is quadratic in the number of files.
weekly_frames = []

for year in range(yearStart, yearEnd):
    for week in range(1, 18):
        weekly_df = pd.read_csv(WEEKLY_BASE_URL.format(year=year, week=week))
        weekly_df['Year'] = year
        weekly_df['Week'] = week
        # Usage = every pass attempt, rush attempt and target credited to the row.
        weekly_df['Usage'] = (weekly_df['PassingAtt'] + weekly_df['RushingAtt'] + weekly_df['Tgt'])
        weekly_frames.append(weekly_df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# year range is empty (matches what the incremental version produced).
data_df = pd.concat(weekly_frames) if weekly_frames else pd.DataFrame()

In [42]:
# Rewrite selected 'Tm' codes in place. Several source codes collapse onto the
# same target (e.g. both 'RAM' and 'STL' become 'LAR'), presumably so that a
# franchise keeps one abbreviation across seasons -- confirm against the data.
team_code_fixes = {
    'OTI': 'TEN',
    'RAM': 'LAR',
    'STL': 'LAR',
    'HTX': 'HOU',
    'SDG': 'LAC',
    'OAK': 'LV',
    'RAI': 'LV',
    'CLT': 'IND',
    'RAV': 'BAL',
    'CRD': 'ARI',
    'NOR': 'NO',
}

for old_code, new_code in team_code_fixes.items():
    data_df.loc[data_df['Tm'] == old_code, 'Tm'] = new_code

In [44]:
# Collapse the weekly rows into one row per (Player, Pos, Tm, Year) by summing
# every counting stat and fantasy-point column listed here.
season_sum_columns = [
    'Usage',
    'PassingYds',
    'PassingTD',
    'PassingAtt',
    'RushingAtt',
    'RushingYds',
    'RushingTD',
    'Rec',
    'Tgt',
    'ReceivingYds',
    'ReceivingTD',
    'PPRFantasyPoints',
    'StandardFantasyPoints',
    'HalfPPRFantasyPoints',
]

# Use the string alias 'sum' rather than np.sum: passing NumPy callables to
# groupby().agg is deprecated in recent pandas, and the alias keeps the
# built-in grouped sum that np.sum was being dispatched to.
data_df = (
    data_df
    .groupby(['Player', 'Pos', 'Tm', 'Year'], as_index=False)
    .agg({column: 'sum' for column in season_sum_columns})
)

In [45]:
# Scoring format used below; one of 'HalfPPR', 'PPR', or 'Standard'.
# It is interpolated into the '<format>FantasyPoints' column name.
scoring_format = "HalfPPR"

In [49]:
# Silence pandas' chained-assignment warning for the rest of the session.
pd.set_option('chained_assignment', None)

# Season-level columns for which prior-season ("lag") copies are created.
lag_features = [
    'RushingAtt',
    'Tgt',
    'Usage',
    f'{scoring_format}FantasyPoints',
    'PassingAtt',
    'PassingTD'
]

# The season aggregation leaves rows ordered by Player, Pos, Tm, Year, so a
# player who changed teams has rows ordered by team code rather than by year.
# Shifting in that order would pull a "previous" row that is not
# chronologically earlier. Sort each player's rows by Year first (stable, so
# ties keep their existing order), and shift only the lag source columns so
# earlier lag_* columns are not re-shifted on every pass.
# NOTE(review): a player with two rows in the same Year (e.g. two teams in one
# season) still gets the other same-year row as lag 1 -- confirm whether the
# grouping should drop 'Tm'.
seasons_by_player = (
    data_df
    .sort_values(['Player', 'Year'], kind='stable')
    .groupby('Player')[lag_features]
)

for lag in range(1, 7):
    shifted = seasons_by_player.shift(lag)

    for column in lag_features:
        # Assignment aligns on the index, so data_df keeps its current row
        # order; this relies on data_df having a unique index, which the
        # as_index=False aggregation above provides.
        data_df[f'lag_{column}_{lag}'] = shifted[column]

# Rows without enough history get -1 as a sentinel; note this fills NaN in
# every column of the frame, not only the lag columns.
data_df = data_df.fillna(-1)

In [53]:
# One frame per position code, each filtered on the 'Pos' column.
wr_df, rb_df, te_df, qb_df = (
    data_df.loc[data_df['Pos'] == position_code]
    for position_code in ('WR', 'RB', 'TE', 'QB')
)