In [110]:
import os
import sys
src_dir = os.path.join(os.getcwd(), '..', 'src')
sys.path.append(src_dir)

from utils import GLOBAL, functions

In [111]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [142]:
year = 2020

data_df = pd.read_csv(GLOBAL.CLEANED_DATA_2014_2019).iloc[:, 1:]

WEEKLY_BASE_URL = "https://raw.githubusercontent.com/fantasydatapros/data/master/weekly/{year}/week{week}.csv"

In [168]:
#Get 2020 Data and concat to the main data_df
for week in range(1, 18):
    weekly_df = pd.read_csv(WEEKLY_BASE_URL.format(year=year, week=week))
    weekly_df['season'] = int(year)
    weekly_df['week'] = week
    data_df = pd.concat([data_df, weekly_df])

In [114]:
data_df = data_df.groupby(['player_id', 'tm', 'player', 'pos', 'season'], as_index=False)\
    .agg({
        'offensive_snapcount': np.sum,
        'offensive_snapcount_percentage': np.mean,
        'passing_rating': np.mean,
        'passing_yds': np.sum,
        'passing_td': np.sum,
        'passing_att': np.sum,
        'receiving_yds': np.sum,
        'receiving_td': np.sum,
        'receiving_rec': np.sum,
        'receiving_tar': np.sum,
        'rushing_att': np.sum,
        'standard_fantasy_points': np.sum,
        'ppr_fantasy_points': np.sum,
        'half_ppr_fantasy_points': np.sum
    })

In [115]:
#Snap counts started in 2012 so need to filter data to include 2012+

data_df = data_df.loc[data_df['season'] >= 2012]

In [116]:
pd.set_option('chained_assignment', None)

lag_features = [
    'rushing_att', 
    'receiving_tar', 
    'offensive_snapcount', 
    'offensive_snapcount_percentage',
    'ppr_fantasy_points',
    'half_ppr_fantasy_points',
    'standard_fantasy_points',
    'passing_rating',
    'passing_att', 
    'passing_td'
]

for lag in range(1, 6):
    shifted = data_df.groupby('player_id').shift(lag)

    for column in lag_features:
        data_df[f'lag_{column}_{lag}'] = shifted[column]

data_df = data_df.fillna(-1)

In [117]:
#Create position df's
wr_df = data_df.loc[(data_df['pos'] == 'WR') & (data_df['season'] < 2020)]
rb_df = data_df.loc[(data_df['pos'] == 'RB') & (data_df['season'] < 2020)]
qb_df = data_df.loc[(data_df['pos'] == 'QB') & (data_df['season'] < 2020)]
te_df = data_df.loc[(data_df['pos'] == 'TE') & (data_df['season'] < 2020)]

In [118]:
#Minimum threshold for snap counts to 50
wr_df =wr_df.loc[wr_df['lag_offensive_snapcount_1'] > 50]
rb_df =rb_df.loc[rb_df['lag_offensive_snapcount_1'] > 50]
qb_df =qb_df.loc[qb_df['lag_offensive_snapcount_1'] > 50]
te_df =te_df.loc[te_df['lag_offensive_snapcount_1'] > 50]

#Wide Receivers

In [119]:
#WR Machine Learning with Half PPR
scoring_format = 'half_ppr' #Either: 'standard', 'half_ppr', or 'ppr'

X_WR = wr_df[[
    'lag_receiving_tar_1', 'lag_offensive_snapcount_1',f'lag_{scoring_format}_fantasy_points_1'
]].values

y_wr = wr_df[f'{scoring_format}_fantasy_points'].values

X_WR_train, X_WR_test, y_wr_train, y_wr_test = train_test_split(X_WR, y_wr, test_size=0.2, random_state=10)

lr_wr = LinearRegression()

lr_wr.fit(X_WR_train, y_wr_train)

y_wr_predict = lr_wr.predict(X_WR_test)

mean_absolute_error(y_wr_test, y_wr_predict)

43.346808206718926

In [170]:
pd.set_option('display.max_rows', None)

wr_df_predict = wr_df.loc[
    (wr_df['season'] == 2020),
    ['player', 'receiving_tar', 'offensive_snapcount', f'{scoring_format}_fantasy_points']
]

wr_df_predict['predicted_2021'] = lr_wr.predict(
    wr_df[['receiving_tar', 'offensive_snapcount', f'{scoring_format}_fantasy_points']]
)

wr_df_predict.sort_values(by='predicted_2021', ascending=False).head(100)