In [1]:
# install necessary packages
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# extract pbp, weekly data
weekly_data_test = nfl.import_weekly_data([2023])
weekly_data_train = nfl.import_weekly_data([2022, 2021, 2020, 2019])

Downcasting floats.
Downcasting floats.


In [3]:
# view head of data
weekly_data_test.head()

Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,...,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr
0,00-0023459,A.Rodgers,Aaron Rodgers,QB,QB,https://static.www.nfl.com/image/private/f_aut...,NYJ,2023,1,REG,...,0.0,,0,,,,,0.0,0.0,0.0
1,00-0024243,M.Lewis,Marcedes Lewis,TE,TE,https://static.www.nfl.com/image/private/f_aut...,CHI,2023,4,REG,...,0.0,0.483465,0,0.0,0.03125,-0.012397,0.038197,0.0,0.8,1.8
2,00-0024243,M.Lewis,Marcedes Lewis,TE,TE,https://static.www.nfl.com/image/private/f_aut...,CHI,2023,7,REG,...,1.0,1.437224,0,3.2,0.034483,0.09434,0.117762,0.0,1.6,2.6
3,00-0026498,M.Stafford,Matthew Stafford,QB,QB,https://static.www.nfl.com/image/private/f_aut...,LA,2023,1,REG,...,0.0,,0,,,,,0.0,14.46,14.46
4,00-0026498,M.Stafford,Matthew Stafford,QB,QB,https://static.www.nfl.com/image/private/f_aut...,LA,2023,2,REG,...,0.0,,0,,,,,0.0,13.98,13.98


In [5]:
# view various positions
weekly_data_test['position'].unique()

array(['QB', 'TE', 'WR', 'P', 'FB', 'RB', 'T', 'OLB', 'ILB', 'CB', 'FS',
       'G', 'SS', 'DT'], dtype=object)

In [6]:
# show cols in weekly data
nfl.see_weekly_cols()

Index(['player_id', 'player_name', 'player_display_name', 'position',
       'position_group', 'headshot_url', 'recent_team', 'season', 'week',
       'season_type', 'completions', 'attempts', 'passing_yards',
       'passing_tds', 'interceptions', 'sacks', 'sack_yards', 'sack_fumbles',
       'sack_fumbles_lost', 'passing_air_yards', 'passing_yards_after_catch',
       'passing_first_downs', 'passing_epa', 'passing_2pt_conversions', 'pacr',
       'dakota', 'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles',
       'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_epa',
       'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost',
       'receiving_air_yards', 'receiving_yards_after_catch',
       'receiving_first_downs', 'receiving_epa', 'receiving_2pt_conversions',
       'racr', 'target_share', 'air_yards_share', 'wopr', 'special_teams_tds',
       'fantasy_points', 'fantasy_point

# Filter Variables from Entire Data

In [7]:
# drop cols that are not needed in any dataframe
cols_to_drop = ['player_id',
                'player_display_name',
                'position_group',
                'headshot_url',
                'season_type',
                'receiving_2pt_conversions',
                'sacks',
                'sack_yards',
                'sack_fumbles',
                'sack_fumbles_lost',
                'receiving_fumbles_lost',
                'receiving_fumbles',
                'rushing_fumbles',
                'rushing_fumbles_lost',
                'special_teams_tds'
                ]
weekly_data_test.drop(cols_to_drop, axis=1, inplace=True)
weekly_data_train.drop(cols_to_drop, axis=1, inplace=True)

In [8]:
# check that cols were dropped
weekly_data_test.columns

Index(['player_name', 'position', 'recent_team', 'season', 'week',
       'opponent_team', 'completions', 'attempts', 'passing_yards',
       'passing_tds', 'interceptions', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_first_downs', 'rushing_epa',
       'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards',
       'receiving_tds', 'receiving_air_yards', 'receiving_yards_after_catch',
       'receiving_first_downs', 'receiving_epa', 'racr', 'target_share',
       'air_yards_share', 'wopr', 'fantasy_points', 'fantasy_points_ppr',
       'date'],
      dtype='object')

## Filter by Position

In [9]:
#QB
qb_train = weekly_data_train[weekly_data_train["position"] == "QB" ]
qb_test = weekly_data_test[weekly_data_test["position"] == "QB"]

#RB
rb_train = weekly_data_train[weekly_data_train["position"] == "RB"]
rb_test = weekly_data_test[weekly_data_test["position"] == "RB"]

#WR
wr_train = weekly_data_train[weekly_data_train["position"] == "WR"]
wr_test = weekly_data_test[weekly_data_test["position"] == "WR"]

#TE
te_train = weekly_data_train[weekly_data_train["position"] == "TE"]
te_test = weekly_data_test[weekly_data_test["position"] == "TE"]

#FLEX
flex_list = ["RB", "WR", "TE"]
flex_train = weekly_data_train[weekly_data_train["position"].isin(flex_list)]
flex_test = weekly_data_test[weekly_data_test["position"].isin(flex_list)]

In [10]:
# check that the flex position is correct
flex_train["position"].unique()

array(['TE', 'WR', 'RB'], dtype=object)

## Drop Cols by Position

In [11]:
# define qb cols to drop (all receiving-related cols)
qb_cols_to_drop = ['position',
                   'receptions',
                   'targets',
                   'receiving_yards',
                   'receiving_tds',
                   'receiving_air_yards',
                   'receiving_yards_after_catch',
                   'receiving_first_downs',
                   'receiving_epa',
                   'racr',
                   'target_share',
                   'air_yards_share',
                   'wopr'
                   ]
qb_train = qb_train.drop(qb_cols_to_drop, axis=1)
qb_test = qb_test.drop(qb_cols_to_drop + ['opponent_team'], axis=1)

In [12]:
# define rb cols to drop (all passing-related cols)
rb_cols_to_drop = ['position',
                   'completions',
                   'attempts',
                   'passing_yards',
                   'passing_tds',
                   'passing_air_yards',
                   'passing_yards_after_catch',
                   'passing_first_downs',
                   'passing_epa',
                   'passing_2pt_conversions',
                   'pacr',
                   'dakota',
                   'interceptions'
                   ]
rb_train = rb_train.drop(rb_cols_to_drop, axis=1)
rb_test = rb_test.drop(rb_cols_to_drop + ['opponent_team'], axis=1)

In [13]:
# define wr cols to drop (all passing, rushing-related cols)
wr_cols_to_drop = ['position',
                   'completions',
                   'attempts',
                   'passing_yards',
                   'passing_tds',
                   'passing_air_yards',
                   'passing_yards_after_catch',
                   'passing_first_downs',
                   'passing_epa',
                   'passing_2pt_conversions',
                   'pacr',
                   'dakota',
                   'carries',
                   'rushing_yards',
                   'rushing_tds',
                   'rushing_first_downs',
                   'rushing_epa',
                   'rushing_2pt_conversions',
                   'interceptions'
                   ]
wr_train = wr_train.drop(wr_cols_to_drop, axis=1)
wr_test = wr_test.drop(wr_cols_to_drop + ['opponent_team'], axis=1)

In [14]:
# define te cols to drop (all passing, rushing-related cols)
te_cols_to_drop = ['position',
                   'completions',
                   'attempts',
                   'passing_yards',
                   'passing_tds',
                   'passing_air_yards',
                   'passing_yards_after_catch',
                   'passing_first_downs',
                   'passing_epa',
                   'passing_2pt_conversions',
                   'pacr',
                   'dakota',
                   'carries',
                   'rushing_yards',
                   'rushing_tds',
                   'rushing_first_downs',
                   'rushing_epa',
                   'rushing_2pt_conversions',
                   'interceptions'
                   ]
te_train = te_train.drop(te_cols_to_drop, axis=1)
te_test = te_test.drop(te_cols_to_drop + ['opponent_team'], axis=1)

In [15]:
# define flex cols to drop (all passing-related cols)
flex_cols_to_drop = ['position',
                    'completions',
                    'attempts',
                    'passing_yards',
                    'passing_tds',
                    'passing_air_yards',
                    'passing_yards_after_catch',
                    'passing_first_downs',
                    'passing_epa',
                    'passing_2pt_conversions',
                    'pacr',
                    'dakota',
                    'interceptions'
                   ]
flex_train = flex_train.drop(flex_cols_to_drop, axis=1)
flex_test = flex_test.drop(flex_cols_to_drop + ['opponent_team'], axis=1)

# Other Preprocessing

In [16]:
# change categorical QB data cols to one-hot encoding
qb_train = pd.get_dummies(qb_train, columns=['recent_team'])
qb_test = pd.get_dummies(qb_test, columns=['recent_team'])

In [17]:
# change categorical RB data cols to one-hot encoding
rb_train = pd.get_dummies(rb_train, columns=['recent_team'])
rb_test = pd.get_dummies(rb_test, columns=['recent_team'])

In [18]:
# change categorical WR data cols to one-hot encoding
wr_train = pd.get_dummies(wr_train, columns=['recent_team'])
wr_test = pd.get_dummies(wr_test, columns=['recent_team'])

In [19]:
# change categorical TE data cols to one-hot encoding
te_train = pd.get_dummies(te_train, columns=['recent_team'])
te_test = pd.get_dummies(te_test, columns=['recent_team'])

In [20]:
# change categorical FLEX data cols to one-hot encoding
flex_train = pd.get_dummies(flex_train, columns=['recent_team'])
flex_test = pd.get_dummies(flex_test, columns=['recent_team'])

In [44]:
# set horizon (number of weeks to predict after training data ends), to 1
h = 1

# Feature Engineering and Training

Code credit for this part goes to [this article](https://forecastegy.com/posts/multivariate-time-series-forecasting-in-python/)

In [45]:
# import necessary packages
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from xgboost import XGBRegressor
from mlforecast import MLForecast

from window_ops.rolling import rolling_mean, rolling_max, rolling_min

In [46]:
# make pipeline for models
models = [
    make_pipeline(SimpleImputer(), #simple imputer fills missing vals in data
                  RandomForestRegressor(),
                  XGBRegressor())
]

# specify forecast settings
model = MLForecast(models=models, #pass model pipeline to model
                   freq='W', #frequency of data is weekly
                   lags=[1,2,3,4,5,6,7,8], #use 1-8 weeks of lagged data to predict next week
                   lag_transforms={
                       1: [(rolling_mean, 4), (rolling_max, 4), (rolling_min, 4)], #use rolling mean, max, min of 4 weeks of data
                   },
                   date_features=['week', 'season'], #date features to use
                   num_threads=1) #number of threads to use - set to num cores in machine


In [47]:
# specify static and dynamic features
qb_dynamic_fts = ['season', 'week', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_first_downs', 'rushing_epa',
       'rushing_2pt_conversions', 'fantasy_points', 'fantasy_points_ppr',
       'recent_team_ARI', 'recent_team_ATL', 'recent_team_BAL',
       'recent_team_BUF', 'recent_team_CAR', 'recent_team_CHI',
       'recent_team_CIN', 'recent_team_CLE', 'recent_team_DAL',
       'recent_team_DEN', 'recent_team_DET', 'recent_team_GB',
       'recent_team_HOU', 'recent_team_IND', 'recent_team_JAX',
       'recent_team_KC', 'recent_team_LA', 'recent_team_LAC', 'recent_team_LV',
       'recent_team_MIA', 'recent_team_MIN', 'recent_team_NE',
       'recent_team_NO', 'recent_team_NYG', 'recent_team_NYJ',
       'recent_team_PHI', 'recent_team_PIT', 'recent_team_SEA',
       'recent_team_SF', 'recent_team_TB', 'recent_team_TEN',
       'recent_team_WAS']
qb_static_fts = ['player_name']

In [48]:
rb_dynamic_fts = ['season', 'week', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_first_downs', 'rushing_epa',
       'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards',
       'receiving_tds', 'receiving_air_yards', 'receiving_yards_after_catch',
       'receiving_first_downs', 'receiving_epa', 'racr', 'target_share',
       'air_yards_share', 'wopr', 'fantasy_points', 'fantasy_points_ppr',
       'recent_team_ARI', 'recent_team_ATL', 'recent_team_BAL',
       'recent_team_BUF', 'recent_team_CAR', 'recent_team_CHI',
       'recent_team_CIN', 'recent_team_CLE', 'recent_team_DAL',
       'recent_team_DEN', 'recent_team_DET', 'recent_team_GB',
       'recent_team_HOU', 'recent_team_IND', 'recent_team_JAX',
       'recent_team_KC', 'recent_team_LA', 'recent_team_LAC', 'recent_team_LV',
       'recent_team_MIA', 'recent_team_MIN', 'recent_team_NE',
       'recent_team_NO', 'recent_team_NYG', 'recent_team_NYJ',
       'recent_team_PHI', 'recent_team_PIT', 'recent_team_SEA',
       'recent_team_SF', 'recent_team_TB', 'recent_team_TEN',
       'recent_team_WAS']
rb_static_fts = ['player_name']

In [49]:
wr_dynamic_fts = ['season', 'week', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
       'racr', 'target_share', 'air_yards_share', 'wopr', 'fantasy_points',
       'fantasy_points_ppr', 'recent_team_ARI', 'recent_team_ATL',
       'recent_team_BAL', 'recent_team_BUF', 'recent_team_CAR',
       'recent_team_CHI', 'recent_team_CIN', 'recent_team_CLE',
       'recent_team_DAL', 'recent_team_DEN', 'recent_team_DET',
       'recent_team_GB', 'recent_team_HOU', 'recent_team_IND',
       'recent_team_JAX', 'recent_team_KC', 'recent_team_LA',
       'recent_team_LAC', 'recent_team_LV', 'recent_team_MIA',
       'recent_team_MIN', 'recent_team_NE', 'recent_team_NO',
       'recent_team_NYG', 'recent_team_NYJ', 'recent_team_PHI',
       'recent_team_PIT', 'recent_team_SEA', 'recent_team_SF',
       'recent_team_TB', 'recent_team_TEN', 'recent_team_WAS']
wr_static_fts = ['player_name']

In [50]:
te_dynamic_fts = ['season', 'week', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
       'racr', 'target_share', 'air_yards_share', 'wopr', 'fantasy_points',
       'fantasy_points_ppr', 'recent_team_ARI', 'recent_team_ATL',
       'recent_team_BAL', 'recent_team_BUF', 'recent_team_CAR',
       'recent_team_CHI', 'recent_team_CIN', 'recent_team_CLE',
       'recent_team_DAL', 'recent_team_DEN', 'recent_team_DET',
       'recent_team_GB', 'recent_team_HOU', 'recent_team_IND',
       'recent_team_JAX', 'recent_team_KC', 'recent_team_LA',
       'recent_team_LAC', 'recent_team_LV', 'recent_team_MIA',
       'recent_team_MIN', 'recent_team_NE', 'recent_team_NO',
       'recent_team_NYG', 'recent_team_NYJ', 'recent_team_PHI',
       'recent_team_PIT', 'recent_team_SEA', 'recent_team_SF',
       'recent_team_TB', 'recent_team_TEN', 'recent_team_WAS']
te_static_fts = ['player_name']

In [51]:
flex_dynamic_fts = ['week', 'carries', 'rushing_yards', 'rushing_tds',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions',
       'receptions', 'targets', 'receiving_yards', 'receiving_tds',
       'receiving_air_yards', 'receiving_yards_after_catch',
       'receiving_first_downs', 'receiving_epa', 'racr', 'target_share',
       'air_yards_share', 'wopr', 'fantasy_points', 'fantasy_points_ppr',
       'recent_team_ARI', 'recent_team_ATL', 'recent_team_BAL',
       'recent_team_BUF', 'recent_team_CAR', 'recent_team_CHI',
       'recent_team_CIN', 'recent_team_CLE', 'recent_team_DAL',
       'recent_team_DEN', 'recent_team_DET', 'recent_team_GB',
       'recent_team_HOU', 'recent_team_IND', 'recent_team_JAX',
       'recent_team_KC', 'recent_team_LA', 'recent_team_LAC', 'recent_team_LV',
       'recent_team_MIA', 'recent_team_MIN', 'recent_team_NE',
       'recent_team_NO', 'recent_team_NYG', 'recent_team_NYJ',
       'recent_team_PHI', 'recent_team_PIT', 'recent_team_SEA',
       'recent_team_SF', 'recent_team_TB', 'recent_team_TEN',
       'recent_team_WAS', ]
flex_static_fts = ['player_name']

# Train Models

In [52]:
model.fit(qb_train,
          id_col= 'player_name',
          time_col='week',
          target_col='fantasy_points_ppr',
          static_features=qb_static_fts,
          max_horizon=h
          )



AttributeError: 'Series' object has no attribute 'isocalendar'

## Notes

* Use RMSE, as its units will be in PPR, not PPR<sup>2</sup>
* Horizon param. is subject to change