In [7]:
# import necessary packages
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [8]:
# load weekly player stats: 2023 for the test frame, 2019-2022 for the train frame
test_seasons = [2023]
train_seasons = [2022, 2021, 2020, 2019]
weekly_data_test = nfl.import_weekly_data(test_seasons)
weekly_data_train = nfl.import_weekly_data(train_seasons)

Downcasting floats.
Downcasting floats.


In [9]:
# preview the first five rows of the 2023 (test) weekly data
weekly_data_test.head()

Unnamed: 0,player_id,player_name,player_display_name,position,position_group,headshot_url,recent_team,season,week,season_type,...,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr
0,00-0023459,A.Rodgers,Aaron Rodgers,QB,QB,https://static.www.nfl.com/image/private/f_aut...,NYJ,2023,1,REG,...,0.0,,0,,,,,0.0,0.0,0.0
1,00-0024243,M.Lewis,Marcedes Lewis,TE,TE,https://static.www.nfl.com/image/private/f_aut...,CHI,2023,4,REG,...,0.0,0.483465,0,0.0,0.03125,-0.012397,0.038197,0.0,0.8,1.8
2,00-0024243,M.Lewis,Marcedes Lewis,TE,TE,https://static.www.nfl.com/image/private/f_aut...,CHI,2023,7,REG,...,1.0,1.437224,0,3.2,0.035714,0.089286,0.116071,0.0,1.6,2.6
3,00-0026498,M.Stafford,Matthew Stafford,QB,QB,https://static.www.nfl.com/image/private/f_aut...,LA,2023,1,REG,...,0.0,,0,,,,,0.0,14.46,14.46
4,00-0026498,M.Stafford,Matthew Stafford,QB,QB,https://static.www.nfl.com/image/private/f_aut...,LA,2023,2,REG,...,0.0,,0,,,,,0.0,13.98,13.98


In [10]:
# list the distinct position codes present in the 2023 (test) weekly data
weekly_data_test['position'].unique()

array(['QB', 'TE', 'WR', 'P', 'FB', 'RB', 'T', 'OLB', 'ILB', 'CB', 'G',
       'SS', 'DT'], dtype=object)

In [11]:
# show the column names that nfl_data_py reports for weekly data
nfl.see_weekly_cols()

Index(['player_id', 'player_name', 'player_display_name', 'position',
       'position_group', 'headshot_url', 'recent_team', 'season', 'week',
       'season_type', 'completions', 'attempts', 'passing_yards',
       'passing_tds', 'interceptions', 'sacks', 'sack_yards', 'sack_fumbles',
       'sack_fumbles_lost', 'passing_air_yards', 'passing_yards_after_catch',
       'passing_first_downs', 'passing_epa', 'passing_2pt_conversions', 'pacr',
       'dakota', 'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles',
       'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_epa',
       'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost',
       'receiving_air_yards', 'receiving_yards_after_catch',
       'receiving_first_downs', 'receiving_epa', 'receiving_2pt_conversions',
       'racr', 'target_share', 'air_yards_share', 'wopr', 'special_teams_tds',
       'fantasy_points', 'fantasy_points_ppr'],
      dtype='object')

# Filter Variables from Entire Data

In [12]:
# drop cols that are not needed in any dataframe
cols_to_drop = [
    'player_id',
    'player_display_name',
    'position_group',
    'headshot_url',
    'season_type',
    'receiving_2pt_conversions',
    'interceptions',
    'sacks',
    'sack_yards',
    'sack_fumbles',
    'sack_fumbles_lost',
    'receiving_fumbles_lost',
    'receiving_fumbles',
    'rushing_fumbles',
    'rushing_fumbles_lost',
    'special_teams_tds',
]

# Rebind the names instead of dropping in place, and ignore labels that are
# already gone, so re-running this cell does not raise KeyError on columns it
# removed the first time.
weekly_data_test = weekly_data_test.drop(columns=cols_to_drop, errors="ignore")
weekly_data_train = weekly_data_train.drop(columns=cols_to_drop, errors="ignore")

In [13]:
# confirm that the labels in cols_to_drop no longer appear in the test frame
weekly_data_test.columns

Index(['player_name', 'position', 'recent_team', 'season', 'week',
       'opponent_team', 'completions', 'attempts', 'passing_yards',
       'passing_tds', 'passing_air_yards', 'passing_yards_after_catch',
       'passing_first_downs', 'passing_epa', 'passing_2pt_conversions', 'pacr',
       'dakota', 'carries', 'rushing_yards', 'rushing_tds',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions',
       'receptions', 'targets', 'receiving_yards', 'receiving_tds',
       'receiving_air_yards', 'receiving_yards_after_catch',
       'receiving_first_downs', 'receiving_epa', 'racr', 'target_share',
       'air_yards_share', 'wopr', 'fantasy_points', 'fantasy_points_ppr'],
      dtype='object')

## Filter by Position

In [14]:

def _select_positions(frame, positions):
    """Return an independent copy of the rows of ``frame`` whose 'position' is in ``positions``.

    The ``.copy()`` detaches the result from ``frame`` so that later edits to a
    per-position frame (e.g. dropping columns) do not trigger pandas'
    SettingWithCopyWarning.
    """
    return frame[frame["position"].isin(positions)].copy()


#QB
qb_train = _select_positions(weekly_data_train, ["QB"])
qb_test = _select_positions(weekly_data_test, ["QB"])

#RB
rb_train = _select_positions(weekly_data_train, ["RB"])
rb_test = _select_positions(weekly_data_test, ["RB"])

#WR
wr_train = _select_positions(weekly_data_train, ["WR"])
wr_test = _select_positions(weekly_data_test, ["WR"])

#TE
te_train = _select_positions(weekly_data_train, ["TE"])
te_test = _select_positions(weekly_data_test, ["TE"])

#FLEX: any of the positions listed in flex_list
flex_list = ["RB", "WR", "TE"]
flex_train = _select_positions(weekly_data_train, flex_list)
flex_test = _select_positions(weekly_data_test, flex_list)

In [19]:
# sanity check: flex_train should contain only the positions in flex_list (RB, WR, TE)
flex_train["position"].unique()

array(['TE', 'WR', 'RB'], dtype=object)

## Remove NA Values / Drop Cols by Position

In [None]:
# define removeNA function
def removeNA(input_df):
    """Return a copy of ``input_df`` without the columns that contain any NA value.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Frame to clean; it is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame holding only the columns of ``input_df`` that have no
        missing values.
    """
    # dropna(axis=1) removes columns (not rows). The result has to be returned
    # explicitly; a bare `return` would hand callers None.
    return input_df.dropna(axis=1)

In [15]:
# list qb_train's columns to choose which ones to drop for QBs
# NOTE(review): this shows column names only, not NA counts;
# something like qb_train.isna().sum() would be needed to actually inspect NAs
qb_train.columns


Index(['player_name', 'position', 'recent_team', 'season', 'week',
       'completions', 'attempts', 'passing_yards', 'passing_tds',
       'passing_air_yards', 'passing_yards_after_catch', 'passing_first_downs',
       'passing_epa', 'passing_2pt_conversions', 'pacr', 'dakota', 'carries',
       'rushing_yards', 'rushing_tds', 'rushing_first_downs', 'rushing_epa',
       'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards',
       'receiving_tds', 'receiving_air_yards', 'receiving_yards_after_catch',
       'receiving_first_downs', 'receiving_epa', 'racr', 'target_share',
       'air_yards_share', 'wopr', 'fantasy_points', 'fantasy_points_ppr'],
      dtype='object')

In [None]:
# define qb cols to drop: receiving stats, which are not wanted in the QB frames
qb_cols_to_drop = ['receptions',
                   'targets',
                   'receiving_yards',
                   'receiving_tds',
                   'receiving_air_yards',
                   'receiving_yards_after_catch',
                   'receiving_first_downs',
                   'receiving_epa',
                   'racr',
                   'target_share',
                   'air_yards_share',
                   # wopr is NaN for the QB rows shown in the head() output,
                   # just like racr / target_share / air_yards_share, so it is
                   # dropped with them
                   'wopr',
                   ]

In [None]:
#inspect rb for NAs

In [None]:
# define rb cols to drop
# TODO: still empty - fill in once rb_train has been inspected for NAs
rb_cols_to_drop = []

In [None]:
# inspect wr for NAs

In [None]:
# define wr cols to drop
# TODO: still empty - fill in once wr_train has been inspected for NAs
wr_cols_to_drop = []

In [None]:
# inspect te for NAs

In [None]:
# define te cols to drop
# TODO: still empty - fill in once te_train has been inspected for NAs
te_cols_to_drop = []

In [None]:
# inspect flex for NAs

In [None]:
# define flex cols to drop
# TODO: still empty - fill in once flex_train has been inspected for NAs
flex_cols_to_drop = []