In [1]:
# Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

import nfl_data_py as nfl


In [2]:
# Turn off max columns for pandas DataFrame
pd.set_option('display.max_columns', None)

In [3]:
years = range(1999, 2023)

In [4]:
data = nfl.import_pbp_data(years, cache=True, alt_path="../cache")

2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
Downcasting floats.


In [82]:
picks = nfl.import_draft_picks(years)

In [83]:
picks = picks[['season', 'team', 'position', 'round', 'pfr_player_name']]

In [84]:
# Keep only the rows where the position is RB and the round is less than or equal to 5
picks = picks[(picks['position'] == 'RB') & (picks['round'] <= 5)]

In [85]:
picks = picks.rename(columns={'season': 'draft year', 'pfr_player_name': 'rusher_player_name'})

In [86]:
# Split the name into a list
name_split = picks['rusher_player_name'].str.split()

# Take the first character of the first name and combine it with the last name
# If there's a third part (like "Jr."), ignore it
picks['rusher_player_name'] = name_split.apply(
    lambda x: x[0][0] + '.' + x[1] if len(x) > 2 else x[0][0] + '.' + x[-1])

In [87]:
df = data[['season', 'posteam', 'play_type', 'epa', 'season_type']]

In [88]:
df = df[(df['season_type'] == 'REG') & (df['play_type'] == 'run')]

In [89]:
# group by season and team and add a column for epa/rush and epa/rush for the next season
df = df.groupby(['season', 'posteam'])\
    .agg({'epa': 'mean'})\
        .reset_index()\
            .rename(columns={'epa': 'team epa/rush', 'posteam': 'team'})

In [90]:
# add a column for epa/rush rank within the season and the epa/rush rank for the next season
df['team epa/rush rank'] = df.groupby('season')['team epa/rush'].rank(ascending=False)

df['team epa/rush rank next season'] = df.groupby('team')['team epa/rush rank'].shift(-1)

In [91]:
# Keep only the bottom 5 teams in epa/rush for each year
df = df.groupby('season')\
    .apply(lambda x: x.nsmallest(5, 'team epa/rush'))\
        .reset_index(drop=True)

In [92]:
# Create a column for the draft year, which is the season + 1
df['draft year'] = df['season'] + 1

In [93]:
# Merge the draft picks and the bottom 5 teams in epa/rush
df = df.merge(picks, on=['draft year', 'team'], how='left')

In [94]:
df

Unnamed: 0,season,team,team epa/rush,team epa/rush rank,team epa/rush rank next season,draft year,position,round,rusher_player_name
0,2018,MIN,-0.172928,32.0,10.0,2019,RB,3.0,A.Mattison
1,2018,ARI,-0.155427,31.0,3.0,2019,,,
2,2018,NYJ,-0.144788,30.0,31.0,2019,,,
3,2018,LV,-0.121424,29.0,20.0,2019,,,
4,2018,SF,-0.113908,28.0,11.0,2019,,,
5,2019,PIT,-0.230284,32.0,32.0,2020,RB,4.0,A.McFarland
6,2019,NYJ,-0.171019,31.0,29.0,2020,RB,4.0,L.Perine
7,2019,MIA,-0.1647,30.0,22.0,2020,,,
8,2019,CHI,-0.153228,29.0,18.0,2020,,,
9,2019,LA,-0.141041,28.0,23.0,2020,,,


In [95]:
# Create a dataframe of season, team, rusher_id, rusher_player_name, play_type, and epa
rushers = data[['season', 'posteam', 'rusher_player_name', 'play_type', 'epa', 'season_type']]

In [96]:
# Keep only the rows where the play_type is run
rushers = rushers[(rushers['season_type'] == 'REG') & (rushers['play_type'] == 'run')]

In [97]:
# Group by season, team, rusher_id, and rusher_player_name and add a column for epa/rush and # of rushes
rushers = rushers.groupby(['season', 'posteam', 'rusher_player_name'])\
    .agg({'epa': 'mean', 'play_type': 'count'})\
        .reset_index()\
            .rename(columns={'epa': 'rusher epa/rush', 'play_type': 'attempts', 'season': 'draft year', 'posteam': 'team'})


In [98]:
# inner join the two dataframes on draft year and team
df = df.merge(rushers, on=['draft year', 'team', 'rusher_player_name'], how='inner')

In [99]:
df

Unnamed: 0,season,team,team epa/rush,team epa/rush rank,team epa/rush rank next season,draft year,position,round,rusher_player_name,rusher epa/rush,attempts
0,2018,MIN,-0.172928,32.0,10.0,2019,RB,3.0,A.Mattison,-0.126276,100
1,2019,PIT,-0.230284,32.0,32.0,2020,RB,4.0,A.McFarland,-0.153372,33
2,2019,NYJ,-0.171019,31.0,29.0,2020,RB,4.0,L.Perine,-0.156732,64
3,2020,PIT,-0.118391,32.0,20.0,2021,RB,1.0,N.Harris,-0.063137,307
4,2020,NYJ,-0.101507,29.0,17.0,2021,RB,4.0,M.Carter,0.085481,24
5,2021,HOU,-0.156367,32.0,31.0,2022,RB,4.0,D.Pierce,-0.127002,220
6,2021,ATL,-0.14938,31.0,10.0,2022,RB,5.0,T.Allgeier,0.053756,211
