In [1]:
# Dependencies
import nfl_data_py as nfl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Seasons to load: 2016 through 2022 inclusive.
# range() excludes its stop value, so the last season is written as 2022 + 1.
years = range(2016, 2022 + 1)

In [3]:
# Pull play-by-play data for every season in `years` into one DataFrame.
# The loader prints a "<season> done." line as each season finishes.
data = nfl.import_pbp_data(years=years)

2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.


: 

: 

In [None]:
# Restrict the play-by-play data to QB scramble plays (qb_scramble == 1.0),
# keeping only the columns used downstream: rusher name, season and epa.
is_scramble = data['qb_scramble'] == 1.0
df = data.loc[is_scramble, ['rusher_player_name', 'season', 'epa']]

: 

In [None]:
# Collapse the scramble plays to one row per (player, season) with two
# summary columns:
#   attempts    - number of scramble rows in the group
#   epa_per_att - mean epa across those rows
by_player_season = df.groupby(['rusher_player_name', 'season'])
scramble_summary = by_player_season.agg(
    attempts=pd.NamedAgg(column='rusher_player_name', aggfunc='count'),
    epa_per_att=pd.NamedAgg(column='epa', aggfunc='mean'),
)
# Turn the (player, season) group keys back into ordinary columns.
df = scramble_summary.reset_index()


: 

In [None]:
# Sort by player and then by season so that, within each player, the row
# after season n is that player's next recorded season.
df = df.sort_values(['rusher_player_name', 'season'])

# For each player-season, look one row ahead within the same player to get
# the following row's EPA per attempt and the season it belongs to.
by_player = df.groupby('rusher_player_name')
next_epa = by_player['epa_per_att'].shift(-1)
next_season = by_player['season'].shift(-1)

# A player can be missing a season entirely (no scrambles recorded), in which
# case the "next row" is two or more years later. Only accept the pairing when
# the next row really is season n+1; otherwise leave next_year_epa as NaN.
is_consecutive = next_season == df['season'] + 1

# Build the comparison frame: current-season stats plus next season's EPA.
df_comparison = df.copy()
df_comparison['next_year_epa'] = next_epa.where(is_consecutive)

# Drop rows without a usable "next year": each player's last season, and any
# season that is followed by a gap rather than by season n+1.
df_comparison = df_comparison.dropna(subset=['next_year_epa'])

: 

In [None]:
# Inspect the paired table; as the cell's last expression the frame is rendered as its output.
df_comparison

: 

In [None]:
# filter to minimum of 20 attempts
# Per-attempt averages built from only a handful of scrambles are noisy, so
# keep only player-seasons whose year-n sample reaches MIN_ATTEMPTS.
# NOTE(review): only the year-n `attempts` count exists in df_comparison; the
# following season's volume is not checked here - confirm that is intended.
MIN_ATTEMPTS = 20
df_comparison = df_comparison[df_comparison['attempts'] >= MIN_ATTEMPTS]

: 

In [None]:
# Scatter plot of EPA per scramble: the current season's value (epa_per_att)
# on the x-axis against the paired next_year_epa value on the y-axis.
# The aggregated frame has no 'epa' column - the per-attempt mean is stored
# as 'epa_per_att' - so that is the column plotted and used for label positions.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_comparison['epa_per_att'], df_comparison['next_year_epa'])

# adding title and labels
ax.set_title('QB EPA per Scramble Y/Y')
ax.set_xlabel('EPA per scramble in year n')
ax.set_ylabel('EPA per scramble in year n+1')

# add x and y reference lines at 0 to split the plot into quadrants
ax.axhline(0, color='black', linewidth=0.5)
ax.axvline(0, color='black', linewidth=0.5)

# label every point with "<player name>, <season>" (season = year n)
for player, season, x, y in zip(
    df_comparison['rusher_player_name'],
    df_comparison['season'],
    df_comparison['epa_per_att'],
    df_comparison['next_year_epa'],
):
    ax.text(x, y, player + ', ' + str(season), fontsize=8)

plt.show()


: 

: 