In [None]:
# install the nflfastpy package
pip install nflfastpy

In [None]:
# standard library
from io import BytesIO

# the normal third-party packages
import pandas as pd
import requests
import seaborn as sns
from IPython.display import display  # shows a table that is not the last expression of a cell
from matplotlib import pyplot as plt

# NFL play-by-play data loader
import nflfastpy as nfl

In [None]:
# load in the play-by-play data
# the value passed to the loader is kept in a named variable so it is easy to spot
season = 2021
df = nfl.load_pbp_data(season)

In [None]:
# preview the dataset: first 5 rows only
df.head(n=5)

In [None]:
# see what the column names are
# the repr of `df.columns` is abbreviated with "..." once the frame has more
# columns than pandas' `display.max_seq_items` option, so convert to a plain
# list to see every name
df.columns.tolist()

In [None]:
# look at a handful of specific columns (first rows only)
preview_columns = ["posteam", "defteam", "pass", "rush", "epa"]
df[preview_columns].head()

In [None]:
# let's rank offenses from best to worst using epa
# step 1: mean epa for every team in possession
offense_epa = df.groupby('posteam')[['epa']].mean()
# step 2: highest mean epa first
offense_epa.sort_values(by='epa', ascending=False)

In [None]:
# let's do the same for QB's

# minimum number of plays a passer needs to be included in the ranking
MIN_DROPBACKS = 200

# Built as one chain of non-mutating steps: sorting a filtered frame with
# inplace=True can raise SettingWithCopyWarning, and the chain makes the cell
# give the same result every time it is re-run.
qbs = (
    df.groupby(['passer_player_id', 'passer_player_name'], as_index=False)
    # plays per passer, plus mean epa and mean cpoe
    .agg({'play_id': 'count', 'epa': 'mean', 'cpoe': 'mean'})
    # at least MIN_DROPBACKS plays
    .loc[lambda passers: passers['play_id'] >= MIN_DROPBACKS]
    # sort by EPA, best first
    .sort_values('epa', ascending=False)
    # round to two decimal places where appropriate
    .round(2)
    # rename columns by name rather than by position, so a change in column
    # order cannot silently mislabel the table
    .rename(columns={
        'passer_player_id': 'ID',
        'passer_player_name': 'Player',
        'play_id': 'Dropbacks',
        'epa': 'EPA',
        'cpoe': 'CPOE',
    })
)

qbs

In [None]:
# Now we're going to make a yards past the sticks graph

# the columns we need
pass_columns = ['passer_player_id', 'passer_player_name', 'posteam', 'down', 'ydstogo', 'air_yards']

# filter to get the data we want:
# pass attempts on 3rd or 4th down that have a non-null air_yards value
is_late_down_pass = (
    (df['pass_attempt'] == 1)
    & df['air_yards'].notnull()
    & df['down'].isin([3, 4])
)

# Select rows and columns in a single .loc call and add the new column with
# .assign, which returns a new frame. Assigning a column onto a slice of a
# slice (the previous approach) triggers pandas' SettingWithCopyWarning.
df_passes = (
    df.loc[is_late_down_pass, pass_columns]
    # create a new column: air yards minus yards to go
    .assign(yards_past_sticks=lambda passes: passes['air_yards'] - passes['ydstogo'])
)

# check the dataset to make sure everything is there
df_passes.head(10)

In [None]:
# get the top 10 passers in yards past sticks
top_10 = (
    df_passes
    .groupby(['passer_player_id', 'passer_player_name', 'posteam'], as_index=False)['yards_past_sticks']
    .sum()
    .sort_values(by='yards_past_sticks', ascending=False)
    .head(10)
)

# load the team color logos
team_logo_colors = nfl.load_team_logo_data()

# see what the table looks like
# (a bare `top_10` in the middle of a cell shows nothing; only the last
# expression of a cell is rendered, so display it explicitly)
display(top_10)

# team colors keyed by the same column name the passes dataset uses
team_colors = (
    team_logo_colors[['team_abbr', 'team_color', 'team_color2']]
    .rename(columns={'team_abbr': 'posteam'})
)

df_passes = (
    df_passes
    # keep just the passers from the top 10. The mask is built from df_passes
    # itself: a mask built from the full play-by-play frame only works through
    # index alignment and picks the wrong rows once the merge below has reset
    # the index (e.g. when this cell is re-run).
    .loc[df_passes['passer_player_id'].isin(top_10['passer_player_id'])]
    # drop colors left over from a previous run so re-running the cell does not
    # produce team_color_x / team_color_y columns
    .drop(columns=['team_color', 'team_color2'], errors='ignore')
    # add the team colors
    .merge(team_colors, on='posteam')
)

# see what the dataset looks like
df_passes.head(10)


In [None]:
# create figure and axes
# 5 here means 5 rows
# 2 here means 2 columns
# set the figsize to 10 inches x 15 inches
fig, ax_grid = plt.subplots(5, 2, figsize=(10, 15))

# plt.subplots returns the axes as a 5x2 numpy array;
# flatten it in to a single sequence of 10 axes, row by row
axes = ax_grid.flatten()

# separate our play by play df in to one df per passer
players = [group[-1] for group in df_passes.groupby('passer_player_id')]

# zip that ^ and the axes together and iterate over them
for ax, player_df in zip(axes, players):

    # every row of player_df carries the same team color, so take the first
    team_color = player_df['team_color'].values[0]

    # plot the yards past sticks distribution
    sns.kdeplot(player_df['yards_past_sticks'], ax=ax, lw=4, color=team_color)

    # fill in the area underneath the curve
    # (the kde curve is the first line drawn on this axis)
    xy = ax.get_lines()[0].get_xydata()
    x, y = xy[:, 0], xy[:, 1]
    ax.fill_between(x, y, color=team_color, alpha=0.4)

    # adjust y ticks to only include these values
    ax.set_yticks([0, 0.1, 0.2])

    # set the axis title to the passer's name
    passer_name = player_df['passer_player_name'].values[0]
    ax.set_title(f'\n{passer_name}', fontsize=16, fontweight=450)

    # set ylim and xlim so every panel shares the same scale
    ax.set_ylim(bottom=0, top=0.20)
    ax.set_xlim(left=-5, right=55)

# if there are fewer passers than panels, hide the panels that were not used
for unused_ax in axes[len(players):]:
    unused_ax.set_visible(False)

# figure-level settings: applied once, after all panels are drawn,
# instead of being repeated on every loop iteration

# set the title for the figure (not the axis's)
fig.suptitle("Top 10 QB's in Yards Past Sticks in 2021", fontsize=18)
# add some margin between axis's
fig.tight_layout()
# adjust the subplots to add some room for the super title
fig.subplots_adjust(top=0.92)
# set figure background color
fig.set_facecolor('white')
