In [37]:
## Expanded on Tej Seth's NFL tutorials

In [1]:
pip install nflfastpy

Collecting nflfastpy
  Downloading nflfastpy-0.0.14-py3-none-any.whl (3.5 kB)
Collecting pyreadr
  Downloading pyreadr-0.4.4-cp38-cp38-macosx_10_9_x86_64.whl (250 kB)
[K     |████████████████████████████████| 250 kB 2.0 MB/s eta 0:00:01
Installing collected packages: pyreadr, nflfastpy
Successfully installed nflfastpy-0.0.14 pyreadr-0.4.4
Note: you may need to restart the kernel to use updated packages.


In [2]:
# import the normal packages
import pandas as pd
import nflfastpy as nfl
from matplotlib import pyplot as plt
import seaborn as sns

# additional stuff that will help us later on
import requests
from io import BytesIO

In [3]:
df = nfl.load_pbp_data(2021)

In [4]:
df

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe
0,1,2021_01_ARI_TEN,2021091207,TEN,ARI,REG,1,,,,...,0,1,,,,,,,,
1,40,2021_01_ARI_TEN,2021091207,TEN,ARI,REG,1,TEN,home,ARI,...,0,1,0.000000,,,,,,,
2,55,2021_01_ARI_TEN,2021091207,TEN,ARI,REG,1,TEN,home,ARI,...,0,1,-1.399805,,,,,,0.491433,-49.143299
3,76,2021_01_ARI_TEN,2021091207,TEN,ARI,REG,1,TEN,home,ARI,...,0,1,0.032412,1.165133,5.803177,4.0,0.896654,0.125098,0.697346,30.265415
4,100,2021_01_ARI_TEN,2021091207,TEN,ARI,REG,1,TEN,home,ARI,...,0,1,-1.532898,0.256036,4.147637,2.0,0.965009,0.965009,0.978253,2.174652
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49470,4496,2021_19_SF_DAL,2022011602,DAL,SF,POST,19,DAL,home,SF,...,1,0,0.350600,0.286266,3.663947,2.0,0.999042,0.391675,0.996538,0.346160
49471,4525,2021_19_SF_DAL,2022011602,DAL,SF,POST,19,,,,...,0,0,0.000000,,,,,,,
49472,4548,2021_19_SF_DAL,2022011602,DAL,SF,POST,19,DAL,home,SF,...,0,0,0.557666,,,,,,0.988181,-98.818052
49473,4584,2021_19_SF_DAL,2022011602,DAL,SF,POST,19,DAL,home,SF,...,0,0,-1.875698,,,,,,,


In [5]:
df.columns

Index(['play_id', 'game_id', 'old_game_id', 'home_team', 'away_team',
       'season_type', 'week', 'posteam', 'posteam_type', 'defteam',
       ...
       'out_of_bounds', 'home_opening_kickoff', 'qb_epa', 'xyac_epa',
       'xyac_mean_yardage', 'xyac_median_yardage', 'xyac_success', 'xyac_fd',
       'xpass', 'pass_oe'],
      dtype='object', length=372)

In [9]:
df[["posteam", "defteam", "pass", "rush", "epa"]].head()

Unnamed: 0,posteam,defteam,pass,rush,epa
0,,,0,0,
1,TEN,ARI,0,0,0.0
2,TEN,ARI,0,1,-1.399805
3,TEN,ARI,1,0,0.032412
4,TEN,ARI,1,0,-1.532898


In [14]:
#group by posteam and look at mean of epa
df.groupby('posteam')[['epa']].mean().sort_values(by = 'epa', ascending = False)

Unnamed: 0_level_0,epa
posteam,Unnamed: 1_level_1
KC,0.115459
GB,0.087398
TB,0.077287
BUF,0.07392
LA,0.052903
SF,0.045069
NE,0.043025
LAC,0.042011
DAL,0.041554
IND,0.039431


In [21]:
qbs = df.groupby(['passer_player_id', 'passer_player_name'],
                 as_index = False).agg({'play_id': 'count', 'epa' :'mean', 'cpoe': 'mean'})

#how to filter for only 199 dropbacks
qbs = qbs.loc[qbs.play_id > 199]

#sort on EPA
qbs.sort_values('epa', ascending = False, inplace = True)

#round to 2 decimals
qbs = qbs.round(2)

#rename columns
qbs.columns = ['ID', 'Player', 'Dropbacks', 'EPA', 'CPOE']

In [22]:
qbs

Unnamed: 0,ID,Player,Dropbacks,EPA,CPOE
3,00-0023459,A.Rodgers,287,0.31,6.07
12,00-0026498,M.Stafford,652,0.2,0.19
70,00-0033873,P.Mahomes,730,0.2,3.12
0,00-0019596,T.Brady,783,0.18,2.03
116,00-0036442,J.Burrow,609,0.16,6.95
40,00-0031345,J.Garoppolo,497,0.16,2.22
114,00-0036355,J.Herbert,710,0.15,-0.55
4,00-0023459,Aa.Rodgers,275,0.15,5.58
58,00-0033077,D.Prescott,679,0.14,1.56
85,00-0034857,J.Allen,656,0.12,1.93


In [23]:
#how to look at pass attempts and air yards for 3rd and 4th downs
df_passes = df.loc[(df['pass_attempt'] == 1) & (df['air_yards'].notnull()) & (df.down.isin([3,4]))]

In [25]:
df_passes[["down", "air_yards"]]

Unnamed: 0,down,air_yards
4,3.0,10.0
15,3.0,16.0
25,3.0,5.0
30,3.0,-1.0
34,3.0,3.0
...,...,...
49420,3.0,2.0
49436,3.0,5.0
49444,3.0,3.0
49453,3.0,7.0


In [26]:
df_passes = df_passes[['passer_player_id', 'passer_player_name', 'posteam', 'down', 'ydstogo', 'air_yards']]

In [29]:
# this will tell 
df_passes['yards_past_sticks'] = df_passes['air_yards'] - df_passes['ydstogo']

df_passes.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_passes['yards_past_sticks'] = df_passes['air_yards'] - df_passes['ydstogo']


Unnamed: 0,passer_player_id,passer_player_name,posteam,down,ydstogo,air_yards,yards_past_sticks
4,00-0029701,R.Tannehill,TEN,3.0,10,10.0,0.0
15,00-0035228,K.Murray,ARI,3.0,16,16.0,0.0
25,00-0035228,K.Murray,ARI,3.0,5,5.0,0.0
30,00-0029701,R.Tannehill,TEN,3.0,16,-1.0,-17.0
34,00-0035228,K.Murray,ARI,3.0,3,3.0,0.0
38,00-0029701,R.Tannehill,TEN,3.0,12,3.0,-9.0
39,00-0032496,M.Farley,TEN,4.0,4,4.0,0.0
64,00-0029701,R.Tannehill,TEN,3.0,10,7.0,-3.0
72,00-0035228,K.Murray,ARI,3.0,10,17.0,7.0
77,00-0035228,K.Murray,ARI,3.0,6,20.0,14.0


In [35]:
#top 10 quarterbacks with yards past the sticks
top_10 = df_passes.groupby(['passer_player_id', 'passer_player_name', 'posteam'],
                           as_index = False)['yards_past_sticks'].sum().sort_values(by = 'yards_past_sticks', ascending = False)[:10]

top_10

Unnamed: 0,passer_player_id,passer_player_name,posteam,yards_past_sticks
11,00-0026498,M.Stafford,LA,707.0
21,00-0029263,R.Wilson,SEA,597.0
43,00-0033077,D.Prescott,DAL,522.0
31,00-0031280,D.Carr,LV,507.0
23,00-0029604,K.Cousins,MIN,477.0
82,00-0036355,J.Herbert,LAC,461.0
69,00-0035228,K.Murray,ARI,415.0
52,00-0033873,P.Mahomes,KC,398.0
89,00-0036971,T.Lawrence,JAX,381.0
0,00-0019596,T.Brady,TB,380.0
