In [1]:
# import packages
import nfl_data_py as nfl
import numpy as np
import pandas as pd
from scipy import integrate

In [2]:
# import data
pbp_data = nfl.import_pbp_data(list(range(1999, 2024)))
pbp_data['home_team'].replace({'SD': 'LAC', 'OAK': 'LV', 'STL': 'LA'}, inplace=True)
pbp_data['away_team'].replace({'SD': 'LAC', 'OAK': 'LV', 'STL': 'LA'}, inplace=True)
cols = list(nfl.see_pbp_cols())
sample = pbp_data.sample(10)

1999 done.
2000 done.
2001 done.
2002 done.
2003 done.
2004 done.
2005 done.
2006 done.
2007 done.
2008 done.
2009 done.
2010 done.
2011 done.
2012 done.
2013 done.
2014 done.
2015 done.
2016 done.
2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
2023 done.
Downcasting floats.


In [3]:
# set up result DataFrame
ingame_stats = {
    'game_id': [], 
    'home_team': [],
    'away_team': [],
    'home_dsr': [],
    'away_dsr': [],
    'home_gc': [],
    'away_gc': [],
    'home_gc_signed': [],
    'away_gc_signed': [],
    'home_gc_avg': [],
    'away_gc_avg': [],
    'home_gc_fourthqtr': [],
    'away_gc_fourthqtr': []
}

In [4]:
# compute DSR and game control for each game
# DSR = (1st Downs + TDs) / (Drives + 1st Downs - TDs), where TDs are included in 1st Downs
# GC = integral of win probability over all plays

games = pbp_data.groupby('game_id')

# sum number of first downs and drives for each team (remove penalty firsts, end-of-half drives, qb kneel drives)
for game_group in games.groups:
    game = games.get_group(game_group)

    home_firsts = 0
    home_drives = 0
    home_tds = 0
    away_firsts = 0
    away_drives = 0
    away_tds = 0

    drives = game.groupby('drive')
    for drive_group in drives.groups:
        drive = drives.get_group(drive_group)

        if 0 in set(drive['half_seconds_remaining']):
            continue
        if 'home' in set(drive['posteam_type']):
            home_firsts -= list(drive['first_down_penalty']).count(1)
            home_tds += 1 if 1 in set(drive['touchdown']) else 0
            home_firsts += np.nanmax(drive['drive_first_downs']) if np.nanmax(drive['drive_first_downs']) else 0
            home_drives += 1
        elif 'away' in set(drive['posteam_type']):
            away_firsts -= list(drive['first_down_penalty']).count(1)
            away_tds += 1 if 1 in set(drive['touchdown']) else 0
            away_firsts += np.nanmax(drive['drive_first_downs']) if np.nanmax(drive['drive_first_downs']) else 0
            away_drives += 1
        else:
            continue

    # game ID info
    ingame_stats['game_id'].append(game['game_id'].iloc[0])
    ingame_stats['home_team'].append(game['home_team'].iloc[0])
    ingame_stats['away_team'].append(game['away_team'].iloc[0])

    # DSR stats
    try:
        ingame_stats['home_dsr'].append(home_firsts / (home_drives + home_firsts - home_tds))
        ingame_stats['away_dsr'].append(away_firsts / (away_drives + away_firsts - away_tds))
    except ZeroDivisionError:
        print(game['game_id'].iloc[0])
        ingame_stats['home_dsr'].append(0)
        ingame_stats['away_dsr'].append(0)

    # integrate win probability to get GC stats
    ingame_stats['home_gc'].append(integrate.cumulative_trapezoid(game['home_wp'].dropna())[-1]) #standard GC
    ingame_stats['away_gc'].append(integrate.cumulative_trapezoid(game['away_wp'].dropna())[-1])

    ingame_stats['home_gc_signed'].append(integrate.cumulative_trapezoid(game['home_wp'].dropna() - .5)[-1]) #signed GC
    ingame_stats['away_gc_signed'].append(integrate.cumulative_trapezoid(game['away_wp'].dropna() - .5)[-1])

    ingame_stats['home_gc_avg'].append(integrate.cumulative_trapezoid(game['home_wp'].dropna() - .5)[-1] * 1/len(game['home_wp'].dropna())) #avg GC
    ingame_stats['away_gc_avg'].append(integrate.cumulative_trapezoid(game['away_wp'].dropna() - .5)[-1] * 1/len(game['away_wp'].dropna()))

    home_fourth_wp = [play['home_wp'] - .5 if play['qtr'] == 4 else 0 for (index, play) in game.iterrows()] #4th quarter GC
    away_fourth_wp = [play['away_wp'] - .5 if play['qtr'] == 4 else 0 for (index, play) in game.iterrows()]
    ingame_stats['home_gc_fourthqtr'].append(integrate.cumulative_trapezoid(home_fourth_wp)[-1])
    ingame_stats['away_gc_fourthqtr'].append(integrate.cumulative_trapezoid(away_fourth_wp)[-1])

2013_18_KC_IND
2013_18_NO_PHI
2013_18_SD_CIN
2013_18_SF_GB
2013_19_IND_NE
2013_19_SD_DEN
2013_19_SF_CAR
2013_20_NE_DEN
2013_20_SF_SEA
2013_21_SEA_DEN


In [5]:
# example game control stats for Super Bowl 57, where home team is Eagles
weight_ex = [play['home_wp'] - .5 if play['qtr'] != 4 else (play['home_wp'] - .5) * 1.5 for (index, play) in game.iterrows()]
fourth_wp = [play['home_wp'] - .5 if play['qtr'] == 4 else 0 for (index, play) in game.iterrows()]
print(game['game_id'].iloc[0])
print(integrate.cumulative_trapezoid(weight_ex)[-1])
print(integrate.cumulative_trapezoid(game['home_wp'] - .5)[-1])
print(integrate.cumulative_trapezoid(game['home_wp'] * 1/len(game['home_wp']))[-1]) #average value of integral
print(integrate.cumulative_trapezoid(fourth_wp)[-1]) #4th quarter GC

2023_18_TB_CAR
-25.083038602024317
-17.852156169712543
0.3808301547421262
-14.461764864623547


In [6]:
# save ingame_stats
ingame_stats_df = pd.DataFrame(ingame_stats)
ingame_stats_df.to_csv('data/ingame_stats.csv', index=False)