In [1]:
from os.path import join
from tqdm import tqdm
import pandas as pd
from py import util 

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

DATA_DIR = 'data'

## 1. Load the data

In [4]:
cols = ['game_id', 'play_id', 'play_description', 'quarter', 'down',
       'yards_to_go', 'possession_team', 'defensive_team', 'game_clock', 
       'pre_snap_home_score', 'pre_snap_visitor_score', 
       'absolute_yardline_number', 'pre_snap_home_team_win_probability',
       'pre_snap_visitor_team_win_probability', 'expected_points',
       'offense_formation', 'receiver_alignment', 'play_clock_at_snap',
       'rush_location_type',
       'yards_gained', 'home_team_win_probability_added',
       'visitor_team_win_probility_added', 'expected_points_added',
       'pff_run_concept_primary',
       'pff_pass_coverage', 'pff_man_zone']

plays = (
    util.uncamelcase_columns(pd.read_csv(join(DATA_DIR, 'plays.csv')))
    .query('pff_run_concept_primary.notnull() and ~pff_run_concept_primary.isin(["TRICK","UNDEFINED"])')
    .query('play_nullified_by_penalty == "N"')
    .query('yards_gained == pre_penalty_yards_gained')
    .query('qb_kneel == 0 and qb_sneak != True and qb_spike != True')
    .query('pass_result.isna() and is_dropback == False') # Designed runs only
    .query('pff_run_pass_option == 0')
    .query('dropback_type.isna()')
    .reset_index(drop=True)
    [cols]
)
plays.insert(0, 'game_play_id', plays['game_id'].astype(str) + '_' + plays['play_id'].astype(str))

game_play_ids = plays['game_play_id'].unique()
game_ids = plays['game_id'].unique()

print(f'Number of non-RPO run plays: {len(game_play_ids)}')
print(plays.pff_run_concept_primary.value_counts())
plays.head()

Number of non-RPO run plays: 4390
pff_run_concept_primary
OUTSIDE ZONE    1396
MAN              879
INSIDE ZONE      779
POWER            395
PULL LEAD        393
COUNTER          310
DRAW             110
TRAP              93
FB RUN            35
Name: count, dtype: int64


Unnamed: 0,game_play_id,game_id,play_id,play_description,quarter,down,yards_to_go,possession_team,defensive_team,game_clock,pre_snap_home_score,pre_snap_visitor_score,absolute_yardline_number,pre_snap_home_team_win_probability,pre_snap_visitor_team_win_probability,expected_points,offense_formation,receiver_alignment,play_clock_at_snap,rush_location_type,yards_gained,home_team_win_probability_added,visitor_team_win_probility_added,expected_points_added,pff_run_concept_primary,pff_pass_coverage,pff_man_zone
0,2022100205_2314,2022100205,2314,(14:15) Ja.Williams up the middle to DET 32 fo...,3,2,6,DET,SEA,14:15,15,31,39,0.138289,0.861711,1.066931,SINGLEBACK,3x1,15.0,INSIDE_RIGHT,3,-0.02421,0.02421,-0.442517,MAN,Cover 6-Left,Zone
1,2022110605_3861,2022110605,3861,(:29) (Shotgun) J.Wilkins up the middle to IND...,4,1,10,IND,NE,00:29,26,3,50,0.997811,0.002189,0.991169,SHOTGUN,2x2,18.0,OUTSIDE_LEFT,5,0.002189,-0.002189,-0.991169,INSIDE ZONE,Cover-2,Zone
2,2022102306_3240,2022102306,3240,(12:55) (Shotgun) A.Dillon left tackle to WAS ...,4,2,10,GB,WAS,12:55,20,14,64,0.630746,0.369254,2.416086,SHOTGUN,3x1,4.0,INSIDE_LEFT,0,0.053308,-0.053308,-0.962012,PULL LEAD,Cover-6 Right,Zone
3,2022100212_2463,2022100212,2463,(3:18) J.Herron reported in as eligible. J.Ja...,3,1,1,LV,DEN,03:18,19,16,11,0.824414,0.175586,6.442386,JUMBO,1x1,6.0,OUTSIDE_LEFT,-2,-0.017039,0.017039,-0.822176,MAN,Goal Line,Other
4,2022110300_1182,2022110300,1182,(9:28) M.Sanders left tackle pushed ob at HST ...,2,1,10,PHI,HOU,09:28,7,7,44,0.149478,0.850522,1.703288,SINGLEBACK,2x2,9.0,INSIDE_LEFT,25,-0.020434,0.020434,1.637715,TRAP,Cover-1,Man


In [5]:
tracking = pd.DataFrame()
for i in tqdm(range(1, 10), desc='Loading tracking data', unit='file'):
    data = util.uncamelcase_columns(pd.read_csv(join(DATA_DIR, f'tracking_week_{i}.csv')))
    data.insert(
        0, 
        'game_play_id', 
        data['game_id'].astype(str) + '_' + data['play_id'].astype(str)
    )
    data = data.query('game_play_id in @game_play_ids').drop(columns=['jersey_number'])
    tracking = pd.concat([
        tracking,
        data
    ], ignore_index=True)

del data

tracking, plays = util.standardize_direction(tracking, plays)

tracking.head()

Loading tracking data:   0%|          | 0/9 [00:00<?, ?file/s]

Loading tracking data: 100%|██████████| 9/9 [01:26<00:00,  9.59s/file]


Unnamed: 0,game_play_id,game_id,play_id,nfl_id,display_name,frame_id,frame_type,time,club,play_direction,x,y,s,a,dis,o,dir,event
0,2022091200_64,2022091200,64,35459.0,Kareem Jackson,1,BEFORE_SNAP,2022-09-13 00:16:03.5,DEN,right,24.75,51.06,0.72,0.37,0.07,293.83,111.66,huddle_break_offense
1,2022091200_64,2022091200,64,35459.0,Kareem Jackson,2,BEFORE_SNAP,2022-09-13 00:16:03.6,DEN,right,24.73,51.13,0.71,0.36,0.07,294.59,108.79,
2,2022091200_64,2022091200,64,35459.0,Kareem Jackson,3,BEFORE_SNAP,2022-09-13 00:16:03.7,DEN,right,24.71,51.2,0.69,0.23,0.07,295.55,110.1,
3,2022091200_64,2022091200,64,35459.0,Kareem Jackson,4,BEFORE_SNAP,2022-09-13 00:16:03.8,DEN,right,24.68,51.26,0.67,0.22,0.07,295.55,112.02,
4,2022091200_64,2022091200,64,35459.0,Kareem Jackson,5,BEFORE_SNAP,2022-09-13 00:16:03.9,DEN,right,24.65,51.32,0.65,0.34,0.07,294.26,117.17,


In [None]:
# only include tracking data for plays at or after the ball snap
ball_snap_frames = (
    tracking[tracking['event'] == 'ball_snap']
    .groupby('game_play_id')['frameId']
    .min()
    .rename('ball_snap_frame_id')
)
tracking = tracking.merge(ball_snap_frames, on='game_play_id', how='left')
tracking = tracking[tracking['frameId'] >= tracking['ball_snap_frame_id']].reset_index(drop=True)
tracking = tracking.drop(columns=['ball_snap_frame_id'])

In [None]:
games = pd.read_csv(join(DATA_DIR, 'games.csv')).query('game_id in @game_ids').reset_index(drop=True)
games.head()

Unnamed: 0,gameId,season,week,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,homeFinalScore,visitorFinalScore
0,2022090800,2022,1,9/8/2022,20:20:00,LA,BUF,10,31
1,2022091100,2022,1,9/11/2022,13:00:00,ATL,NO,26,27
2,2022091101,2022,1,9/11/2022,13:00:00,CAR,CLE,24,26
3,2022091102,2022,1,9/11/2022,13:00:00,CHI,SF,19,10
4,2022091103,2022,1,9/11/2022,13:00:00,CIN,PIT,20,23


In [5]:
players = pd.read_csv(join(DATA_DIR, 'players.csv'))
players.head()

Unnamed: 0,nflId,height,weight,birthDate,collegeName,position,displayName
0,25511,6-4,225,1977-08-03,Michigan,QB,Tom Brady
1,29550,6-4,328,1982-01-22,Arkansas,T,Jason Peters
2,29851,6-2,225,1983-12-02,California,QB,Aaron Rodgers
3,30842,6-6,267,1984-05-19,UCLA,TE,Marcedes Lewis
4,33084,6-4,217,1985-05-17,Boston College,QB,Matt Ryan


In [None]:
player_play = pd.read_csv(join(DATA_DIR, 'player_play.csv'))
player_play.insert(0, 'game_play_id', player_play['game_id'].astype(str) + '_' + player_play['play_id'].astype(str))
player_play = player_play.query('game_play_id in @game_play_ids').reset_index(drop=True)
player_play.head()

Unnamed: 0,gamePlayId,gameId,playId,nflId,teamAbbr,hadRushAttempt,rushingYards,hadDropback,passingYards,sackYardsAsOffense,hadPassReception,receivingYards,wasTargettedReceiver,yardageGainedAfterTheCatch,fumbles,fumbleLost,fumbleOutOfBounds,assistedTackle,forcedFumbleAsDefense,halfSackYardsAsDefense,passDefensed,quarterbackHit,sackYardsAsDefense,safetyAsDefense,soloTackle,tackleAssist,tackleForALoss,tackleForALossYardage,hadInterception,interceptionYards,fumbleRecoveries,fumbleRecoveryYards,penaltyYards,penaltyNames,wasInitialPassRusher,causedPressure,timeToPressureAsPassRusher,getOffTimeAsPassRusher,inMotionAtBallSnap,shiftSinceLineset,motionSinceLineset,wasRunningRoute,routeRan,blockedPlayerNFLId1,blockedPlayerNFLId2,blockedPlayerNFLId3,pressureAllowedAsBlocker,timeToPressureAllowedAsBlocker,pff_defensiveCoverageAssignment,pff_primaryDefensiveCoverageMatchupNflId,pff_secondaryDefensiveCoverageMatchupNflId
0,2022090800_101,2022090800,101,35472,BUF,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,False,,,False,False,False,,,,,,,,,,
1,2022090800_101,2022090800,101,42392,BUF,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,False,,,False,False,False,,,,,,,,,,
2,2022090800_101,2022090800,101,42818,BUF,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,False,,,False,False,False,,,,,,,,,,
3,2022090800_101,2022090800,101,44875,BUF,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,False,,,False,False,False,,,,,,,,,,
4,2022090800_101,2022090800,101,46076,BUF,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,False,,,False,False,False,,,,,,,,,,


## 2. Metric Creation

#### a. Field Ownership

In [22]:
tracking.head()

Unnamed: 0,gamePlayId,gameId,playId,nflId,displayName,frameId,frameType,time,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022091200_64,2022091200,64,35459.0,Kareem Jackson,114,SNAP,2022-09-13 00:16:14.8,DEN,right,50.29,29.43,1.13,1.62,0.11,240.06,133.16,ball_snap
1,2022091200_64,2022091200,64,35459.0,Kareem Jackson,115,AFTER_SNAP,2022-09-13 00:16:14.9,DEN,right,50.39,29.34,1.37,1.61,0.13,240.06,131.19,
2,2022091200_64,2022091200,64,35459.0,Kareem Jackson,116,AFTER_SNAP,2022-09-13 00:16:15,DEN,right,50.52,29.24,1.65,1.76,0.16,238.47,129.12,
3,2022091200_64,2022091200,64,35459.0,Kareem Jackson,117,AFTER_SNAP,2022-09-13 00:16:15.1,DEN,right,50.67,29.12,1.99,1.94,0.2,235.66,128.32,
4,2022091200_64,2022091200,64,35459.0,Kareem Jackson,118,AFTER_SNAP,2022-09-13 00:16:15.2,DEN,right,50.85,28.98,2.32,2.03,0.23,231.72,127.25,
