In [1]:
# Download data using Kaggle API
import numpy as np
import pandas as pd
import os
import math

# Remember the directory the kernel was started in before moving away from it.
script_dir = os.getcwd()
# Move two levels up and treat that directory as the project root.
# NOTE(review): the path is relative, so re-running this cell without a kernel
# restart climbs two more levels and changes proj_dir/data_dir.
os.chdir('../..')
proj_dir = os.getcwd()
# Star import of the project's shared names; it runs after the chdir above.
# NOTE(review): presumably this is where names such as `off_group` (used further
# down) come from — confirm, and consider importing them explicitly.
from src.definitions.name_space import *
# Location of the competition CSV files inside the project.
data_dir = os.path.join(proj_dir, "data")
# One-off download/extract steps, kept disabled.
# os.system("rmdir /s {}".format(data_dir))
# os.system("mkdir data")
# os.chdir(data_dir)
# os.system("kaggle competitions download -c nfl-big-data-bowl-2024")
# os.system('tar -xf nfl-big-data-bowl-2024.zip')

In [2]:
# Work from inside the data directory so the CSVs can be addressed by bare file name.
os.chdir(data_dir)
# Read the four lookup tables in one pass, in the same order as their target names.
games, players, plays, tackles = map(
    pd.read_csv,
    ('games.csv', 'players.csv', 'plays.csv', 'tackles.csv'),
)

In [60]:
# Read the nine weekly tracking files (weeks 1-9) from the current working
# directory and stack them with a single concat call. Concatenating inside the
# loop would re-copy the accumulated rows on every iteration and would start
# from an empty DataFrame, which pandas warns about when inferring dtypes.
# ignore_index is left at its default, so each week's rows keep their per-file labels.
tracking = pd.concat(
    [pd.read_csv(f'tracking_week_{week}.csv') for week in range(1, 10)]
)

In [61]:
# Play-level columns to copy onto every tracking row (the first two are the join keys).
play_merge_cols = [
    'gameId',
    'playId',
    'ballCarrierId',
    'passResult',
    'playDescription',
]
# Inner join (the pandas default) of tracking rows to their play record.
tracking = tracking.merge(
    plays.loc[:, play_merge_cols],
    how='inner',
    on=['gameId', 'playId'],
)

In [63]:
# A play with no recorded passResult is labelled 'rush'; every other play is 'pass'.
tracking['playType'] = np.where(tracking.passResult.isna(), 'rush', 'pass')
# Recode passResult 'R' as 'S' first, then fill plays without a pass result with 'R'.
tracking['playResult'] = tracking.passResult.replace('R', 'S').fillna('R')
# Tracking events that get their own indicator column.
ball_events = ['handoff', 'snap_direct', 'lateral', 'pass_forward','pass_outcome_caught']
for ball_event in ball_events:
    # 1 on rows tagged with this event, NaN everywhere else.
    # np.nan is spelled in lower case because the np.NaN alias was removed in NumPy 2.0.
    tracking[ball_event] = np.where(tracking.event == ball_event, 1, np.nan)

In [68]:
# List every distinct event tag present in the tracking data (NaN included).
tracking['event'].unique()

array([nan, 'pass_arrived', 'pass_outcome_caught', 'tackle', 'run',
       'first_contact', 'ball_snap', 'handoff', 'touchdown',
       'out_of_bounds', 'man_in_motion', 'fumble', 'play_action',
       'pass_forward', 'lateral', 'autoevent_passforward',
       'autoevent_passinterrupted', 'line_set', 'qb_slide', 'shift',
       'run_pass_option', 'qb_sack', 'pass_shovel', 'autoevent_ballsnap',
       'snap_direct', 'fumble_defense_recovered',
       'fumble_offense_recovered', 'penalty_flag', 'safety',
       'pass_outcome_touchdown', 'penalty_accepted'], dtype=object)

In [69]:
def angle_transform(angle):
    """Wrap angles given in degrees onto the half-open interval (-180, 180].

    Parameters
    ----------
    angle : scalar or array-like of numbers
        Angle(s) in degrees. Any magnitude is accepted, not only values
        within one turn of the target interval.

    Returns
    -------
    numpy.ndarray
        The equivalent angle(s), same shape as the input. Values already in
        (-180, 180] are returned unchanged and NaN stays NaN.
    """
    angle = np.asarray(angle)
    # Whole turns to subtract so the result does not exceed 180; the count is
    # negative for angles at or below -180, which shifts them upwards instead.
    turns = np.ceil((angle - 180.0) / 360.0)
    # Pass in-range values through untouched so they are not perturbed by
    # floating-point arithmetic.
    in_range = (angle > -180) & (angle <= 180)
    return np.where(in_range, angle, angle - 360.0 * turns)

In [70]:
# Standardize plays so that the offense always moves right to left
moves_left = tracking.playDirection == 'left'
# Merge in the roster position of each player by looking up nflId in the players table
tracking['position'] = tracking.nflId.map(pd.Series(players.position.values, index = players.nflId))
# NOTE(review): every row whose position is not in off_group is labelled 'defense',
# including rows with no match in `players` (presumably the football rows) — confirm
# that this is intended before filtering on `unit`.
tracking['unit'] = np.where(tracking.position.isin(off_group), 'offense', 'defense')
# Shift field so that defense faces right and left GL (behind defense) lies on x = 0.0:
# left-moving plays are shifted by 10.0, right-moving plays are mirrored about 110.0
tracking['x_gl'] = np.where(moves_left, tracking.x - 10.0, 110.0 - tracking.x)
# Mirror y across the field width (53.33) on right-moving plays only
tracking['y_rsl'] = tracking.y.where(moves_left, 53.33 - tracking.y)
# Rotate orientation and direction towards the end zone the defense is facing
for angle in ['o', 'dir']:
    # Reverse direction of angle increase to counter-clockwise to align with mathematical
    # norms and facilitate calculation of other angles
    angle_rev = 360 - tracking[angle]
    # Quarter-turn offset whose sign depends on play direction, wrapped back by angle_transform
    tracking[angle + '_los'] = angle_transform(np.where(moves_left, angle_rev + 90, angle_rev - 90))
# Distance to the nearer sideline: the smaller of the distances to the two sidelines.
# Taking the minimum avoids a hand-typed midpoint (26.666) that does not equal 53.33 / 2.
tracking['dist_from_nsl'] = np.minimum(tracking.y_rsl, 53.33 - tracking.y_rsl)

In [71]:
# Spot-check raw vs. standardized angles on frame 10 of left-moving plays (first 30 rows).
(
    tracking
    .query('(frameId == 10) & (playDirection == "left")')
    .loc[:, ['playDirection', 'unit', 'o', 'o_los', 'dir', 'dir_los']]
    .head(30)
)

Unnamed: 0,playDirection,unit,o,o_los,dir,dir_los
9,left,offense,246.07,-156.07,85.87,4.13
31,left,defense,344.37,105.63,2.34,87.66
53,left,defense,216.76,-126.76,186.45,-96.45
75,left,offense,294.28,155.72,337.9,112.1
97,left,offense,104.58,-14.58,207.99,-117.99
119,left,defense,341.66,108.34,327.17,122.83
141,left,defense,140.33,-50.33,177.68,-87.68
163,left,defense,155.56,-65.56,223.28,-133.28
185,left,defense,355.04,94.96,250.64,-160.64
207,left,offense,262.54,-172.54,316.97,133.03


In [20]:
# Spot-check raw vs. standardized coordinates on frame 10 of right-moving plays (first 30 rows).
(
    tracking
    .query('(frameId == 10) & (playDirection == "right")')
    .loc[:, ['playDirection', 'unit', 'x', 'x_gl', 'y', 'y_rsl', 'dist_from_nsl']]
    .head(30)
)

Unnamed: 0,playDirection,unit,x,x_gl,y,y_rsl,dist_from_nsl
6955,right,offense,24.82,85.18,23.97,29.36,23.97
7008,right,defense,27.3,82.7,28.29,25.04,25.04
7061,right,defense,41.8,68.2,21.74,31.59,21.74
7114,right,defense,32.75,77.25,34.32,19.01,19.01
7167,right,offense,26.11,83.89,35.77,17.56,17.56
7220,right,defense,26.81,83.19,21.41,31.92,21.41
7273,right,offense,25.63,84.37,21.36,31.97,21.36
7326,right,offense,25.56,84.44,19.18,34.15,19.18
7379,right,offense,22.87,87.13,22.34,30.99,22.34
7432,right,defense,30.63,79.37,22.13,31.2,22.13


In [72]:
# Standardized football position per (gameId, playId, frameId), with the
# coordinate columns renamed so they do not clash with the player columns.
ball_loc = (
    tracking
    .query("club == 'football'")
    .loc[:, ['gameId', 'playId', 'frameId', 'x_gl', 'y_rsl']]
    .rename(columns={'x_gl': 'x_ball', 'y_rsl': 'y_ball'})
)

In [73]:
# Attach the football's position to every row of the same game, play and frame
# (inner join, the pandas default).
tracking = tracking.merge(
    ball_loc,
    how='inner',
    on=['gameId', 'playId', 'frameId'],
)

In [74]:
import math
# Components of the vector pointing from each row's position to the football.
tracking['ball_vector_x'] = tracking['x_ball'] - tracking['x_gl']
tracking['ball_vector_y'] = tracking['y_ball'] - tracking['y_rsl']
# Angle of that vector in degrees (arctan2 of y over x); rows belonging to the
# football itself receive the placeholder value -1 instead.
tracking['dir_to_ball'] = np.where(
    tracking['club'].ne('football'),
    np.degrees(np.arctan2(tracking['ball_vector_y'], tracking['ball_vector_x'])),
    -1,
)

In [75]:
# Spot-check the ball-vector columns on the first 30 rows of frame 3.
(
    tracking
    .query('frameId == 3')
    .head(30)
    .loc[:, ['position', 'unit', 'x_gl', 'x_ball', 'y_rsl', 'y_ball',
             'ball_vector_x', 'ball_vector_y', 'dir_los', 'dir_to_ball']]
)

Unnamed: 0,position,unit,x_gl,x_ball,y_rsl,y_ball,ball_vector_x,ball_vector_y,dir_los,dir_to_ball
46,G,offense,78.56,71.739998,27.01,35.59,-6.820002,8.58,-57.05,128.480206
47,ILB,defense,68.16,71.739998,27.88,35.59,3.579998,7.71,97.17,65.093061
48,DT,defense,81.62,71.739998,30.98,35.59,-9.880002,4.61,-51.82,154.986285
49,C,offense,78.27,71.739998,28.92,35.59,-6.530002,6.67,89.72,134.392349
50,WR,offense,70.34,71.739998,37.09,35.59,1.399998,-1.5,-104.4,-46.974975
51,CB,defense,61.55,71.739998,7.67,35.59,10.189998,27.92,-171.05,69.949371
52,CB,defense,68.06,71.739998,39.63,35.59,3.679998,-4.04,-85.43,-47.669905
53,DE,defense,82.15,71.739998,33.52,35.59,-10.410002,2.07,-75.38,168.75359
54,DT,defense,78.57,71.739998,25.59,35.59,-6.830002,10.0,86.27,124.333084
55,T,offense,79.69,71.739998,25.96,35.59,-7.950002,9.63,35.53,129.541219


In [76]:
# Rows where the tracked player is the play's ball carrier, reduced to the join
# keys plus the standardized position/angle columns; every non-key column gets
# a 'bc_' prefix.
bc_dir = (
    tracking
    .loc[tracking.nflId == tracking.ballCarrierId,
         ['gameId', 'playId', 'frameId', 'x_gl', 'y_rsl', 'o_los', 'dir_los', 'dist_from_nsl']]
    .rename(columns=lambda col: col if col in ('gameId', 'playId', 'frameId') else 'bc_' + col)
)
# Attach the ball carrier's values to every row of the same game, play and frame
# (inner join, the pandas default).
tracking = tracking.merge(
    bc_dir,
    how='inner',
    on=['gameId', 'playId', 'frameId'],
)

In [77]:
# Preview the first 30 rows of frame 5 with all columns.
(
    tracking
    .query('frameId == 5')
    .head(30)
)

KeyboardInterrupt: 