In [5]:
# Put the local directory (the current working directory) on the import path
# so that the project's own modules can be imported below.
import os
import sys

module_path = os.path.abspath('.')
# Register the path only once, so re-running this cell does not grow sys.path.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [6]:
#### --- Standard imports ------
import pandas as pd

# local import
import nfl_bdb24
import nflutil

In [7]:
# Re-execute the already-imported local module so that edits made to
# nfl_bdb24.py are picked up without restarting the kernel.
# reload() returns the module object, which is what this cell displays.
from importlib import reload
reload(nfl_bdb24)

<module 'nfl_bdb24' from 'c:\\Users\\pschl\\Documents\\data_science\\kaggle_nfl_bdb_2024\\nfl_bdb24.py'>

# Load the data, transform tracking data to standard coordinates

In [8]:
# Play-, game-, player- and tackle-level tables, read unchanged from csv/.
play_df = pd.read_csv('csv/plays.csv')
game_df = pd.read_csv('csv/games.csv')
player_df = pd.read_csv('csv/players.csv')
tackle_df = pd.read_csv('csv/tackles.csv')

# Tracking data for all weeks (csv/tracking_week_1.csv .. tracking_week_9.csv).
# Every weekly file is passed through nflutil.transform_tracking_data as soon
# as it is read (per the section heading: conversion to standard coordinates).
weeks = range(1, 10)
weekly_frames = [
    pd.read_csv(f'csv/tracking_week_{week}.csv').pipe(nflutil.transform_tracking_data)
    for week in weeks
]

# Concatenate once instead of growing track_df inside the loop: repeated
# pd.concat re-copies every previously loaded row on each iteration.
track_df = pd.concat(weekly_frames, ignore_index=True)

# Release the per-week frames; the kernel would otherwise keep a second copy
# of the whole tracking data alive.
del weekly_frames

In [9]:
# Throw out the bad plays; the list of plays to discard is the
# BDB24_BAD_PLAYS constant of the local nfl_bdb24 module.
track_df = track_df.pipe(nflutil.remove_abnormal_plays, nfl_bdb24.BDB24_BAD_PLAYS)

# Filter down to solo-tackle plays without a QB slide and generate the tackle-metrics file

In [10]:
# One row per play: does the play contain a tackle?
playtackle_df = (
    track_df
    .groupby(['gameId','playId'])
    .apply(nfl_bdb24.util_play_contains_tackle)
    .reset_index(name='tackle_flag')
)
# One row per play: does the play contain a qb slide?
playqbslide_df = (
    track_df
    .groupby(['gameId','playId'])
    .apply(nfl_bdb24.util_play_contains_qb_slide)
    .reset_index(name='qbslide_flag')
)

# Solo-tackle plays: summed over the play's rows in tackle_df there is exactly
# one tackle and no assist. Only the two columns the filter reads are summed.
solo_tackle_plays = (
    tackle_df
    .groupby(['gameId','playId'])[['tackle','assist']]
    .sum()
    .query('tackle==1 & assist==0')
    .reset_index()[['gameId','playId']]
)

# Select the plays to keep on the small one-row-per-play tables, so that the
# large tracking table is merged only once (below) instead of three times.
# The flag columns are kept so that each play's frame has the same columns as
# before when it is handed to prep_get_tackle_metrics.
kept_plays = (
    playtackle_df
    # attach qb slide flag
    .merge(
        playqbslide_df,
        how='inner',
        on=['gameId','playId']
    )
    # keep plays that contain a tackle and no qb slide
    .query('tackle_flag==True & qbslide_flag==False')
    # restrict to solo tackle plays
    .merge(
        solo_tackle_plays,
        how='inner',
        on=['gameId','playId']
    )
)

df = (
    # tracking rows of the kept plays, with both flags attached
    track_df
    .merge(
        kept_plays,
        how='inner',
        on=['gameId','playId']
    )
    # calculate tackle metrics, one result row per play
    .groupby(['gameId','playId'])
    .apply(nfl_bdb24.prep_get_tackle_metrics, play_df, tackle_df, player_df)
    .reset_index()
)

df

Unnamed: 0,gameId,playId,contactFrameId,tackleFrameId,frames,d_actual,d_ideal,d_eff,gap_tackle,w_carrier,w_tackler,s_downfield_delta,s_contact,s_downfield_contact,s_downfield_contact_t
0,2022090800,56,10,18,8,5.01,5.006246,0.999251,0.670298,191,208,-1.031605,2.19,1.027805,-0.176900
1,2022090800,80,20,26,6,4.87,3.768183,0.773754,0.367696,237,230,1.282870,7.54,4.724593,1.267077
2,2022090800,101,38,45,7,8.01,5.029374,0.627887,2.476812,203,184,1.479387,6.63,6.516861,0.000000
3,2022090800,122,20,29,9,12.09,11.855248,0.980583,0.573149,203,242,1.839019,5.78,4.435508,1.088474
4,2022090800,167,14,18,4,7.00,6.968414,0.995488,1.591540,191,240,2.724202,6.61,5.702966,4.467674
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6637,2022110700,3658,40,55,15,13.06,12.127753,0.928618,1.341343,211,289,-1.780591,4.77,4.476612,2.177571
6638,2022110700,3686,42,72,30,8.39,6.245190,0.744361,1.015135,211,240,-0.222288,2.07,1.630960,2.704653
6639,2022110700,3707,39,54,15,9.29,9.080666,0.977467,1.452790,211,240,0.203598,2.94,2.861382,-0.836280
6640,2022110700,3740,31,38,7,10.29,9.281939,0.902035,0.733757,211,261,0.486456,7.08,0.596133,-2.118918


In [11]:
# Write the per-play tackle metrics to the intermediate-files folder, without
# the row index. The folder is created first: to_csv does not create missing
# parent directories and would otherwise fail after the long computation.
os.makedirs('interm_files', exist_ok=True)
df.to_csv('interm_files/prep_tackle_metrics.csv', index=False)