# Further Motion/Distance Analysis

We diver further into our motion analysis by exploring distance-related aspects of our tracking data




In [3]:
import os
import pandas as pd
import sys
sys.path.append('../')

from data_loading import load_tracking_data

In [None]:
# load tracking data for all weeks
root_dir = os.path.split(os.getcwd())[0]
tracking_fname_list = [os.path.join(root_dir,  f"data/tracking_week_{i}.csv") for i in range(1,10)]
write_track = False #change if you need to create the aggregated tracking file
if write_track:
    df_tracking = pd.concat(
            [load_tracking_data(tracking_fname) for tracking_fname in tracking_fname_list]
        )
    df_tracking.to_csv(os.path.join(root_dir,'data/track_full.csv'))
else: 
    df_tracking = pd.read_csv(os.path.join(root_dir,'data/track_full.csv'),usecols=['gameId','playId','nflId','frameId','club','a','s','dis','y'])
df_players = pd.read_csv(os.path.join(root_dir,'data/players.csv'))
df_player_play = pd.read_csv(os.path.join(root_dir,'data/player_play.csv'))

Sub to offense this time:

In [5]:
plays_fname = os.path.join(root_dir,  "data/plays.csv")
df_plays = pd.read_csv(plays_fname)
df_games = pd.read_csv(os.path.join(root_dir,'data/games.csv'))

In [6]:
play_trunc = df_plays[['gameId','playId','possessionTeam']]
pt_df = play_trunc.merge(df_tracking,how='left',left_on=['gameId','playId','possessionTeam'],
                 right_on=['gameId','playId','club'])

# na's dropped to lose players w/o tracking info
off_df = pt_df.dropna()

In [7]:
off_df.head(3)

Unnamed: 0,gameId,playId,possessionTeam,nflId,displayName,frameId,frameType,time,jerseyNumber,club,...,a,dis,o,dir,event,gameplayId,o_standard,dir_standard,x_standard,y_standard
32,2022102302,2655,CIN,42654.0,La'el Collins,33.0,BEFORE_SNAP,2022-10-23 19:14:01.5,71.0,CIN,...,0.02,0.02,104.75,316.23,line_set,2022102302_2655,104.75,316.23,29.42,20.65
182,2022102302,2655,CIN,43344.0,Tyler Boyd,33.0,BEFORE_SNAP,2022-10-23 19:14:01.5,83.0,CIN,...,0.59,0.02,88.69,176.97,line_set,2022102302_2655,88.69,176.97,29.5,14.13
332,2022102302,2655,CIN,43510.0,Ted Karras,33.0,BEFORE_SNAP,2022-10-23 19:14:01.5,64.0,CIN,...,0.03,0.01,69.35,118.78,line_set,2022102302_2655,69.35,118.78,30.25,23.7


In [8]:
example = ((off_df['gameId'] == 2022102300) & (off_df['playId'] == 2314)) & (off_df['nflId'] == 42347)

### get all but last couple frames

In [9]:
off_15 = off_df.groupby(['gameId','playId','nflId']).tail(15).reset_index(drop=True)

In [10]:
off_10 = off_15.groupby(['gameId','playId','nflId']).head(10).reset_index(drop=True)

In [11]:
len(off_15)-len(off_10)

0

In [12]:
og_10 = off_10.groupby(['gameId','playId','nflId']).agg(y_initial=('y','first'),y_final=('y','last'),dis_sum=('dis','sum'),
                                                        s_max=('s','max'),a_max=('a','max'),a_mean=('a','mean'),a_median=('a','median'),
                                                        s_mean=('s','mean'),s_min=('s','min'),a_min=('a','min')).reset_index()

get y delta

In [13]:
og_10['y_delta'] = og_10['y_final']-og_10['y_initial']

In [14]:
og_10.columns[-4:]

Index(['s_mean', 's_min', 'a_min', 'y_delta'], dtype='object')

In [15]:
delta_10 = og_10[['gameId','playId']].drop_duplicates()

In [16]:
ser_ls = []

In [17]:
for col in og_10.columns[-10:]:

    ser_ls.append(og_10.groupby(['gameId','playId'])[col].apply(lambda grp: grp.nlargest(2).diff().tail(1)).reset_index(drop=True))

In [18]:
delta_10 = delta_10.reset_index(drop=True)

In [19]:
d10_proc = pd.concat([delta_10,pd.concat(ser_ls,axis=1)],axis=1)

In [20]:
d10_proc['y_delta'] = d10_proc['y_delta'].round(2)

In [21]:
d10_proc.head(10)

Unnamed: 0,gameId,playId,y_final,dis_sum,s_max,a_max,a_mean,a_median,s_mean,s_min,a_min,y_delta
0,2022090800,56,-1.42,-0.43,-4.32,-0.47,-0.47,-0.47,-4.32,-4.32,-0.47,0.0
1,2022090800,80,-1.98,-0.38,-3.86,-0.12,-0.055,-0.055,-1.925,-0.01,-0.01,-0.57
2,2022090800,101,-4.92,0.0,-0.05,-0.07,-0.04,-0.04,-0.03,-0.02,-0.02,-0.11
3,2022090800,122,-5.15,0.0,-0.06,-0.96,-0.96,-0.96,-0.06,-0.06,-0.96,0.0
4,2022090800,167,-1.24,0.0,-0.02,-0.05,-0.05,-0.05,-0.02,-0.02,-0.05,0.0
5,2022090800,191,-5.56,-0.21,-2.05,-2.29,-2.29,-2.29,-2.05,-2.05,-2.29,0.0
6,2022090800,212,-10.79,-0.51,-4.98,-1.89,-0.865,-0.865,-2.445,-0.09,-0.16,-3.09
7,2022090800,236,-6.4,-0.3,-3.22,-2.01,-1.405,-1.405,-1.645,-0.04,-0.76,-0.09
8,2022090800,299,-6.84,-0.14,-0.87,-3.3,-1.985,-1.985,-0.815,-0.75,-0.66,-2.11
9,2022090800,343,-4.95,-0.01,-0.01,-0.08,-0.08,-0.08,-0.01,-0.01,-0.08,0.0


In [22]:
d10_flagged = d10_proc.merge(df_plays[['gameId','playId','isDropback']],how='left')

In [23]:
d10_flagged['a_max'].quantile(.1)

-2.57

In [24]:
d10_flagged['amed_25'] = (d10_flagged['a_median'] >= d10_flagged['a_median'].quantile(.25))

In [25]:
d10_flagged['amean_dis'] = d10_flagged['a_mean']/.05*d10_flagged['dis_sum']

In [26]:
d10_flagged['amean_del'] =d10_flagged['a_mean']/.5*d10_flagged['y_delta']

In [27]:
d10_flagged.corr()['isDropback']

gameId       -0.022328
playId        0.013797
y_final      -0.095247
dis_sum       0.035716
s_max         0.047063
a_max         0.091021
a_mean        0.073306
a_median      0.073547
s_mean        0.023776
s_min        -0.025073
a_min         0.012148
y_delta       0.039459
isDropback    1.000000
amed_25       0.066274
amean_dis    -0.052855
amean_del    -0.038661
Name: isDropback, dtype: float64

In [28]:
#d10_flagged[['gameId','playId','a_mean','a_max','y_delta','dis_sum']].to_csv('data/top_2_player_diff_last_10.csv')