# Further Motion/Distance Analysis

We dive further into our motion analysis by exploring distance-related aspects of our tracking data.




In [1]:
import os
import pandas as pd
from data_loading import load_tracking_data
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# load tracking data for all weeks
root_dir = os.getcwd()
tracking_fname_list = [os.path.join(root_dir,  f"data/tracking_week_{i}.csv") for i in range(1,10)]

# The only tracking columns this notebook reads from the cached file.
tracking_cols = ['gameId','playId','nflId','frameId','club','a','s','dis','y']

# Set to True to rebuild the combined cache from the weekly CSVs;
# otherwise the previously written cache file is read back.
write_track = False
if write_track:
    df_tracking = pd.concat(
            [load_tracking_data(tracking_fname) for tracking_fname in tracking_fname_list]
        )
    df_tracking.to_csv('data/track_full.csv')
    # Keep the same columns (in their existing order) and a fresh 0..n-1 index,
    # matching what the cached read below returns. load_tracking_data may return
    # additional columns; leaving them in would let the later dropna() react to
    # NaNs the cached path never sees, so the two branches could disagree.
    df_tracking = (
        df_tracking.loc[:, df_tracking.columns.isin(tracking_cols)]
        .reset_index(drop=True)
    )
else:
    df_tracking = pd.read_csv('data/track_full.csv',usecols=tracking_cols)

# Player-level and player-play-level tables.
df_players = pd.read_csv(os.path.join(root_dir,'data/players.csv'))
df_player_play = pd.read_csv(os.path.join(root_dir,'data/player_play.csv'))

Subset to the offense this time:

In [3]:
# Read the play-level and game-level tables.
plays_fname = os.path.join(root_dir,  "data/plays.csv")
# plays_fname is already rooted at root_dir, so it is passed to read_csv
# directly instead of being joined with root_dir a second time.
df_plays = pd.read_csv(plays_fname)
df_games = pd.read_csv(os.path.join(root_dir,'data/games.csv'))

In [4]:
# Attach tracking rows to each play, keeping only rows whose club matches
# the play's possessionTeam.
play_keys = ['gameId','playId','possessionTeam']
track_keys = ['gameId','playId','club']

play_trunc = df_plays[play_keys]
pt_df = pd.merge(
    play_trunc,
    df_tracking,
    how='left',
    left_on=play_keys,
    right_on=track_keys,
)

# The left join leaves NaNs where a play has no matching tracking rows;
# drop every row that contains any NaN.
off_df = pt_df.dropna(how='any')

In [5]:
# Preview the first three offense tracking rows.
off_df.head(n=3)

Unnamed: 0,gameId,playId,possessionTeam,nflId,frameId,club,y,s,a,dis
0,2022102302,2655,CIN,42654.0,1.0,CIN,20.62,0.13,0.87,0.02
1,2022102302,2655,CIN,42654.0,2.0,CIN,20.62,0.13,0.75,0.02
2,2022102302,2655,CIN,42654.0,3.0,CIN,20.63,0.15,0.45,0.02


In [6]:
# Boolean mask over off_df selecting one player's rows on a single play.
example = (
    off_df['gameId'].eq(2022102300)
    & off_df['playId'].eq(2314)
    & off_df['nflId'].eq(42347)
)

### Get all but the last few frames

In [7]:
# Keep the last 15 rows of every (game, play, player) group, in existing row order.
off_15 = (
    off_df
    .groupby(['gameId','playId','nflId'])
    .tail(15)
    .reset_index(drop=True)
)

In [8]:
# From those rows, keep the first 10 per (game, play, player) group,
# which discards the trailing rows of each group.
off_10 = (
    off_15
    .groupby(['gameId','playId','nflId'])
    .head(10)
    .reset_index(drop=True)
)

In [9]:
# Number of rows removed by the head(10) step.
off_15.shape[0] - off_10.shape[0]

874852

In [10]:
# Per-(game, play, player) summary of the selected rows.
# The output column order follows the order of this mapping.
player_aggs = {
    'y_initial': ('y', 'first'),
    'y_final': ('y', 'last'),
    'dis_sum': ('dis', 'sum'),
    's_max': ('s', 'max'),
    'a_max': ('a', 'max'),
    'a_mean': ('a', 'mean'),
    'a_median': ('a', 'median'),
    's_mean': ('s', 'mean'),
    's_min': ('s', 'min'),
    'a_min': ('a', 'min'),
}
og_10 = (
    off_10
    .groupby(['gameId','playId','nflId'])
    .agg(**player_aggs)
    .reset_index()
)

Get the y delta (final y minus initial y):

In [11]:
# Change in y over the selected rows: final minus initial.
og_10['y_delta'] = og_10['y_final'].sub(og_10['y_initial'])

In [12]:
# Show the last four column names of og_10 (y_delta was just appended at the end).
og_10.columns[-4:]

Index(['s_mean', 's_min', 'a_min', 'y_delta'], dtype='object')

In [13]:
# One row per distinct (gameId, playId) pair, in order of first appearance.
play_id_cols = ['gameId','playId']
delta_10 = og_10.loc[:, play_id_cols].drop_duplicates(keep='first')

In [14]:
# Accumulator for the per-play series built in the loop below.
ser_ls = list()

In [15]:
def _top_two_gap(values):
    """Return (second largest - largest) of ``values``.

    The result is <= 0 when two non-null values exist, and NaN when there
    are fewer than two. A scalar is always returned so every group yields
    exactly one output row.
    """
    top_two = values.nlargest(2)
    if len(top_two) < 2:
        return float('nan')
    return top_two.iloc[1] - top_two.iloc[0]


# For each of the last ten columns of og_10, compute the per-play gap between
# the two largest player values. ser_ls is rebuilt from scratch (rather than
# appended to) so re-running this cell does not duplicate columns, and each
# series has one entry per (gameId, playId) in sorted key order, so its
# positional index cannot drift from the list of plays it is later joined to.
ser_ls = [
    og_10.groupby(['gameId','playId'])[col].apply(_top_two_gap).reset_index(drop=True)
    for col in og_10.columns[-10:]
]

In [16]:
# Replace the index with 0..n-1; the column-wise pd.concat with ser_ls further
# down aligns rows on index labels, and those series carry a 0..n-1 index.
delta_10 = delta_10.reset_index(drop=True)

In [17]:
# Put the per-column gap series side by side, then attach them to the play keys.
# Both concatenations align rows on the index.
top_two_gaps = pd.concat(ser_ls, axis=1)
d10_proc = pd.concat([delta_10, top_two_gaps], axis=1)

In [18]:
# Round the y_delta gap to two decimal places.
d10_proc['y_delta'] = d10_proc['y_delta'].round(decimals=2)

In [19]:
# Preview the first ten plays of the gap table.
d10_proc.head(n=10)

Unnamed: 0,gameId,playId,y_final,dis_sum,s_max,a_max,a_mean,a_median,s_mean,s_min,a_min,y_delta
0,2022090800,56,-9.39,-0.2,-0.06,-0.01,-0.04,-0.07,-0.024,-0.03,-0.01,-0.01
1,2022090800,80,-4.49,-2.3,-3.28,-4.94,-4.325,-4.815,-2.261,-1.63,-2.95,-0.05
2,2022090800,101,-4.92,-0.11,-0.86,-2.14,-0.646,-0.095,-0.183,-0.01,-0.1,-0.24
3,2022090800,122,-5.15,-0.07,-0.02,-0.01,-0.018,-0.02,-0.026,-0.02,-0.02,-0.04
4,2022090800,167,-1.23,-0.12,-0.37,-1.03,-0.477,-0.385,-0.197,-0.02,-0.14,-0.02
5,2022090800,191,-5.08,-0.11,-0.13,-0.01,-0.013,-0.015,-0.067,-0.04,-0.03,-0.01
6,2022090800,212,-10.82,-4.33,-5.98,-4.01,-2.671,-2.38,-4.44,-2.6,-1.95,-4.14
7,2022090800,236,-5.17,-2.14,-3.67,-5.6,-3.392,-3.52,-2.473,-0.26,-0.65,-0.21
8,2022090800,299,-6.85,-2.28,-4.84,-4.41,-3.445,-3.985,-2.435,-0.41,-1.59,-2.22
9,2022090800,343,-4.74,-2.1,-3.01,-3.03,-2.142,-2.36,-2.176,-0.67,-0.74,-2.12


In [20]:
# Attach the isDropback flag from the plays table.
# The join keys are spelled out; they are the only columns the two frames share.
dropback_flags = df_plays[['gameId','playId','isDropback']]
d10_flagged = d10_proc.merge(dropback_flags, how='left', on=['gameId','playId'])

In [21]:
# 10th percentile of the a_max gap.
d10_flagged['a_max'].quantile(q=0.1)

-3.89

In [22]:
# Flag plays whose a_median gap is at or above that column's 25th percentile.
a_median_q25 = d10_flagged['a_median'].quantile(.25)
d10_flagged['amed_25'] = d10_flagged['a_median'].ge(a_median_q25)

In [23]:
# Interaction feature: (a_mean / .05) * dis_sum, evaluated left to right.
# NOTE(review): the amean_del feature divides by .5 rather than .05 -
# confirm the two scale constants are meant to differ.
scaled_a_mean = d10_flagged['a_mean'].div(.05)
d10_flagged['amean_dis'] = scaled_a_mean.mul(d10_flagged['dis_sum'])

In [24]:
# Interaction feature: (a_mean / .5) * y_delta, evaluated left to right.
# NOTE(review): the amean_dis feature divides by .05 rather than .5 -
# confirm the two scale constants are meant to differ.
scaled_a_mean = d10_flagged['a_mean'].div(.5)
d10_flagged['amean_del'] = scaled_a_mean.mul(d10_flagged['y_delta'])

In [25]:
# Pairwise correlations of every column; show the isDropback column only.
corr_matrix = d10_flagged.corr()
corr_matrix['isDropback']

gameId       -0.022265
playId        0.013461
y_final      -0.109173
dis_sum       0.025642
s_max         0.077960
a_max         0.155504
a_mean        0.136929
a_median      0.130354
s_mean        0.028333
s_min        -0.022216
a_min         0.068027
y_delta       0.021588
isDropback    1.000000
amed_25       0.117585
amean_dis    -0.076095
amean_del    -0.052344
Name: isDropback, dtype: float64

TODO: incorporate acceleration min and median/mean?

In [27]:
# Write the selected gap features per play to CSV (the index is written as well).
export_cols = ['gameId','playId','a_mean','a_max','y_delta','dis_sum']
d10_flagged[export_cols].to_csv('data/top_2_player_diff_last_10.csv')