In [107]:
import os
import sys
from os.path import join
import json

from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
import nfl_data_py as nfl

# Put the repository's `py/` directory (one level above the working directory)
# at the front of the import path so the project-local modules below resolve.
ROOT_DIR = os.path.abspath(join(os.getcwd(), '..'))
sys.path.insert(0, join(ROOT_DIR, 'py'))

import util
from plot.plot_simple import plot_play_with_speed

# Remove pandas' row/column display limits for every frame shown in this notebook.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# `paths.json` is read relative to the current working directory.
with open("paths.json", 'r') as paths_file:
    paths = json.load(paths_file)

PROCESSED_DATA_PATH = paths['processed_data']

In [108]:
WEEK = 9

# Week-specific pickles live in <processed_data>/wk<WEEK>; the player and team
# tables sit directly under the processed-data root.
week_dir = join(PROCESSED_DATA_PATH, f'wk{WEEK}')

df_tracking = pd.read_pickle(join(week_dir, 'tracking_final.pkl'))
df_game = pd.read_pickle(join(week_dir, 'games_final.pkl'))
df_play = pd.read_pickle(join(week_dir, 'play_final.pkl'))
df_player_play = pd.read_pickle(join(week_dir, 'player_play_final.pkl'))

df_player = pd.read_pickle(join(PROCESSED_DATA_PATH, 'players.pkl'))
df_team = pd.read_pickle(join(PROCESSED_DATA_PATH, 'teams.pkl'))

# Drop plays where QB is the ball carrier

In [109]:
# Count rush attempts by the roster position of the player credited with them.
rush_attempts = (
    df_player_play
    .loc[:, ['game_play_id','had_rush_attempt','nfl_id']]
    .query('had_rush_attempt == 1')
)
rusher_positions = rush_attempts.merge(
    df_player[['nfl_id','position']],
    on='nfl_id',
    how='left',
)
rusher_positions['position'].value_counts()

RB    484
WR     16
TE      3
FB      1
Name: position, dtype: int64

In [110]:
# Plays whose rush attempt is credited to a player whose df_player position is "QB".
gpids_with_qb_rusher = (
    df_player_play
    [['game_play_id','had_rush_attempt','nfl_id']]
    .query('had_rush_attempt == 1')
    .merge(df_player[['nfl_id','position']], on='nfl_id', how='left')
    .query('position == "QB"')
    .game_play_id
    .unique()
    .tolist()
)
# Games that still have at least one play once the QB-rusher plays are removed.
keep_gids = df_play.query('~game_play_id.isin(@gpids_with_qb_rusher)').game_id.unique().tolist()

if len(gpids_with_qb_rusher) != 0:
    print(f'Dropping {len(gpids_with_qb_rusher)} plays with QB rusher')
    df_tracking = df_tracking.query('~game_play_id.isin(@gpids_with_qb_rusher)')
    df_play = df_play.query('~game_play_id.isin(@gpids_with_qb_rusher)')
    df_player_play = df_player_play.query('~game_play_id.isin(@gpids_with_qb_rusher)')
    # Games are keyed by game_id, not game_play_id: keep the games that still
    # have plays. (Previously this compared game_id against the list of play
    # ids, which can never match and would have emptied df_game.)
    df_game = df_game.query('game_id.isin(@keep_gids)')

# Verify all plays have 5 labeled oline players (LT, LG, C, RG, RT)

In [111]:
# The five offensive-line labels every play is expected to carry.
oline_labels = ["LT","LG","C","RG","RT"]

all_gpids = set(df_tracking.game_play_id.unique().tolist())

# Number of distinct oline labels present on each play. Checking only that *some*
# oline row exists would let a play with, say, a missing RG pass unnoticed.
n_oline_labels_per_play = (
    df_tracking
    .query('position_by_loc.isin(@oline_labels)')
    .groupby('game_play_id')
    .position_by_loc
    .nunique()
)

# Plays on which all five labels appear.
gpids_with_oline = set(
    n_oline_labels_per_play[n_oline_labels_per_play == len(oline_labels)]
    .index
    .tolist()
)

# Plays missing at least one of the five labels (expected to be empty).
all_gpids - gpids_with_oline

set()

# Define primary RB on plays with multiple RBs

In [112]:
# Per-frame QB location, used below to measure each candidate's distance to the QB.
qb_xy = (
    df_tracking
    .query('position_by_loc == "QB"')
    [['game_play_id','frame_id','x','y']]
    .rename(columns={'x':'qb_x','y':'qb_y'})
)
# x of the LT and RT at the snap frame. Each row carries one edge of the box
# (the other column is NaN); the two edges are merged in separately below.
oline_left_right = (
    df_tracking
    .query('position_by_loc.isin(["LT","RT"]) and frame_id == ball_snap_fid')
    [['game_play_id','position_by_loc','x']]
    .assign(oline_box_left=lambda x: np.where(x.position_by_loc == "LT", x.x, np.nan))
    .assign(oline_box_right=lambda x: np.where(x.position_by_loc == "RT", x.x, np.nan))
    .drop(columns=['position_by_loc','x'])
)

# Candidate ball carriers: RB/FB/WR/TE rows from the snap frame through snap + 20
# frames. (`col == [list]` inside DataFrame.query is a membership test.)
primary_rb = (
    df_tracking
    .query('position_by_loc == ["RB","FB","WR","TE"] and ball_snap_fid <= frame_id <= ball_snap_fid + 20')
        [['game_play_id','frame_id','ball_snap_fid','nfl_id','x','y','position_by_loc']]
    .merge(
        qb_xy,
        on=['game_play_id','frame_id'],
        how='left'
    )
    .merge(
        oline_left_right[['game_play_id','oline_box_left']].query('oline_box_left.notnull()'),
        on=['game_play_id'],
        how='left'
    )
    .merge(
        oline_left_right[['game_play_id','oline_box_right']].query('oline_box_right.notnull()'),
        on=['game_play_id'],
        how='left'
    )
    # Keep only frames where the candidate's x is inside the tackle box, with a
    # margin of 1 on each side (same units as x; presumably yards - TODO confirm).
    .query('oline_box_left - 1 <= x <= oline_box_right + 1')
    .assign(qb_y_at_snap=lambda x: np.where(x.frame_id == x.ball_snap_fid, x.qb_y, np.nan))
    .assign(rb_y_at_snap=lambda x: np.where(x.frame_id == x.ball_snap_fid, x.y, np.nan))
    # Propagate the snap-frame values to the later frames of the play.
    # NOTE(review): the forward fill runs over every column and every row of the
    # play in its current row order, so it assumes each player's snap-frame row
    # precedes that player's later rows; otherwise a row inherits another
    # player's snap value. Confirm the row ordering of df_tracking.
    .groupby('game_play_id',group_keys=False) 
    .apply(lambda group: group.ffill(axis=0)) 
    .reset_index(drop=True)
    # Candidate's y at the snap must be below the QB's y at the snap plus 1.
    .query('rb_y_at_snap < qb_y_at_snap + 1')
    .rename(columns={'position_by_loc':'rb_pos'})
)
primary_rb['dist_to_qb'] = np.sqrt((primary_rb['x'] - primary_rb['qb_x'])**2 + (primary_rb['y'] - primary_rb['qb_y'])**2)
# Lower weight wins: prefer an RB over an FB over a WR over a TE.
positional_weight = {
    'RB': 1,
    'FB': 2,
    'WR': 3,
    'TE': 4,
}
primary_rb['pos_weight'] = primary_rb['rb_pos'].map(positional_weight)
# One row per play: among frames within 10 of the QB, take the best position
# weight and, within that, the smallest distance to the QB.
primary_rb = (
    primary_rb
    .query('dist_to_qb <= 10')
    .sort_values(['pos_weight','dist_to_qb'], ascending=[True,True])
    .drop_duplicates('game_play_id', keep='first')
)
primary_rb['primary_rb'] = True

# Drop both columns this cell adds so that re-running it does not merge onto
# stale copies (a leftover `rb_pos` would come back as `rb_pos_x` / `rb_pos_y`).
df_tracking = df_tracking.drop(columns=['primary_rb','rb_pos'], errors='ignore')
df_tracking = (
    df_tracking
    .merge(primary_rb[['game_play_id','nfl_id','primary_rb']], on=['game_play_id','nfl_id'], how='left')
    .fillna({'primary_rb':False})
    # The left merge leaves an object column of True/NaN; cast explicitly so the
    # later `~primary_rb` / `and primary_rb` queries operate on real booleans.
    .astype({'primary_rb': bool})
    .merge(primary_rb[['game_play_id','rb_pos']], on='game_play_id', how='left')
)

In [113]:
# Plays on which no tracking row is flagged as the primary rb: after sorting the
# flagged rows first and keeping one row per play, an unflagged survivor means
# the play has no primary rb at all.
top_row_per_play = (
    df_tracking
    .sort_values('primary_rb', ascending=False)
    .drop_duplicates('game_play_id', keep='first')
)
drop_plays = top_row_per_play.query('~primary_rb').game_play_id.unique().tolist()

if drop_plays:
    print(f'Dropping {len(drop_plays)} plays w/o a primary rb')
    print(drop_plays)
    not_dropped = '~game_play_id.isin(@drop_plays)'
    df_tracking = df_tracking.query(not_dropped)
    df_play = df_play.query(not_dropped)
    df_player_play = df_player_play.query(not_dropped)

Dropping 1 plays w/o a primary rb
['2022110601_781']


# Standardize play direction based on initial rb direction

In [114]:
# Primary rb's `dir` over frames snap + 10 through snap + 20.
rb_window = (
    df_tracking
    .query('ball_snap_fid + 10 <= frame_id <= ball_snap_fid + 20 and primary_rb')
    [['game_play_id','dir']]
)
# Readings above 270 are replaced by 0 before averaging.
rb_window = rb_window.assign(dir=np.where(rb_window['dir'] > 270, 0, rb_window['dir']))

# One row per play holding the mean post-snap direction.
rb_dir = (
    rb_window
    .groupby('game_play_id')
    .mean()
    .reset_index()
    .rename(columns={'dir':'rb_dir_post_snap'})
)

mean_heading = rb_dir['rb_dir_post_snap']

# Binary side: below 90 is 'right', everything else is 'left'.
rb_dir['play_dir'] = np.where(mean_heading < 90, 'right', 'left')

# Finer location bins as (inclusive upper bound, label); the first matching bin
# wins and anything above the last bound falls through to 'outside-left'.
location_bins = [
    (45, 'outside-right'),
    (85, 'inside-right'),
    (95, 'middle'),
    (135, 'inside-left'),
]
rb_dir['play_dir_location'] = np.select(
    [mean_heading <= upper_bound for upper_bound, _ in location_bins],
    [label for _, label in location_bins],
    default='outside-left'
)

In [115]:
# Preview the first five per-play direction rows.
rb_dir.head(5)

Unnamed: 0,game_play_id,rb_dir_post_snap,play_dir,play_dir_location
0,2022110300_100,141.525455,left,outside-left
1,2022110300_1091,134.243636,left,inside-left
2,2022110300_1161,96.680909,left,inside-left
3,2022110300_1182,115.484545,left,inside-left
4,2022110300_1208,168.286364,left,outside-left


In [116]:
df_run_concept = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'run_concept.pkl'))

# This notebook later writes df_run_concept back to the same pickle, so on a
# re-run the loaded frame already contains the direction columns. Drop them
# first; otherwise the merge would emit `play_dir_x` / `play_dir_y` (and the
# `play_dir` check below would fail).
direction_cols = ['play_dir','play_dir_location']
df_run_concept = (
    df_run_concept
    .drop(columns=direction_cols, errors='ignore')
    .merge(rb_dir[['game_play_id'] + direction_cols], on='game_play_id', how='left')
)

# Number of run-concept plays that did not get a direction (expected: 0).
df_run_concept.play_dir.isna().sum()

0

In [117]:
# Preview the first five run-concept rows, now including the direction columns.
df_run_concept.head(5)

Unnamed: 0,game_play_id,rb_dir_post_snap,avg_oline_angle_1s_after_snap,var_oline_angle_1s_after_snap,avg_oline_angle_1s_after_snap_4_rightmost_oline,var_oline_angle_1s_after_snap_4_rightmost_oline,avg_oline_dx_1s_after_snap,dx_oline_1s_after_snap,n_pullers_left_of_center,right_gaurd_pulls,n_puller_behind_los_3s_after_snap,shotgun,singleback,i_form,pistol,jumbo,run_concept,play_dir,play_dir_location
0,2022110605_3861,59.339091,162.437078,24371.89855,195.154105,25114.338888,0.626,0.626,0.0,0.0,0.0,1,0,0,0,0,INSIDE ZONE,right,inside-right
1,2022110300_1182,295.484545,120.613286,6538.233344,97.03178,5390.891111,-0.094,-0.094,0.0,0.0,0.0,0,1,0,0,0,TRAP,left,inside-left
2,2022110600_2292,47.077273,20.5454,133.059014,23.596481,119.757918,2.47,2.47,0.0,0.0,0.0,0,0,0,1,0,OUTSIDE ZONE,right,inside-right
3,2022110604_328,39.861818,213.536723,12571.07133,256.83478,6333.968089,0.092,0.092,0.0,0.0,0.0,1,0,0,0,0,POWER,right,outside-right
4,2022110300_2479,315.269091,205.458357,5713.120468,202.938527,7112.343677,-0.198,-0.198,1.0,0.0,1.0,1,0,0,0,0,POWER,left,outside-left


In [118]:
# (number of rows, number of columns) of the merged run-concept frame.
(len(df_run_concept.index), len(df_run_concept.columns))

(497, 19)

In [119]:
# Overwrite the week's run_concept pickle (the same file this notebook loaded
# it from) with the direction-augmented frame.
run_concept_path = join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'run_concept.pkl')
df_run_concept.to_pickle(run_concept_path)