In [1]:
import os
import sys
from os.path import join
import json

from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
import nfl_data_py as nfl

ROOT_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.insert(0, os.path.join(ROOT_DIR,'py'))

import util
from plot.plot_simple import plot_play_with_speed

pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)

with open("paths.json", 'r') as f:
    paths = json.load(f)

PROCESSED_DATA_PATH = paths['processed_data']

# Classify Underlying Run Concept of "Trick" plays
The motion player often receives the handoff on trick plays, but there are underlying run concepts on many of these plays. We want to classify these run concepts because whether or not the motion player receives the ball is a feature in the downstream model.

In [2]:
df_run_concepts = pd.DataFrame()
for wk in tqdm(range(1,10)):
    df_trk_tmp = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{wk}', 'tracking_final.pkl'))
    df_ply_tmp = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{wk}', 'play_final.pkl'))
    df_motion_tmp = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{wk}', 'motion_plays.pkl'))
    df_trk_tmp = (
        df_trk_tmp
        .query('offense')
        [['game_play_id','nfl_id','motion_player']]
        .merge(
            (
                df_ply_tmp
                [['game_play_id','pff_run_concept_primary','pff_run_concept_secondary','rush_location_type','run_location_desc','run_location']]
                .rename(columns={'pff_run_concept_primary':'run_concept'})
            ),
            on='game_play_id',
            how='left'
        )
        .merge(
            df_motion_tmp
            [['game_play_id','motion_nfl_id','motion_had_rush_attempt','motion_group']],
            left_on=['game_play_id','nfl_id'],
            right_on=['game_play_id','motion_nfl_id'],
            how='left'
        )
        .sort_values(['motion_player','motion_had_rush_attempt'], ascending=[False,False])
        .drop_duplicates(['game_play_id'], keep='first')
    )
    df_run_concepts = pd.concat([df_run_concepts, df_trk_tmp])
del df_trk_tmp, df_ply_tmp, df_motion_tmp
res = df_run_concepts.value_counts(['run_concept','motion_player']).reset_index().rename(columns={0:'count'})
res.sort_values(['run_concept','motion_player'], ascending=[True,False]).reset_index(drop=True)

  0%|          | 0/9 [00:00<?, ?it/s]

100%|██████████| 9/9 [00:13<00:00,  1.48s/it]


Unnamed: 0,run_concept,motion_player,count
0,COUNTER,True,141
1,COUNTER,False,177
2,DRAW,True,21
3,DRAW,False,122
4,FB RUN,True,19
5,FB RUN,False,12
6,INSIDE ZONE,True,417
7,INSIDE ZONE,False,768
8,MAN,True,333
9,MAN,False,526


In [3]:
df_run_concepts.query('run_concept=="TRICK"').motion_had_rush_attempt.value_counts()

1.0    112
0.0     21
Name: motion_had_rush_attempt, dtype: int64

In [4]:
df_run_concepts.query('run_concept!="TRICK"').motion_had_rush_attempt.value_counts()

0.0    1899
1.0      70
Name: motion_had_rush_attempt, dtype: int64

# Load the data

In [16]:
WEEK = 2

df_tracking = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'tracking_final.pkl'))
df_game = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'games_final.pkl'))
df_play = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'play_final.pkl'))
df_player_play = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'player_play_final.pkl'))
df_player = pd.read_pickle(join(PROCESSED_DATA_PATH, 'players.pkl'))
df_team = pd.read_pickle(join(PROCESSED_DATA_PATH, 'teams.pkl'))

# Drop plays where QB is the ball carrier

In [17]:
(
    df_player_play
    [['game_play_id','had_rush_attempt','nfl_id']]
    .query('had_rush_attempt == 1')
    .merge(df_player[['nfl_id','position']], on='nfl_id', how='left')
    .position.value_counts()
)

RB    552
WR     19
FB      4
QB      2
Name: position, dtype: int64

In [18]:
gpids_with_qb_rusher = (
    df_player_play
    [['game_play_id','had_rush_attempt','nfl_id']]
    .query('had_rush_attempt == 1')
    .merge(df_player[['nfl_id','position']], on='nfl_id', how='left')
    .query('position == "QB"')
    .game_play_id
    .unique()
    .tolist()
)
keep_gids = df_play.query('~game_play_id.isin(@gpids_with_qb_rusher)').game_id.unique().tolist()

if len(gpids_with_qb_rusher) != 0:
    print(f'Dropping {len(gpids_with_qb_rusher)} plays with QB rusher')
    df_tracking = df_tracking.query('~game_play_id.isin(@gpids_with_qb_rusher)')
    df_play = df_play.query('~game_play_id.isin(@gpids_with_qb_rusher)')
    df_player_play = df_player_play.query('~game_play_id.isin(@gpids_with_qb_rusher)')
    df_game = df_game.query('game_id.isin(@gpids_with_qb_rusher)')

Dropping 2 plays with QB rusher


# Verify all plays have 5 labeled oline players (LT, LT, C, RT, RT)

In [19]:
all_gpids = set(df_tracking.game_play_id.unique().tolist())

gpids_with_oline = (
    set(
        df_tracking
        .query('position_by_loc.isin(["LT","LG","C","RG","RT"])')
        .game_play_id
        .unique()
        .tolist()
    )
)

all_gpids - gpids_with_oline

set()

# Define primary RB on plays with multiple RBs

In [20]:
qb_xy = (
    df_tracking
    .query('position_by_loc == "QB"')
    [['game_play_id','frame_id','x','y']]
    .rename(columns={'x':'qb_x','y':'qb_y'})
)
oline_left_right = (
    df_tracking
    .query('position_by_loc.isin(["LT","RT"]) and frame_id == ball_snap_fid')
    [['game_play_id','position_by_loc','x']]
    .assign(oline_box_left=lambda x: np.where(x.position_by_loc == "LT", x.x, np.nan))
    .assign(oline_box_right=lambda x: np.where(x.position_by_loc == "RT", x.x, np.nan))
    .drop(columns=['position_by_loc','x'])
)

primary_rb = (
    df_tracking
    .query('position_by_loc == ["RB","FB","WR","TE"] and ball_snap_fid <= frame_id <= ball_snap_fid + 20')
        [['game_play_id','frame_id','ball_snap_fid','nfl_id','x','y','position_by_loc']]
    .merge(
        qb_xy,
        on=['game_play_id','frame_id'],
        how='left'
    )
    .merge(
        oline_left_right[['game_play_id','oline_box_left']].query('oline_box_left.notnull()'),
        on=['game_play_id'],
        how='left'
    )
    .merge(
        oline_left_right[['game_play_id','oline_box_right']].query('oline_box_right.notnull()'),
        on=['game_play_id'],
        how='left'
    )
    .query('oline_box_left - 1 <= x <= oline_box_right + 1')
    .assign(qb_y_at_snap=lambda x: np.where(x.frame_id == x.ball_snap_fid, x.qb_y, np.nan))
    .assign(rb_y_at_snap=lambda x: np.where(x.frame_id == x.ball_snap_fid, x.y, np.nan))
    .groupby('game_play_id',group_keys=False) 
    .apply(lambda group: group.ffill(axis=0)) 
    .reset_index(drop=True)
    .query('rb_y_at_snap < qb_y_at_snap + 1')
    .rename(columns={'position_by_loc':'rb_pos'})
)
primary_rb['dist_to_qb'] = np.sqrt((primary_rb['x'] - primary_rb['qb_x'])**2 + (primary_rb['y'] - primary_rb['qb_y'])**2)
positional_weight = {
    'RB': 1,
    'FB': 2,
    'WR': 3,
    'TE': 4,
}
primary_rb['pos_weight'] = primary_rb['rb_pos'].map(positional_weight)
primary_rb = (
    primary_rb
    .query('dist_to_qb <= 10')
    .sort_values(['pos_weight','dist_to_qb'], ascending=[True,True])
    .drop_duplicates('game_play_id', keep='first')
)
primary_rb['primary_rb'] = True
if 'primary_rb' in df_tracking.columns:
    df_tracking = df_tracking.drop(columns=['primary_rb'])
df_tracking = df_tracking.merge(primary_rb[['game_play_id','nfl_id','primary_rb']], on=['game_play_id','nfl_id'], how='left')
df_tracking = df_tracking.fillna({'primary_rb':False})
df_tracking = df_tracking.merge(primary_rb[['game_play_id','rb_pos']], on='game_play_id', how='left')
# del primary_rb, qb_xy

In [21]:
#plays w/o a primary rb
drop_plays = (
    df_tracking
    .sort_values('primary_rb', ascending=False)
    .drop_duplicates('game_play_id', keep='first')
    .query('~primary_rb')
    .game_play_id
    .unique().tolist()
)

if len(drop_plays) != 0:
    print(f'Dropping {len(drop_plays)} plays w/o a primary rb')
    print(drop_plays)
    df_tracking = df_tracking.query('~game_play_id.isin(@drop_plays)')
    df_play = df_play.query('~game_play_id.isin(@drop_plays)')
    df_player_play = df_player_play.query('~game_play_id.isin(@drop_plays)')

In [22]:
# gpid = "2022100900_1697"
# plot_play_with_speed(
#     df_tracking.query('last_line_set_fid <= frame_id and game_play_id==@gpid'),
#     gpid, 
#     every_other_frame=True, 
#     event_col='event_new',
#     plot_motion=False,
#     show_motion_frames=False,
#     highlight_offensive_positions=True,
#     highlight_oline=False,
#     highlight_primary_rb=True,
#     highlight_pullers=False,
# )

# Standardize play direction based on initial rb direction

In [23]:
rb_dir = (
    df_tracking
    .query('ball_snap_fid + 10 <= frame_id <= ball_snap_fid + 20 and primary_rb')
    [['game_play_id','dir']]
    .assign(dir=lambda x: np.where(x['dir'] > 270, 0, x['dir']))
    .groupby('game_play_id')
    .mean()
    .reset_index()
    .rename(columns={'dir':'rb_dir_post_snap'})
)
rb_dir['play_dir'] = np.where(
    rb_dir['rb_dir_post_snap'] < 90,
    'right',
    'left'
)
rb_dir['play_dir_location'] = np.select(
    [
        rb_dir['rb_dir_post_snap'] <= 45,
        rb_dir['rb_dir_post_snap'] <= 85,
        rb_dir['rb_dir_post_snap'] <= 95,
        rb_dir['rb_dir_post_snap'] <= 135,
    ],
    [
        'outside-right',
        'inside-right',
        'middle',
        'inside-left'
    ],
    default='outside-left'
)

cols = ['game_id', 'play_id', 'game_play_id', 'nfl_id', 'week', 'display_name',
       'frame_id', 'frame_type', 'time', 'jersey_number', 'club',
       'play_direction', 'x', 'y', 's', 'a', 'dis', 'o', 'dir', 'event',
       'position', 'absolute_yardline_number', 'yards_to_go', 'offense',
       'defense', 'ball_x', 'ball_y', 'event_new', 'position_by_loc', 'motion_player',
       'ball_snap_fid','last_line_set_fid','primary_rb','rb_pos']
df_tracking = (
    df_tracking[cols]
    .merge(rb_dir[['game_play_id','play_dir','rb_dir_post_snap']], on='game_play_id', how='left')
        .merge(
        df_player_play[['game_play_id','nfl_id','had_rush_attempt']],
        on=['game_play_id','nfl_id'],
        how='left'
    )
)

In [24]:
# Mirror y accross field when rb_dir_post_snap is left
df_tracking['x'] = np.where(df_tracking['play_dir'] == 'left', 160/3 - df_tracking['x'], df_tracking['x'])
df_tracking['ball_x'] = np.where(df_tracking['play_dir'] == 'left', 160/3 - df_tracking['ball_x'], df_tracking['ball_x'])

# Mirror dir when rb_dir_post_snap is left
df_tracking['dir'] = np.where(
    df_tracking['play_dir'] == 'left', 
    (df_tracking['dir'] + 180) % 360, 
    df_tracking['dir']
)
df_tracking['rb_dir_post_snap'] = np.where(
    df_tracking['play_dir'] == 'left', 
    (df_tracking['rb_dir_post_snap'] + 180) % 360,
    df_tracking['rb_dir_post_snap']
)
df_tracking['o'] = np.where(
    df_tracking['play_dir'] == 'left', 
    (df_tracking['o'] + 180) % 360,
    df_tracking['o']
)
df_tracking['position_by_loc'] = np.where(
    df_tracking['play_dir'] == 'left',
    np.select(
        [
            df_tracking['position_by_loc'] == 'LT',
            df_tracking['position_by_loc'] == 'LG',
            df_tracking['position_by_loc'] == 'RG',
            df_tracking['position_by_loc'] == 'RT'
        ],
        [
            'RT',
            'RG',
            'LG',
            'LT'
        ],
        df_tracking['position_by_loc']
    ),
    df_tracking['position_by_loc']
)

# Offensive line box

In [None]:
# Feature of the min and max x values of the 5 offensive linemen at the snap
oline_at_snap = (
    df_tracking
    .query('frame_id == ball_snap_fid and position_by_loc.isin(["LT","LG","C","RG","RT"])')
    [['game_play_id','position_by_loc','x','y']]
)
oline_x_min = oline_at_snap.groupby('game_play_id').x.min().reset_index().rename(columns={'x':'oline_x_left_at_snap'})
oline_x_max = oline_at_snap.groupby('game_play_id').x.max().reset_index().rename(columns={'x':'oline_x_right_at_snap'})
oline_y_min_right = (
    oline_at_snap
    .query('position_by_loc.isin(["C","RG","RT"])')
    .groupby('game_play_id')
    .y.min()
    .reset_index()
    .rename(columns={'y':'oline_y_min_right_at_snap'})
)
oline_y_min_left = (
    oline_at_snap
    .query('position_by_loc.isin(["LT","LG","C"])')
    .groupby('game_play_id')
    .y.min()
    .reset_index()
    .rename(columns={'y':'oline_y_min_left_at_snap'})
)
if 'oline_x_left_at_snap' in df_tracking.columns:
    df_tracking.drop(columns=['oline_x_left_at_snap'], inplace=True)
if 'oline_x_right_at_snap' in df_tracking.columns:
    df_tracking.drop(columns=['oline_x_right_at_snap'], inplace=True)
if 'oline_y_min_left_at_snap' in df_tracking.columns:
    df_tracking.drop(columns=['oline_y_min_left_at_snap'], inplace=True)
if 'oline_y_min_right_at_snap' in df_tracking.columns:
    df_tracking.drop(columns=['oline_y_min_right_at_snap'], inplace=True)
df_tracking = (
    df_tracking
    .merge(oline_x_min, on='game_play_id', how='left')
    .merge(oline_x_max, on='game_play_id', how='left')
    .merge(oline_y_min_left, on='game_play_id', how='left')
    .merge(oline_y_min_right, on='game_play_id', how='left')
)
del oline_at_snap, oline_x_min, oline_x_max, oline_y_min_left, oline_y_min_right

# Feature of Center x at ball snap
center_at_snap = (
    df_tracking
    .query('frame_id == ball_snap_fid and position_by_loc == "C"')
    [['game_play_id','x']]
    .rename(columns={'x':'center_x_at_snap'})
)
if 'center_x_at_snap' in df_tracking.columns:
    df_tracking.drop(columns='center_x_at_snap', inplace=True)
df_tracking = df_tracking.merge(center_at_snap, on='game_play_id', how='left')
del center_at_snap

# Label Extra Players acting as part of the OLine
MAX_YARDS_BEHIND_OLINE = 1
MAX_YARDS_NEXT_TO_OLINE = 2
MOVING_S_THRESHOLD = 1
df_tracking['extra_on_oline_x_left_at_snap'] = df_tracking.oline_x_left_at_snap
df_tracking['extra_on_oline_x_right_at_snap'] = df_tracking.oline_x_right_at_snap
df_tracking['extra_on_oline_y_min_left_at_snap'] = df_tracking.oline_y_min_left_at_snap
df_tracking['extra_on_oline_y_min_right_at_snap'] = df_tracking.oline_y_min_right_at_snap
df_tracking['on_oline'] = np.where(
    df_tracking.position_by_loc.isin(["LT","LG","C","RG","RT"]),
    True,
    False
)
extra_on_oline = pd.DataFrame()
extra_on_oline_len = 1
while extra_on_oline_len != len(extra_on_oline):
    extra_on_oline = (
        df_tracking
        .query('(position_by_loc != "QB") and ~on_oline and offense and frame_id == ball_snap_fid')
        [['game_play_id','nfl_id','x','y','s',
        'extra_on_oline_y_min_left_at_snap','extra_on_oline_y_min_right_at_snap',
        'extra_on_oline_x_left_at_snap','extra_on_oline_x_right_at_snap','on_oline']]
    )
    
    # Save initial length to check if we need to run the loop again
    extra_on_oline_len = len(extra_on_oline)

    extra_on_oline['on_oline'] = np.where(
        (extra_on_oline.s < MOVING_S_THRESHOLD) &
        (
            (
                (extra_on_oline.x < extra_on_oline.extra_on_oline_x_left_at_snap) &
                (extra_on_oline.x >= extra_on_oline.extra_on_oline_x_left_at_snap - MAX_YARDS_NEXT_TO_OLINE) &
                (extra_on_oline.y >= extra_on_oline.extra_on_oline_y_min_left_at_snap - MAX_YARDS_BEHIND_OLINE)
            ) |
            (
                (extra_on_oline.x > extra_on_oline.extra_on_oline_x_right_at_snap) &
                (extra_on_oline.x <= extra_on_oline.extra_on_oline_x_right_at_snap + MAX_YARDS_NEXT_TO_OLINE) &
                (extra_on_oline.y >= extra_on_oline.extra_on_oline_y_min_right_at_snap - MAX_YARDS_BEHIND_OLINE)
            )
        ),
        True,
        False
    )

    # Update oline box left for the new players on the oline left side
    extra_on_online_x_min = (
        extra_on_oline
        .query('on_oline')
        [['game_play_id','nfl_id','x']]
        .groupby('game_play_id')
        .x.min()
        .reset_index()
        .rename(columns={'x':'extra_on_oline_x_left_at_snap'})
    )
    df_tracking = (
        df_tracking
        .merge(
            extra_on_online_x_min[['game_play_id','extra_on_oline_x_left_at_snap']], 
            on='game_play_id', 
            how='left', 
            suffixes=('','_new')
        )
        .assign(
            extra_on_oline_x_left_at_snap = lambda x: np.where(
                x.extra_on_oline_x_left_at_snap_new.notnull() &
                (x.extra_on_oline_x_left_at_snap_new < x.extra_on_oline_x_left_at_snap),
                x.extra_on_oline_x_left_at_snap_new,
                x.extra_on_oline_x_left_at_snap
            )
        )
        .drop(columns='extra_on_oline_x_left_at_snap_new')
    )

    # Update oline box right for the new players on the oline right side
    extra_on_online_x_max = (
        extra_on_oline
        .query('on_oline')
        [['game_play_id','nfl_id','x']]
        .groupby('game_play_id')
        .x.max()
        .reset_index()
        .rename(columns={'x':'extra_on_oline_x_right_at_snap'})
    )
    df_tracking = (
        df_tracking
        .merge(
            extra_on_online_x_max[['game_play_id','extra_on_oline_x_right_at_snap']], 
            on='game_play_id', 
            how='left', 
            suffixes=('','_new')
        )
        .assign(
            extra_on_oline_x_right_at_snap = lambda x: np.where(
                x.extra_on_oline_x_right_at_snap_new.notnull() &
                (x.extra_on_oline_x_right_at_snap_new > x.extra_on_oline_x_right_at_snap),
                x.extra_on_oline_x_right_at_snap_new,
                x.extra_on_oline_x_right_at_snap
            )
        )
        .drop(columns='extra_on_oline_x_right_at_snap_new')
    )

    # Update on_oline for the new players on the oline
    on_oline = extra_on_oline.query('on_oline')[['game_play_id','nfl_id','on_oline']]
    df_tracking = (
        df_tracking.merge(
            on_oline,
            on=['game_play_id','nfl_id'],
            how='left',
            suffixes=('','_new')
        )
        .assign(
            on_oline = lambda x: np.where(
                x.on_oline_new.notnull(),
                x.on_oline_new,
                x.on_oline
            )
        )
        .drop(columns='on_oline_new')
    )

    oline_y_min_left = (
        df_tracking
        .query('on_oline and x < center_x_at_snap')
        [['game_play_id','y']]
        .groupby('game_play_id')
        .y.min()
        .reset_index()
        .rename(columns={'y':'extra_on_oline_y_min_left_at_snap'})
    )
    oline_y_min_right = (
        df_tracking
        .query('on_oline and x > center_x_at_snap')
        [['game_play_id','y']]
        .groupby('game_play_id')
        .y.min()
        .reset_index()
        .rename(columns={'y':'extra_on_oline_y_min_right_at_snap'})
    )
    df_tracking = (
        df_tracking
        .drop(columns=['extra_on_oline_y_min_left_at_snap','extra_on_oline_y_min_right_at_snap'])
        .merge(oline_y_min_left, on='game_play_id', how='left')
        .merge(oline_y_min_right, on='game_play_id', how='left')
    )

    extra_on_oline = extra_on_oline.query('~on_oline')
    del extra_on_online_x_min, extra_on_online_x_max, on_oline, oline_y_min_left, oline_y_min_right

del extra_on_oline

# Offensive line movement angle

In [165]:
oline_xy_at_snap = (
    df_tracking
    .query('frame_id == ball_snap_fid and position_by_loc.isin(["LT","LG","C","RG","RT"])')
    [['game_play_id','nfl_id','x','y']]
    .rename(columns={'x':'oline_x_at_snap','y':'oline_y_at_snap'})
)
if 'oline_x_at_snap' in df_tracking.columns:
    df_tracking.drop(columns=['oline_x_at_snap'], inplace=True)
if 'oline_y_at_snap' in df_tracking.columns:
    df_tracking.drop(columns=['oline_y_at_snap'], inplace=True)
df_tracking = df_tracking.merge(oline_xy_at_snap, on=['game_play_id','nfl_id'], how='left')
del oline_xy_at_snap

oline_xy_1s_after_snap = (
    df_tracking
    .query('frame_id == ball_snap_fid + 10 and position_by_loc.isin(["LT","LG","C","RG","RT"])')
    [['game_play_id','nfl_id','x','y']]
    .rename(columns={'x':'oline_x_1s_after_snap','y':'oline_y_1s_after_snap'})
)
if 'oline_x_1s_after_snap' in df_tracking.columns:
    df_tracking.drop(columns=['oline_x_1s_after_snap'], inplace=True)
if 'oline_y_1s_after_snap' in df_tracking.columns:
    df_tracking.drop(columns=['oline_y_1s_after_snap'], inplace=True)
df_tracking = df_tracking.merge(oline_xy_1s_after_snap, on=['game_play_id','nfl_id'], how='left')
del oline_xy_1s_after_snap

df_tracking['dy_oline_1s_after_snap'] = df_tracking.oline_y_1s_after_snap - df_tracking.oline_y_at_snap
df_tracking['dx_oline_1s_after_snap'] = df_tracking.oline_x_1s_after_snap - df_tracking.oline_x_at_snap
df_tracking['oline_angle_1s_after_snap'] = np.degrees(np.arctan2(
    df_tracking.dy_oline_1s_after_snap, 
    df_tracking.dx_oline_1s_after_snap
))
df_tracking['oline_angle_1s_after_snap'] = np.where(
    df_tracking.oline_angle_1s_after_snap < 0,
    360 + df_tracking.oline_angle_1s_after_snap,
    df_tracking.oline_angle_1s_after_snap
)

# Generate Features to classify run concept

In [166]:
df_run_concept = (
    df_play[['game_play_id','offense_formation','pff_run_concept_primary']]
    .rename(columns={'pff_run_concept_primary':'run_concept'})            
)
df_run_concept = df_run_concept.merge(
    (
        df_tracking[['game_play_id','rb_dir_post_snap']]
        .drop_duplicates('game_play_id')
    ),
    on='game_play_id',
    how='left'
)

viable_run_concepts = ['MAN', 'OUTSIDE ZONE', 'TRAP', 'COUNTER', 'POWER', 
                       'PULL LEAD', 'INSIDE ZONE', 'TRICK', 'DRAW']
df_run_concept = df_run_concept.query('run_concept in @viable_run_concepts')

In [167]:
avg_oline_angle_1s_after_snap = (
    df_tracking
    .query('position_by_loc.isin(["LT","LG","C","RG","RT"])')
    .groupby('game_play_id')
    .oline_angle_1s_after_snap
    .mean()
    .reset_index()
    .rename(columns={'oline_angle_1s_after_snap':'avg_oline_angle_1s_after_snap'})
)
df_run_concept = df_run_concept.merge(avg_oline_angle_1s_after_snap, on='game_play_id', how='left')
del avg_oline_angle_1s_after_snap

variance_oline_angle_1s_after_snap = (
    df_tracking
    .query('position_by_loc.isin(["LT","LG","C","RG","RT"])')
    .groupby('game_play_id')
    .oline_angle_1s_after_snap
    .var()
    .reset_index()
    .rename(columns={'oline_angle_1s_after_snap':'var_oline_angle_1s_after_snap'})
)
df_run_concept = df_run_concept.merge(variance_oline_angle_1s_after_snap, on='game_play_id', how='left')
del variance_oline_angle_1s_after_snap

avg_oline_angle_1s_after_snap_4_rightmost_oline = (
    df_tracking
    .query('position_by_loc.isin(["LG","C","RG","RT"])')
    .groupby('game_play_id')
    .oline_angle_1s_after_snap
    .mean()
    .reset_index()
    .rename(columns={'oline_angle_1s_after_snap':'avg_oline_angle_1s_after_snap_4_rightmost_oline'})
)
df_run_concept = df_run_concept.merge(avg_oline_angle_1s_after_snap_4_rightmost_oline, on='game_play_id', how='left')
del avg_oline_angle_1s_after_snap_4_rightmost_oline

variance_oline_angle_1s_after_snap_4_rightmost_oline = (
    df_tracking
    .query('position_by_loc.isin(["LG","C","RG","RT"])')
    .groupby('game_play_id')
    .oline_angle_1s_after_snap
    .var()
    .reset_index()
    .rename(columns={'oline_angle_1s_after_snap':'var_oline_angle_1s_after_snap_4_rightmost_oline'})
)
df_run_concept = df_run_concept.merge(variance_oline_angle_1s_after_snap_4_rightmost_oline, on='game_play_id', how='left')
del variance_oline_angle_1s_after_snap_4_rightmost_oline


avg_oline_dx_1s_after_snap = (
    df_tracking
    .query('position_by_loc.isin(["LT","LG","C","RG","RT"])')
    .groupby('game_play_id')
    .dx_oline_1s_after_snap
    .mean()
    .reset_index()
    .rename(columns={'dx_oline_1s_after_snap':'avg_oline_dx_1s_after_snap'})
)
df_run_concept = df_run_concept.merge(avg_oline_dx_1s_after_snap, on='game_play_id', how='left')
del avg_oline_dx_1s_after_snap

avg_oline_dy_1s_after_snap = (
    df_tracking
    .query('position_by_loc.isin(["LT","LG","C","RG","RT"])')
    .groupby('game_play_id')
    .dx_oline_1s_after_snap
    .mean()
    .reset_index()
    .rename(columns={'dy_oline_1s_after_snap':'avg_oline_dy_1s_after_snap'})
)
df_run_concept = df_run_concept.merge(avg_oline_dy_1s_after_snap, on='game_play_id', how='left')
del avg_oline_dy_1s_after_snap

In [168]:
# Identify the pulling players (come from backside and cross pass the center behind the los)
center_xy = (
    df_tracking
    .query('position_by_loc == "C" and ball_snap_fid <= frame_id <= ball_snap_fid + 20')
    [['game_play_id','frame_id','x','y']]
    .rename(columns={'x':'center_x','y':'center_y'})
)
rt_xy = (
    df_tracking
    .query('position_by_loc == "RT" and ball_snap_fid <= frame_id <= ball_snap_fid + 20')
    [['game_play_id','frame_id','x','y']]
    .rename(columns={'x':'rt_x','y':'rt_y'})
)
rt_x_at_snap = (
    df_tracking
    .query('position_by_loc == "RT" and ball_snap_fid == frame_id')
    [['game_play_id','x']]
    .rename(columns={'x':'rt_x_at_snap'})
)
x_at_snap = (
    df_tracking
    .query('ball_snap_fid == frame_id')
    [['game_play_id','nfl_id','x']]
    .rename(columns={'x':'x_at_snap'})
)
x_2s_after_snap = (
    df_tracking
    .query('frame_id == ball_snap_fid + 20')
    [['game_play_id','nfl_id','x']]
    .rename(columns={'x':'x_2s_after_snap'})
)
pullers_left_of_c = (
    df_tracking
    [['game_play_id','frame_id','nfl_id','x','y','position_by_loc','ball_snap_fid','on_oline',
      'center_x_at_snap','absolute_yardline_number']]
    .merge(x_at_snap, on=['game_play_id','nfl_id'], how='left')
    .merge(x_2s_after_snap, on=['game_play_id','nfl_id'], how='left')
    .assign(dx=lambda x: x.x_2s_after_snap - x.x_at_snap)
    .query('on_oline and x_at_snap < center_x_at_snap and ball_snap_fid <= frame_id <= ball_snap_fid + 20 ' + \
           'and ~position_by_loc.isin(["C","RG","RT"]) and y < absolute_yardline_number')
    .merge(center_xy, on=['game_play_id','frame_id'], how='left')
    .query('x > center_x and y < center_y and dx > 1')
    .drop_duplicates(['game_play_id','nfl_id'], keep='first')
)
pullers_left_of_c['puller_left_of_center'] = True
if 'puller_left_of_center' in df_tracking.columns:
    df_tracking.drop(columns=['puller_left_of_center'], inplace=True)
df_tracking = df_tracking.merge(pullers_left_of_c[['game_play_id','nfl_id','puller_left_of_center']], on=['game_play_id','nfl_id'], how='left')
df_tracking = df_tracking.fillna({'puller_left_of_center':False})
del pullers_left_of_c

pullers_left_of_rt = (
    df_tracking
    [['game_play_id','frame_id','nfl_id','x','y','position_by_loc','ball_snap_fid','on_oline',
      'absolute_yardline_number']]
    .merge(x_at_snap, on=['game_play_id','nfl_id'], how='left')
    .merge(x_2s_after_snap, on=['game_play_id','nfl_id'], how='left')
    .merge(rt_x_at_snap, on=['game_play_id'], how='left')
    .assign(dx=lambda x: x.x_2s_after_snap - x.x_at_snap)
    .query('on_oline and x_at_snap < rt_x_at_snap and ball_snap_fid <= frame_id <= ball_snap_fid + 30 ' + \
           'and ~position_by_loc.isin(["RT"]) and y < absolute_yardline_number')
    .merge(rt_xy, on=['game_play_id','frame_id'], how='left')
    .query('x > rt_x and y < rt_y and dx > 1')
    .drop_duplicates(['game_play_id','nfl_id'], keep='first')
)
pullers_left_of_rt['puller_left_of_rt'] = True
if 'puller_left_of_rt' in df_tracking.columns:
    df_tracking.drop(columns=['puller_left_of_rt'], inplace=True)
df_tracking = df_tracking.merge(pullers_left_of_rt[['game_play_id','nfl_id','puller_left_of_rt']], on=['game_play_id','nfl_id'], how='left')
df_tracking = df_tracking.fillna({'puller_left_of_rt':False})
del pullers_left_of_rt

puller_is_right_gaurd = (
    df_tracking
    [['game_play_id','frame_id','nfl_id','x','y','position_by_loc','ball_snap_fid','on_oline',
      'center_x_at_snap','absolute_yardline_number']]
    .merge(x_at_snap, on=['game_play_id','nfl_id'], how='left')
    .merge(x_2s_after_snap, on=['game_play_id','nfl_id'], how='left')
    .assign(dx=lambda x: x.x_2s_after_snap - x.x_at_snap)
    .query('ball_snap_fid <= frame_id <= ball_snap_fid + 20 ' + \
           'and position_by_loc.isin(["RG"]) and y < absolute_yardline_number')
    .merge(rt_xy, on=['game_play_id','frame_id'], how='left')
    .query('x > rt_x and y < rt_y and dx > 1')
    .drop_duplicates(['game_play_id','nfl_id'], keep='first')
)
puller_is_right_gaurd['puller_is_right_gaurd'] = True
if 'puller_is_right_gaurd' in df_tracking.columns:
    df_tracking.drop(columns=['puller_is_right_gaurd'], inplace=True)
df_tracking = df_tracking.merge(puller_is_right_gaurd[['game_play_id','nfl_id','puller_is_right_gaurd']], on=['game_play_id','nfl_id'], how='left')
df_tracking = df_tracking.fillna({'puller_is_right_gaurd':False})
del puller_is_right_gaurd

In [169]:
n_pullers_left_of_c = (
    df_tracking
    .query('puller_left_of_center')
    .drop_duplicates(['game_play_id','nfl_id'])
    .groupby('game_play_id')
    .puller_left_of_center
    .sum()
    .reset_index()
    .rename(columns={'puller_left_of_center':'n_pullers_left_of_center'})
)
if 'n_pullers_left_of_center' in df_run_concept.columns:
    df_run_concept.drop(columns=['n_pullers_left_of_center'], inplace=True)
df_run_concept = df_run_concept.merge(n_pullers_left_of_c, on='game_play_id', how='left')
df_run_concept = df_run_concept.fillna({'n_pullers_left_of_center':0})
del n_pullers_left_of_c

# n_pullers_left_of_rt = (
#     df_tracking
#     .query('puller_left_of_rt')
#     .drop_duplicates(['game_play_id','nfl_id'])
#     .groupby('game_play_id')
#     .puller_left_of_rt
#     .sum()
#     .reset_index()
#     .rename(columns={'puller_left_of_rt':'n_pullers_left_of_rt'})
# )
# if 'n_pullers_left_of_rt' in df_run_concept.columns:
#     df_run_concept.drop(columns=['n_pullers_left_of_rt'], inplace=True)
# df_run_concept = df_run_concept.merge(n_pullers_left_of_rt, on='game_play_id', how='left')
# del n_pullers_left_of_rt

n_pullers_is_right_gaurd = (
    df_tracking
    .query('puller_is_right_gaurd')
    .drop_duplicates(['game_play_id','nfl_id'])
    .groupby('game_play_id')
    .puller_is_right_gaurd
    .sum()
    .reset_index()
    .rename(columns={'puller_is_right_gaurd':'n_pullers_is_right_gaurd'})
)
if 'n_pullers_is_right_gaurd' in df_run_concept.columns:
    df_run_concept.drop(columns=['n_pullers_is_right_gaurd'], inplace=True)
df_run_concept = df_run_concept.merge(n_pullers_is_right_gaurd, on='game_play_id', how='left')
df_run_concept = df_run_concept.fillna({'n_pullers_is_right_gaurd':0})
df_run_concept.rename(columns={'n_pullers_is_right_gaurd':'right_gaurd_pulls'}, inplace=True)
del n_pullers_is_right_gaurd

puller_behind_los_3s_after_snap = (
    df_tracking
    .query('frame_id == ball_snap_fid + 30 and ' +\
           '(puller_left_of_rt or puller_left_of_center or puller_is_right_gaurd) and ' +\
           'y < absolute_yardline_number')
    .groupby('game_play_id')
    .nfl_id
    .count()
    .reset_index()
    .rename(columns={'nfl_id':'n_puller_behind_los_3s_after_snap'})
)
if 'n_puller_behind_los_3s_after_snap' in df_run_concept.columns:
    df_run_concept.drop(columns=['n_puller_behind_los_3s_after_snap'], inplace=True)
df_run_concept = df_run_concept.merge(puller_behind_los_3s_after_snap, on='game_play_id', how='left')
df_run_concept = df_run_concept.fillna({'n_puller_behind_los_3s_after_snap':0})
del puller_behind_los_3s_after_snap
    

# Reformat the data

In [170]:
# One hot encode offense formation
if 'offense_formation' in df_run_concept.columns:
    df_run_concept['shotgun'] = np.where(df_run_concept.offense_formation == 'SHOTGUN', 1, 0)
    df_run_concept['singleback'] = np.where(df_run_concept.offense_formation == 'SINGLEBACK', 1, 0)
    df_run_concept['i_form'] = np.where(df_run_concept.offense_formation == 'I_FORM', 1, 0)
    df_run_concept['pistol'] = np.where(df_run_concept.offense_formation == 'PISTOL', 1, 0)
    df_run_concept['jumbo'] = np.where(df_run_concept.offense_formation == 'JUMBO', 1, 0)
    df_run_concept.drop(columns=['offense_formation'], inplace=True)

# move run_concept to the end
cols = df_run_concept.columns.tolist()
cols.remove('run_concept')
df_run_concept = df_run_concept[cols + ['run_concept']]
df_run_concept.head()

Unnamed: 0,game_play_id,rb_dir_post_snap,avg_oline_angle_1s_after_snap,var_oline_angle_1s_after_snap,avg_oline_angle_1s_after_snap_4_rightmost_oline,var_oline_angle_1s_after_snap_4_rightmost_oline,avg_oline_dx_1s_after_snap,dx_oline_1s_after_snap,n_pullers_left_of_center,right_gaurd_pulls,n_puller_behind_los_3s_after_snap,shotgun,singleback,i_form,pistol,jumbo,run_concept
0,2022110605_3861,59.339091,162.437078,24371.89855,195.154105,25114.338888,0.626,0.626,0.0,0.0,0.0,1,0,0,0,0,INSIDE ZONE
1,2022110300_1182,295.484545,120.613286,6538.233344,97.03178,5390.891111,-0.094,-0.094,0.0,0.0,0.0,0,1,0,0,0,TRAP
2,2022110600_2292,47.077273,20.5454,133.059014,23.596481,119.757918,2.47,2.47,0.0,0.0,0.0,0,0,0,1,0,OUTSIDE ZONE
3,2022110604_328,39.861818,213.536723,12571.07133,256.83478,6333.968089,0.092,0.092,0.0,0.0,0.0,1,0,0,0,0,POWER
4,2022110300_2479,315.269091,205.458357,5713.120468,202.938527,7112.343677,-0.198,-0.198,1.0,0.0,1.0,1,0,0,0,0,POWER


In [171]:
df_run_concept.shape

(497, 17)

In [None]:
df_run_concept = df_run_concept.merge(rb_dir[['game_play_id','play_dir','play_dir_location']], on='game_play_id', how='left')

In [172]:
df_run_concept.to_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'run_concept.pkl'))