In [1]:
import os
import sys
from os.path import join
import json

from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
import nfl_data_py as nfl

ROOT_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.insert(0, os.path.join(ROOT_DIR,'py'))

import util
from plot.plot_simple import plot_play_with_speed

pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)

with open("paths.json", 'r') as f:
    paths = json.load(f)

PROCESSED_DATA_PATH = paths['processed_data']

In [23]:
WEEK = 1

df_tracking = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'tracking_final.pkl'))
df_game = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'games_final.pkl'))
df_play = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'play_final.pkl'))
df_player_play = pd.read_pickle(join(PROCESSED_DATA_PATH, f'wk{WEEK}', 'player_play_final.pkl'))
df_player = pd.read_pickle(join(PROCESSED_DATA_PATH, 'players.pkl'))
df_team = pd.read_pickle(join(PROCESSED_DATA_PATH, 'teams.pkl'))

In [24]:
df_tracking.head()

Unnamed: 0,game_id,play_id,game_play_id,nfl_id,week,display_name,frame_id,frame_type,time,jersey_number,club,play_direction,x,y,s,a,dis,o,dir,event,position,absolute_yardline_number,yards_to_go,offense,defense,ball_x,ball_y,euclidean_dist_to_ball,dx_to_ball,dy_to_ball,event_new,position_by_loc,motion_player,ball_snap_fid,last_line_set_fid,oline_box_left,oline_box_right,center_x_at_line_set,first_line_set_fid,y_min_oline_left,y_min_oline_right,extra_on_oline,extra_oline_box_left,extra_oline_box_right
0,2022091200,64,2022091200_64,35459.0,1,Kareem Jackson,1,BEFORE_SNAP,2022-09-13 00:16:03.5,22.0,DEN,right,24.75,51.06,0.72,0.37,0.07,293.83,111.66,huddle_break_offense,SS,40,10,False,True,29.429999,39.470001,12.499219,-4.679999,11.589999,,SS,False,114,95,26.42,32.49,29.21,62,,,False,26.42,32.49
1,2022091200,64,2022091200_64,35459.0,1,Kareem Jackson,2,BEFORE_SNAP,2022-09-13 00:16:03.6,22.0,DEN,right,24.73,51.13,0.71,0.36,0.07,294.59,108.79,,SS,40,10,False,True,29.429999,39.48,12.562345,-4.699999,11.65,,SS,False,114,95,26.42,32.49,29.21,62,,,False,26.42,32.49
2,2022091200,64,2022091200_64,35459.0,1,Kareem Jackson,3,BEFORE_SNAP,2022-09-13 00:16:03.7,22.0,DEN,right,24.71,51.2,0.69,0.23,0.07,295.55,110.1,,SS,40,10,False,True,29.429999,39.48,12.634746,-4.719999,11.72,,SS,False,114,95,26.42,32.49,29.21,62,,,False,26.42,32.49
3,2022091200,64,2022091200_64,35459.0,1,Kareem Jackson,4,BEFORE_SNAP,2022-09-13 00:16:03.8,22.0,DEN,right,24.68,51.26,0.67,0.22,0.07,295.55,112.02,,SS,40,10,False,True,29.429999,39.48,12.70161,-4.749999,11.78,,SS,False,114,95,26.42,32.49,29.21,62,,,False,26.42,32.49
4,2022091200,64,2022091200_64,35459.0,1,Kareem Jackson,5,BEFORE_SNAP,2022-09-13 00:16:03.9,22.0,DEN,right,24.65,51.32,0.65,0.34,0.07,294.26,117.17,,SS,40,10,False,True,29.429999,39.48,12.768477,-4.779999,11.84,,SS,False,114,95,26.42,32.49,29.21,62,,,False,26.42,32.49


In [25]:
# drop everything before first line_set event
cols = [
    'game_play_id', 'frame_id','frame_type', 'event_new', 'nfl_id', 'position_by_loc', 
    'extra_oline_box_left', 'oline_box_left', 'center_x_at_line_set', 'oline_box_right', 
    'extra_oline_box_right', 'x', 'y', 's', 'a', 'o', 'dir', 'motion_player'
]
df_motion_and_shifts = df_tracking.query(
    'frame_id >= first_line_set_fid and ' +
    'frame_id <= ball_snap_fid + 10 and ' +
    'offense'
)[cols].copy()

# line_set window is each set of frame between line_set events
df_motion_and_shifts = df_motion_and_shifts.sort_values(['game_play_id','frame_id'])
line_set_windows = (
    df_motion_and_shifts
    .query('event_new == "line_set"')  # Only consider 'line_set' events
    .drop_duplicates(['game_play_id', 'frame_id'])  # Ensure unique frame per event
    .assign(line_set_window_number=lambda x: x.groupby('game_play_id').cumcount())  # Number line_set events within each game_play_id
)

# Step 3: Merge back to the original DataFrame
df_motion_and_shifts = df_motion_and_shifts.merge(
    line_set_windows[['game_play_id', 'frame_id', 'line_set_window_number']],
    on=['game_play_id', 'frame_id'],
    how='left'  # Merge without dropping rows from the original DataFrame
)

# drop plays without at occurence of motion_player
motion_gids = (
    df_motion_and_shifts
    .query('motion_player')
    .game_play_id
    .unique()
    .tolist()
)
df_motion_and_shifts = df_motion_and_shifts[df_motion_and_shifts.game_play_id.isin(motion_gids)]

df_motion_and_shifts['line_set_window_number'] = df_motion_and_shifts['line_set_window_number'].fillna(method='ffill').astype(int)

qb_x_last_line_set = (
    df_tracking
    .query('frame_id==last_line_set_fid and position=="QB"')
    .set_index('game_play_id')
    [['x','y']]
    .rename(columns={'x':'qb_x_last_line_set', 'y':'qb_y_last_line_set'})
    .reset_index()
)

df_motion_and_shifts = df_motion_and_shifts.merge(qb_x_last_line_set, on='game_play_id')

In [26]:
MOVING_THRESHOLD = 1.0
N_FRAMES_NOT_MOVING = 15

tqdm.pandas()

df_motion_cpy = df_motion_and_shifts.query('motion_player').copy()

# Step 2: Define the 'moving' column based on the threshold
df_motion_cpy['moving'] = df_motion_cpy['s'] >= MOVING_THRESHOLD

# Step 3: Create motion_frame column
def find_motion_frames(group):
    motion_frame = [False] * len(group)
    moving_array = group['moving'].values
    
    # Work backwards from the last frame
    last_idx = group.reset_index(drop=True).query('frame_type=="SNAP"').index[0] - 1

    # Find the first "moving" frame starting from the end
    first_moving_idx = None
    for idx in range(last_idx, -1, -1):
        if moving_array[idx]:
            first_moving_idx = idx
            break

    last_moving_idx = None
    if first_moving_idx is not None:
        for idx in range(first_moving_idx, last_idx + 1):
            if moving_array[idx]:
                last_moving_idx = idx
    
    if first_moving_idx is not None:
        # Set all frames after the first moving frame to True
        for idx in range(first_moving_idx, last_idx + 1):
            motion_frame[idx] = True

        # Move backwards, stopping when 10 consecutive frames are not moving
        consecutive_not_moving = 0
        for idx in range(first_moving_idx, -1, -1):
            if not moving_array[idx]:
                consecutive_not_moving += 1
                if consecutive_not_moving > N_FRAMES_NOT_MOVING:
                    # Stop marking frames as part of the motion
                    break
                continue
            else:
                for i in range(1, consecutive_not_moving+1):
                    motion_frame[idx+i] = True
                consecutive_not_moving = 0  # Reset counter if moving
            motion_frame[idx] = True

    # Set all frames to False from last_moving_idx to the end
    if last_moving_idx is not None:
        for idx in range(last_moving_idx + 1, len(group)):
            motion_frame[idx] = False

    # Update the group's motion_frame column
    group['motion_frame'] = motion_frame
    return group

# Apply the logic group-wise for each game_play_id
df_motion_cpy = df_motion_cpy.groupby('game_play_id', group_keys=False).progress_apply(find_motion_frames)

# drop game_play_ids with no motion frames
motion_gids = df_motion_cpy.query('motion_frame').game_play_id.unique()
df_motion_cpy = df_motion_cpy.query('game_play_id in @motion_gids')

100%|██████████| 266/266 [00:00<00:00, 1176.65it/s]


In [27]:
if 'motion_frame' in df_tracking.columns:
    df_tracking.drop(columns='motion_frame', inplace=True)

df_tracking = df_tracking.merge(df_motion_cpy[['game_play_id','frame_id','motion_frame']], on=['game_play_id','frame_id'], how='left')
df_tracking['motion_frame'] = df_tracking['motion_frame'].fillna(False)

In [28]:
# df_play_alignments = df_tracking.query('offense')[['game_play_id','nfl_id']].copy().drop_duplicates()
# df_play_alignments['alignment'] = None

# df_last_line_set = (
#     df_tracking
#     .query('frame_type=="BEFORE_SNAP" and offense').copy()
#     [['game_play_id','frame_id','nfl_id','last_line_set_fid','position_by_loc','x','y',
#         'oline_box_left','oline_box_right']]
# )

# df_motion_first_frame = (
#     df_tracking
#     .query('motion_frame')
#     [['game_play_id','frame_id']]
#     .sort_values(['game_play_id','frame_id'])
#     .drop_duplicates('game_play_id', keep='first')
#     .rename(columns={'frame_id':'frame_id_first_motion'})
# )

# df_last_line_set= df_last_line_set.merge(df_motion_first_frame, on='game_play_id', how='left')

# # create new last_line_set_fid_new which is frame_id_first_motion if not nan, else last_line_set_fid
# df_last_line_set['last_line_set_fid_new'] = df_last_line_set['frame_id_first_motion'].fillna(df_last_line_set['last_line_set_fid'])

# df_last_line_set = (
#     df_last_line_set
#     .drop(columns=['frame_id_first_motion','last_line_set_fid'])
#     .query('frame_id == last_line_set_fid_new')
#     .drop(columns='last_line_set_fid_new')
#     .rename(columns={'last_line_set_fid_new':'frame_id'})
# )
    
# y_oline_min = (
#     df_last_line_set
#     .query('position_by_loc.isin(["LT","LG","C","RG","RT"])')
#     [['game_play_id','y']]
#     .sort_values('y', ascending=True)
#     .drop_duplicates('game_play_id', keep='first')
#     .rename(columns={'y':'y_oline_min'})
# )

# if 'y_oline_min' in df_last_line_set.columns:
#     df_last_line_set.drop(columns='y_oline_min', inplace=True)

# df_last_line_set = df_last_line_set.merge(y_oline_min, on='game_play_id')

# closest_wr_to_los = (
#     df_last_line_set
#     .query(
#         'position_by_loc.isin(["WR","TE","RB","FB"]) and ' +\
#         # 'y >= y_oline_min - 0.5 and ' +\
#         '(x < oline_box_left or x > oline_box_right)'
#     )
#     .sort_values(['game_play_id','y'], ascending=[True,False])
# )

# x_wr = (
#     closest_wr_to_los
#     .query('position_by_loc=="WR"')
#     .drop_duplicates('game_play_id', keep='first')
#     [['game_play_id','nfl_id']]
#     .rename(columns={'nfl_id':'x_wr_nfl_id'})
# )

# df_play_alignments = df_play_alignments.merge(x_wr, on='game_play_id', how='left')
# df_play_alignments['alignment'] = np.where(
#     ~df_play_alignments['x_wr_nfl_id'].isna(),
#     'X',
#     df_play_alignments['alignment']
# )

In [29]:
cols = ['game_play_id', 'nfl_id', 'frame_id', 'frame_type', 'event_new', 'motion_frame',
    'position_by_loc', 'extra_oline_box_left', 'oline_box_left', 'center_x_at_line_set', 
    'oline_box_right', 'extra_oline_box_right', 'qb_x_last_line_set','qb_y_last_line_set', 
    'x', 'y', 's', 'a', 'o', 'dir', 'line_set_window_number','moving']
df_motion = df_motion_cpy.query('motion_player')[cols]

df_motion = (
    df_motion.merge(
        df_player_play[['game_play_id','nfl_id','had_rush_attempt']],
        how='left',
        on=['game_play_id','nfl_id']
    )
)

if 'absolute_yardline_number' not in df_motion.columns:
    df_motion = df_motion.merge(
        df_play[['game_play_id','absolute_yardline_number']],
        on='game_play_id',
        how='left'
    )

df_motion_first_frame = df_motion.drop_duplicates('game_play_id', keep='first').copy()

# Classify First Frame as "Off-Line Y"
df_motion_first_frame['initial_alignment'] = np.where(
    df_motion_first_frame['position_by_loc'].isin(['TE','RB','FB','WR']) &
    (
        (df_motion_first_frame['y'] < df_motion_first_frame['absolute_yardline_number']-1) &
        (df_motion_first_frame['y'] > df_motion_first_frame['absolute_yardline_number'] - 4)
    ),
    np.where(
        (
            (df_motion_first_frame['x'] < df_motion_first_frame['extra_oline_box_left']) &
            (df_motion_first_frame['x'] > df_motion_first_frame['extra_oline_box_left'] - 3)
        ) |
        (
            (df_motion_first_frame['x'] > df_motion_first_frame['extra_oline_box_right']) &
            (df_motion_first_frame['x'] < df_motion_first_frame['extra_oline_box_right'] + 3)
        ),
        'Off-Line Y',
        None
    ),
    None
)

if 'initial_alignment' in df_motion.columns:
    df_motion.drop(columns='initial_alignment', inplace=True)
df_motion = df_motion.merge(
    df_motion_first_frame[['game_play_id','initial_alignment']], 
    on=['game_play_id'],
    how='left'
)
del df_motion_first_frame

# left side negative, right side positive
df_motion['dx_outside_oline'] = np.where(
    df_motion['x'] < df_motion['extra_oline_box_left'],
    df_motion['x'] - df_motion['extra_oline_box_left'],
    np.where(
        df_motion['x'] > df_motion['extra_oline_box_right'],
        df_motion['x'] - df_motion['extra_oline_box_right'],
        None
    )
)

# negative if closer to left sideline, positive if closer to right sideline
df_motion['dx_inside_oline'] = np.where(
    (df_motion['extra_oline_box_left'] < df_motion['x']) &
    (df_motion['x'] < df_motion['extra_oline_box_right']),
    np.where(
        df_motion['x'] < df_motion['center_x_at_line_set'],
        df_motion['extra_oline_box_left'] - df_motion['x'],
        df_motion['extra_oline_box_right'] - df_motion['x']
    ),
    None
)

# cross the line of scrimmage if play with motion_frame has both dx_outside_oline < 0 and dx_outside_oline > 0
df_motion['oline_side'] = np.where(
    df_motion['dx_outside_oline'] < 0,
    'outside-left',
    np.where(
        df_motion['dx_outside_oline'] > 0,
        'outside-right',
        np.where(
            (df_motion['dx_inside_oline'] < 0) &
            (df_motion['dx_inside_oline'] > -1),
            'inside-oline-left',
            np.where(
                (df_motion['dx_inside_oline'] > 0) &
                (df_motion['dx_inside_oline'] < 1),
                'inside-oline-right',
                'within-oline'
            )
        )
    )
)
outside_left_gids = (
    df_motion
    .query('motion_frame and oline_side=="outside-left"')
    .game_play_id
    .unique()
    .tolist()
)
outside_right_gids = (
    df_motion
    .query('motion_frame and oline_side=="outside-right"')
    .game_play_id
    .unique()
    .tolist()
)
crossing_oline_gids = (
    df_motion
    .query('game_play_id.isin(@outside_left_gids) and oline_side.isin(["inside-oline-right","outside-right"])')
    .game_play_id
    .unique()
    .tolist()
)
crossing_oline_gids += (
    df_motion
    .query('game_play_id.isin(@outside_right_gids) and oline_side.isin(["inside-oline-left","outside-left"])')
    .game_play_id
    .unique()
    .tolist()
)
df_motion['motion_crosses_over_oline'] = np.where(
    df_motion['game_play_id'].isin(crossing_oline_gids),
    True,
    False
)
del crossing_oline_gids, outside_left_gids, outside_right_gids

first_motion_frame_side = (
    df_motion
    .query('motion_frame')
    .drop_duplicates('game_play_id', keep='first')
    [['game_play_id','oline_side']]
    .rename(columns={'oline_side':'oline_side_first'})
)
df_motion = df_motion.merge(first_motion_frame_side, on='game_play_id', how='left')
del first_motion_frame_side
last_presnap_frame_side = (
    df_motion
    .query('frame_type=="BEFORE_SNAP"')
    .drop_duplicates('game_play_id', keep='last')
    [['game_play_id','oline_side']]
    .rename(columns={'oline_side':'oline_side_last_presnap'})
)
df_motion = df_motion.merge(last_presnap_frame_side, on='game_play_id', how='left')
del last_presnap_frame_side
motion_enters_oline_gids = (
    df_motion
    .query('motion_frame and oline_side.isin(["inside-oline-left","inside-oline-right","within-oline"])')
    .game_play_id
    .unique()
    .tolist()
)
df_motion['motion_enters_oline'] = np.where(
    df_motion['game_play_id'].isin(motion_enters_oline_gids),
    True,
    False
)

# df_motion['moving_directly_forward'] = (df_motion['dir'] > 45) & (df_motion['dir'] <= 135)
# df_motion['moving_directly_left'] = (df_motion['dir'] > 135) & (df_motion['dir'] <= 225)
df_motion['moving_leftish'] = (df_motion['dir'] > 90) & (df_motion['dir'] <= 270)
# df_motion['moving_directly_backward'] = (df_motion['dir'] > 225) & (df_motion['dir'] <= 315)
# df_motion['moving_directly_right'] = (df_motion['dir'] > 315) | (df_motion['dir'] <= 45)
df_motion['moving_rightish'] = (df_motion['dir'] > 270) | (df_motion['dir'] <= 90)

df_motion['moving_left'] = np.where(
    df_motion['moving_leftish'],
    1,
    0
)
# Count direction changes with the three frame streak in that direction
filtered_motion = (
    df_motion
    .query('motion_frame') 
    .sort_values(['game_play_id', 'frame_id'])  # Ensure proper ordering
)
def count_direction_changes_and_group(group):
    directions = group['moving_left'].values
    rolling_streaks = pd.Series(directions).rolling(3).apply(lambda x: len(set(x)) == 1).fillna(0).astype(bool)
    
    group_id = []
    current_group = 0
    prev_direction = directions[0]
    
    for i in range(len(rolling_streaks)):
        if rolling_streaks[i]:  # Valid streak found
            current_direction = directions[i]
            if current_direction != prev_direction:  # Change in direction
                current_group += 1
                prev_direction = current_direction
        group_id.append(current_group)
    
    group = group.copy()
    group['direction_group_id'] = group_id
    n_changes = current_group  # Total changes are equal to the last group ID
    group['n_direction_changes'] = n_changes
    return group
direction_grouped = (
    filtered_motion
    .groupby('game_play_id', group_keys=False)  # Group by game_play_id
    .apply(count_direction_changes_and_group)
)
df_motion = df_motion.merge(
    direction_grouped[['game_play_id', 'frame_id', 'direction_group_id', 'n_direction_changes']],
    on=['game_play_id', 'frame_id'],
    how='left'
)

# group by game_play_id, then set all frmaes to n_direciton_chnages value
n_direction_changes = (
    df_motion
    .query('motion_frame')
    .groupby('game_play_id')
    .agg(
        n_direction_changes=('n_direction_changes','first')
    )
)
df_motion = (
    df_motion
    .drop(columns='n_direction_changes')
    .merge(n_direction_changes, on='game_play_id', how='left')
)

motion_dir = (
    df_motion
    .query('frame_type == "BEFORE_SNAP" and motion_frame')
    [['game_play_id','moving_leftish','moving_rightish']]
    .copy()
)

motion_dir = (
    motion_dir
    .groupby('game_play_id')
    .agg(
        # motion_forward_only=('moving_directly_forward','all'),
        # motion_left_only=('moving_directly_left','all'),
        # motion_backward_only=('moving_directly_backward','all'),
        # motion_right_only=('moving_directly_right','all'),
        motion_leftish_all=('moving_leftish','all'),
        motion_rightish_all=('moving_rightish','all')
    )
)

df_motion = df_motion.merge(motion_dir, on='game_play_id', how='left')

df_motion['same_motion_dir'] = np.select(
    [
        df_motion['motion_leftish_all'],
        df_motion['motion_rightish_all']
    ],
    [
        'left-all',
        'right-all'
    ],
    default=None
)

df_motion.drop(columns=['motion_leftish_all','motion_rightish_all'], inplace=True)

motion_dir_by_group = (
    df_motion
    .query('motion_frame')
    .groupby(['game_play_id','direction_group_id'])
    .agg(
        n_frame_leftish=('moving_leftish','sum'),
        n_frame_rightish=('moving_rightish','sum'),
        contains_motion_frame=('motion_frame','any')
    )
    .reset_index()
)
motion_dir_by_group['dir_smoothed'] = np.select(
    [
        motion_dir_by_group['n_frame_leftish'] >= motion_dir_by_group['n_frame_rightish'],
        motion_dir_by_group['n_frame_leftish'] < motion_dir_by_group['n_frame_rightish']
    ],
    [
        'left',
        'right'
    ],
    default=None
)

# motion dir on first line set window
motion_dir_by_group_first = (
    motion_dir_by_group
    .drop_duplicates('game_play_id', keep='first')
    .copy()
    .rename(columns={'dir_smoothed':'motion_dir_first'})
)
df_motion = df_motion.merge(
    motion_dir_by_group_first[['game_play_id','motion_dir_first']], 
    on='game_play_id', 
    how='left'
)

# motion dir on last line set window
motion_dir_by_group_last = (
    motion_dir_by_group
    .drop_duplicates('game_play_id', keep='last')
    .copy()
    .rename(columns={'dir_smoothed':'motion_dir_last'})
)
df_motion = df_motion.merge(
    motion_dir_by_group_last[['game_play_id','motion_dir_last']],
    on='game_play_id',
    how='left'
)


df_motion = df_motion.merge(
    motion_dir_by_group[['game_play_id','direction_group_id','dir_smoothed','contains_motion_frame']], 
    on=['game_play_id','direction_group_id'], 
    how='left'
)
df_motion['contains_motion_frame'] = df_motion['contains_motion_frame'].fillna(False)

#Add QB x,y to df_motion
df_motion = df_motion.merge(
    (
        df_tracking
        .query('position_by_loc=="QB"')
        [['game_play_id','frame_id','x','y']]
        .rename(columns={'x':'qb_x','y':'qb_y'})
    ),
    on=['game_play_id','frame_id'],
    how='left'
)

# find max dx on motion
x_min_max = (
    df_motion
    .query('motion_frame')
    .groupby('game_play_id')
    .agg(
        x_min=('x','min'),
        x_max=('x','max'),
    )
)
x_min_max['dx'] = x_min_max['x_max'] - x_min_max['x_min']
df_motion = df_motion.merge(x_min_max[['dx']], on='game_play_id', how='left')

# find max dy on motion
y_min_max = (
    df_motion
    .query('motion_frame')
    .groupby('game_play_id')
    .agg(
        y_min=('y','min'),
        y_max=('y','max'),
    )
)
y_min_max['dy'] = y_min_max['y_max'] - y_min_max['y_min']
df_motion = df_motion.merge(y_min_max[['dy']], on='game_play_id', how='left')

# behind center at snap
frame_at_snap = (
    df_motion[['game_play_id','nfl_id','center_x_at_line_set']].drop_duplicates()
    .merge(
        (
            df_tracking.query('frame_type=="SNAP"')
            [['game_play_id','nfl_id','x','y']]
            .rename(columns={'x':'x_at_line_set','y':'y_at_line_set'})
        ),
        on=['game_play_id','nfl_id'],
        how='left'
    )
)
frame_at_snap['behind_center_at_snap'] = np.where(
    (frame_at_snap['center_x_at_line_set'] - 0.5 < frame_at_snap['x_at_line_set']) &
    (frame_at_snap['x_at_line_set'] < frame_at_snap['center_x_at_line_set'] + 0.5),
    True,
    False
)
df_motion = df_motion.merge(frame_at_snap[['game_play_id','behind_center_at_snap']], on='game_play_id', how='left')

# Farthest back player at snap
farthest_back_at_snap = (
    df_tracking
    .query('frame_type=="SNAP"')
    .groupby('game_play_id')
    .agg(
        farthest_back_y=('y','min')
    ).reset_index()
)
motion_nfl_ids = df_motion.nfl_id.unique().tolist()
farthest_back = (
    frame_at_snap.query('nfl_id.isin(@motion_nfl_ids)').merge(farthest_back_at_snap, on='game_play_id', how='left')
)
farthest_back['farthest_back_at_snap'] = np.where(
    farthest_back['y_at_line_set'] == farthest_back['farthest_back_y'],
    True,
    False
)
df_motion = df_motion.merge(farthest_back[['game_play_id','farthest_back_at_snap']], on='game_play_id', how='left')
del farthest_back_at_snap, frame_at_snap

# set frame of first motion
first_motion_frame = (
    df_motion
    .query('motion_frame')
    .groupby('game_play_id')
    .agg(
        first_motion_fid=('frame_id','min')
    )
)
df_motion = df_motion.merge(first_motion_frame, on='game_play_id', how='left')

ball_snap_frame = (
    df_tracking
    .query('frame_type=="SNAP"')
    [['game_play_id','frame_id']]
    .rename(columns={'frame_id':'ball_snap_fid'})
    .drop_duplicates('game_play_id')
)
df_motion = df_motion.merge(ball_snap_frame, on='game_play_id', how='left')

df_motion.head()

Unnamed: 0,game_play_id,nfl_id,frame_id,frame_type,event_new,motion_frame,position_by_loc,extra_oline_box_left,oline_box_left,center_x_at_line_set,oline_box_right,extra_oline_box_right,qb_x_last_line_set,qb_y_last_line_set,x,y,s,a,o,dir,line_set_window_number,moving,had_rush_attempt,absolute_yardline_number,initial_alignment,dx_outside_oline,dx_inside_oline,oline_side,motion_crosses_over_oline,oline_side_first,oline_side_last_presnap,motion_enters_oline,moving_leftish,moving_rightish,moving_left,direction_group_id,n_direction_changes,same_motion_dir,motion_dir_first,motion_dir_last,dir_smoothed,contains_motion_frame,qb_x,qb_y,dx,dy,behind_center_at_snap,farthest_back_at_snap,first_motion_fid,ball_snap_fid
0,2022090800_101,53079.0,52,BEFORE_SNAP,line_set,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.62,43.21,0.03,0.16,91.34,91.79,0,False,0,48,,,3.16,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.67,46.41,1.32,0.13,False,False,100,106
1,2022090800_101,53079.0,53,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.6,43.21,0.07,0.57,95.49,160.58,0,False,0,48,,,3.18,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.66,46.4,1.32,0.13,False,False,100,106
2,2022090800_101,53079.0,54,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.59,43.21,0.2,1.24,100.55,175.41,0,False,0,48,,,3.19,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.65,46.39,1.32,0.13,False,False,100,106
3,2022090800_101,53079.0,55,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.55,43.22,0.41,1.64,105.78,178.76,0,False,0,48,,,3.23,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.65,46.39,1.32,0.13,False,False,100,106
4,2022090800_101,53079.0,56,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.49,43.22,0.68,1.9,112.18,178.05,0,False,0,48,,,3.29,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.67,46.39,1.32,0.13,False,False,100,106


In [30]:
# add feature of if motion is ahead or behind qb when palyer crosses qb path
motion_crossing_qb = (
    df_motion
    .query('frame_id > first_motion_fid and frame_id < ball_snap_fid + 10 and qb_x - 0.5 < x and x < qb_x + 0.5')
    [['game_play_id','direction_group_id','y','qb_y']]
    .drop_duplicates(['game_play_id','direction_group_id'], keep='first')
    .copy()
)
# fill na direction_group_id the previous value + 1
df_motion.loc[df_motion['direction_group_id'].isna(), 'direction_group_id'] = (
    df_motion['direction_group_id'].shift() + 1
)

motion_crossing_qb['motion_crossing_qb'] = np.where(
    motion_crossing_qb['y'] > motion_crossing_qb['qb_y'],
    'in-front-of-qb',
    'behind-qb'
)

if 'motion_crossing_qb' in df_motion.columns:
    df_motion.drop(columns='motion_crossing_qb', inplace=True)
df_motion = df_motion.merge(
    motion_crossing_qb[['game_play_id','direction_group_id','motion_crossing_qb']], 
    on=['game_play_id','direction_group_id'], 
    how='left'
)

df_motion.loc[df_motion['motion_crossing_qb'].isna() & df_motion['motion_frame'], 'motion_crossing_qb'] = 'not-crossing-qb'

last_motion_crossing_qb = (
    motion_crossing_qb
    .drop_duplicates('game_play_id', keep='last')
    .copy()
    .rename(columns={'motion_crossing_qb':'motion_crossing_qb_last'})
)

if 'motion_crossing_qb_last' in df_motion.columns:
    df_motion.drop(columns='motion_crossing_qb_last', inplace=True)
df_motion = df_motion.merge(
    last_motion_crossing_qb[['game_play_id','motion_crossing_qb_last']], 
    on='game_play_id', 
    how='left'
)

first_motion_crossing_qb = (
    motion_crossing_qb
    .drop_duplicates('game_play_id', keep='first')
    .copy()
    .rename(columns={'motion_crossing_qb':'motion_crossing_qb_first'})
)

if 'motion_crossing_qb_first' in df_motion.columns:
    df_motion.drop(columns='motion_crossing_qb_first', inplace=True)
df_motion = df_motion.merge(
    first_motion_crossing_qb[['game_play_id','motion_crossing_qb_first']], 
    on='game_play_id', 
    how='left'
)

# Classify Yo-Yo Motion and its variants

In [31]:
# Classify Yo-Yo motion (and one case of Orbit motion)

df_motion['motion_group'] = None
df_motion['motion_sub_group'] = None

# Yo-Yo motion: if changes direction more than once and starts motion moving towardes the qb
df_motion['motion_group'] = np.where(
    (
        (df_motion['n_direction_changes'] > 0) &
        (df_motion['dx'] > 2) &
        (
            (
                (df_motion['oline_side_first'] == "outside-left") & (df_motion['motion_dir_first'] == "right")
            ) |
            (
                (df_motion['oline_side_first'] == "outside-right") & (df_motion['motion_dir_first'] == "left")
            )
        )
    ),
    'Yo-Yo',
    df_motion['motion_group']
)

df_motion['motion_sub_group'] = np.where(
    (df_motion['motion_group'] == 'Yo-Yo'),
    np.where(
        (df_motion['n_direction_changes'] == 3),
        'Triple Yo-Yo',
        np.where(
            (df_motion['n_direction_changes'] == 2),
            'Double Yo-Yo',
            np.where(
                df_motion['behind_center_at_snap'] & ~df_motion['farthest_back_at_snap'],
                'Yo-Yo Lead',
                np.where(
                    (df_motion['motion_crossing_qb_first'] == 'in-front-of-qb') &
                    (df_motion['motion_crossing_qb_last'] == 'behind-qb'),
                    'Under Orbit',
                    np.where(
                        df_motion['motion_crosses_over_oline'],
                        'Full Yo-Yo',
                        np.where(
                            df_motion['motion_enters_oline'],
                            'Half Yo-Yo',
                            None
                        )
                    )
                )
            )
        )
    ),
    df_motion['motion_sub_group']
)

df_motion['motion_group'] = np.where(
    df_motion['motion_sub_group'] == 'Under Orbit',
    'Orbit',
    df_motion['motion_group']
)

# Classify Split-Flow Over Motion

In [32]:
df_motion['dx_dy_ratio'] = df_motion['dx'] / df_motion['dy']

if 'x_2_sec_after_snap' in df_motion.columns:
    df_motion.drop(columns=['x_2_sec_after_snap','y_2_sec_after_snap'], inplace=True)

df_motion = (
    df_motion
    .merge(
        (
            df_tracking.query('frame_id == ball_snap_fid + 20')
            [['game_play_id','nfl_id','x','y']]
            .rename(columns={'x':'x_2_sec_after_snap','y':'y_2_sec_after_snap'})
        ),
        on=['game_play_id','nfl_id'],
        how='left'
    )
)

# Feature for motion man has not reached center before snap (or not far past center)
x_loc_at_ball_snap = (
    df_motion
    .query('frame_id == ball_snap_fid')
    [['game_play_id','nfl_id','x']]
    .rename(columns={'x':'x_loc_at_ball_snap'})
)
df_motion = df_motion.merge(x_loc_at_ball_snap, on=['game_play_id','nfl_id'], how='left')
del x_loc_at_ball_snap

# This feature only applies to single direction motions
df_motion['ball_snap_before_passing_center'] = np.where(
    (
        (df_motion['oline_side_first'] == 'outside-left') &
        (df_motion['same_motion_dir'] == 'right-all') &
        (df_motion['x_loc_at_ball_snap'] < df_motion['center_x_at_line_set'] + 0.5)
    ) |
    (
        (df_motion['oline_side_first'] == 'outside-right') &
        (df_motion['same_motion_dir'] == 'left-all') &
        (df_motion['x_loc_at_ball_snap'] > df_motion['center_x_at_line_set'] - 0.5)
    ),
    True,
    False
)


df_motion['motion_group'] = np.where(
    (df_motion['n_direction_changes'] == 0) &
    (df_motion['y_2_sec_after_snap'] >= df_motion['absolute_yardline_number'] - 3) &
    (df_motion['dx_dy_ratio'] >= 2) &
    (df_motion['initial_alignment'] == 'Off-Line Y') &
    (
        (
            (df_motion['oline_side_first'] == 'outside-left') &
            (df_motion['motion_dir_first'] == 'right') &
            (df_motion['x_2_sec_after_snap'] >= df_motion['center_x_at_line_set'] + 1)
        ) |
        (
            (df_motion['oline_side_first'] == 'outside-right') &
            (df_motion['motion_dir_first'] == 'left') &
            (df_motion['x_2_sec_after_snap'] <= df_motion['center_x_at_line_set'] - 1)
        )
    ),
    'Split-Flow Over',
    df_motion['motion_group']
)

df_motion['motion_sub_group'] = np.where(
    (df_motion['motion_group'] == 'Split-Flow Over'),
    np.where(
        df_motion['ball_snap_before_passing_center'],
        'Ball Snap Before Passing Center',
        'Ball Snap After Passing Center'
    ),
    df_motion['motion_sub_group']
)

In [33]:
df_motion.head()

Unnamed: 0,game_play_id,nfl_id,frame_id,frame_type,event_new,motion_frame,position_by_loc,extra_oline_box_left,oline_box_left,center_x_at_line_set,oline_box_right,extra_oline_box_right,qb_x_last_line_set,qb_y_last_line_set,x,y,s,a,o,dir,line_set_window_number,moving,had_rush_attempt,absolute_yardline_number,initial_alignment,dx_outside_oline,dx_inside_oline,oline_side,motion_crosses_over_oline,oline_side_first,oline_side_last_presnap,motion_enters_oline,moving_leftish,moving_rightish,moving_left,direction_group_id,n_direction_changes,same_motion_dir,motion_dir_first,motion_dir_last,dir_smoothed,contains_motion_frame,qb_x,qb_y,dx,dy,behind_center_at_snap,farthest_back_at_snap,first_motion_fid,ball_snap_fid,motion_crossing_qb,motion_crossing_qb_last,motion_crossing_qb_first,motion_group,motion_sub_group,dx_dy_ratio,x_2_sec_after_snap,y_2_sec_after_snap,x_loc_at_ball_snap,ball_snap_before_passing_center
0,2022090800_101,53079.0,52,BEFORE_SNAP,line_set,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.62,43.21,0.03,0.16,91.34,91.79,0,False,0,48,,,3.16,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.67,46.41,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,,,10.153846,39.33,47.81,26.33,True
1,2022090800_101,53079.0,53,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.6,43.21,0.07,0.57,95.49,160.58,0,False,0,48,,,3.18,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.66,46.4,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,,,10.153846,39.33,47.81,26.33,True
2,2022090800_101,53079.0,54,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.59,43.21,0.2,1.24,100.55,175.41,0,False,0,48,,,3.19,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.65,46.39,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,,,10.153846,39.33,47.81,26.33,True
3,2022090800_101,53079.0,55,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.55,43.22,0.41,1.64,105.78,178.76,0,False,0,48,,,3.23,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.65,46.39,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,,,10.153846,39.33,47.81,26.33,True
4,2022090800_101,53079.0,56,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.49,43.22,0.68,1.9,112.18,178.05,0,False,0,48,,,3.29,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.67,46.39,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,,,10.153846,39.33,47.81,26.33,True


# Classify Orbit, Fly, & Jet Motion

In [34]:
cols = ['game_play_id', 'nfl_id', 'first_motion_fid', 'ball_snap_fid','position_by_loc','oline_box_left','oline_box_right',
        'oline_side_first','same_motion_dir','initial_alignment','n_direction_changes','dx','dy',
        'dx_dy_ratio', 'qb_x','qb_y','absolute_yardline_number',
        'motion_group','motion_sub_group']
df_motion_all_frames = (
    df_motion.query('motion_frame')[cols].drop_duplicates(['game_play_id'])
    .merge(
        df_tracking[['game_play_id','nfl_id','frame_id','event_new','frame_type','x','y','s','a','dir']],
        on=['game_play_id','nfl_id'],
        how='left'
    )
)

# column for if motion crosses to other side of oline
if 'x_loc_2_sec_after_snap' in df_motion_all_frames.columns:
    df_motion_all_frames.drop(columns='x_loc_2_sec_after_snap', inplace=True)

x_loc_2_sec_after_snap = (
    df_motion_all_frames
    .query('frame_id == ball_snap_fid + 30')
    [['game_play_id','x']]
    .rename(columns={'x':'x_loc_2_sec_after_snap'})
)
df_motion_all_frames = df_motion_all_frames.merge(x_loc_2_sec_after_snap, on='game_play_id', how='left')
del x_loc_2_sec_after_snap

if 'motion_crosses_to_other_side' in df_motion_all_frames.columns:
    df_motion_all_frames.drop(columns='motion_crosses_to_other_side', inplace=True)

df_motion_all_frames['motion_crosses_to_other_side'] = np.where(
    (df_motion_all_frames['n_direction_changes'] == 0) &
    (df_motion_all_frames['y'] < df_motion_all_frames['absolute_yardline_number']) &
    (
        (
            (df_motion_all_frames['oline_side_first'] == 'outside-left') &
            (df_motion_all_frames['same_motion_dir'] == 'right-all') &
            (df_motion_all_frames['x_loc_2_sec_after_snap'] > df_motion_all_frames['oline_box_right'])
        ) |
        (
            (df_motion_all_frames['oline_side_first'] == 'outside-right') &
            (df_motion_all_frames['same_motion_dir'] == 'left-all') &
            (df_motion_all_frames['x_loc_2_sec_after_snap'] < df_motion_all_frames['oline_box_left'])
        )
    ),
    True,
    False
)

df_motion_crossing_oline = (
    df_motion_all_frames
    .query('motion_crosses_to_other_side')
    .copy()
)
del df_motion_all_frames

# find the frame crossing the qb (need to redo in front or behind because above limits it to 1 second after snap)
motion_crossing_qb = (
    df_motion_crossing_oline
    .query('first_motion_fid <= frame_id and frame_id <= ball_snap_fid + 20 and qb_x - 0.5 < x and x < qb_x + 0.5')
    [['game_play_id','frame_id','x','y','qb_x','qb_y']]
    .drop_duplicates('game_play_id', keep='first')
    .copy()
)
motion_crossing_qb['motion_behind_qb'] = np.where(
    motion_crossing_qb['qb_y'] > motion_crossing_qb['y'],
    True,
    False
)
motion_crossing_qb['dx'] = (motion_crossing_qb['x'] - motion_crossing_qb['qb_x']).abs()
motion_crossing_qb = (
    motion_crossing_qb
    .sort_values('dx')
    .drop_duplicates('game_play_id', keep='first')
    .rename(columns={'frame_id':'frame_id_crossing_qb'})
)

df_motion_crossing_oline = df_motion_crossing_oline.merge(
    motion_crossing_qb[['game_play_id','motion_behind_qb','frame_id_crossing_qb']], 
    on='game_play_id', 
    how='left'
)

df_arc_motion = (
    df_motion_crossing_oline
    .query('first_motion_fid <= frame_id and frame_id <= frame_id_crossing_qb')
    [['game_play_id','x','y']]
)

# Assuming df_arc_motion is already defined
def calculate_path_distance(group):
    ''' Total distance traveled along the path and Euclidean distance between 
    first and last points '''
    path_distance = np.sum(
        np.sqrt(np.diff(group['x'])**2 + np.diff(group['y'])**2)
    )
    # Compute straight-line distance between first and last points
    straight_line_distance = np.sqrt(
        (group['x'].iloc[-1] - group['x'].iloc[0])**2 +
        (group['y'].iloc[-1] - group['y'].iloc[0])**2
    )
    return pd.Series({
        'path_distance': path_distance,
        'straight_line_distance': straight_line_distance,
        'dy_motion_to_qb': group['y'].iloc[-1] - group['y'].iloc[0]
    })

# Apply the function to the DataFrame
result = (
    df_arc_motion
    .groupby('game_play_id')
    .apply(calculate_path_distance)
    .reset_index()
)

df_arc_motion = df_arc_motion.merge(result, on='game_play_id', how='left')
del result

df_arc_motion['path_straight_ratio'] = df_arc_motion['path_distance'] / df_arc_motion['straight_line_distance']

df_motion_crossing_oline = df_motion_crossing_oline.merge(
    df_arc_motion[['game_play_id','path_distance','path_straight_ratio','dy_motion_to_qb']], 
    on='game_play_id', 
    how='left'
)

df_motion_crossing_oline['motion_group'] = np.where(
    df_motion_crossing_oline.motion_group.isna(),
    np.where(
        df_motion_crossing_oline['motion_behind_qb'] &
        (
            (df_motion_crossing_oline['dy_motion_to_qb'] <= -2) |
            (df_motion_crossing_oline['path_straight_ratio'] >= 1.005)
        ),
        'Orbit',
        np.where(
            df_motion_crossing_oline['frame_id_crossing_qb'] <= df_motion_crossing_oline['ball_snap_fid'] - 5,
            'Fly',
            'Jet'
        )
    ),
    df_motion_crossing_oline.motion_group
)
df_motion_crossing_oline['motion_sub_group'] = 'Standard'

df_motion = (
    df_motion
    .merge(
        (
            df_motion_crossing_oline
            [['game_play_id','motion_group','motion_sub_group']]
            .drop_duplicates('game_play_id')
        ),
        on='game_play_id',
        how='left',
        suffixes=('','_x')
    )
)

df_motion['motion_group'] = np.where(
    df_motion['motion_group'].isna(),
    df_motion['motion_group_x'],
    df_motion['motion_group']
)
df_motion['motion_sub_group'] = np.where(
    df_motion['motion_sub_group'].isna(),
    df_motion['motion_sub_group_x'],
    df_motion['motion_sub_group']
)
df_motion.drop(columns=['motion_group_x','motion_sub_group_x'], inplace=True)

# Classify Condense Motion

In [35]:
df_motion.head()

Unnamed: 0,game_play_id,nfl_id,frame_id,frame_type,event_new,motion_frame,position_by_loc,extra_oline_box_left,oline_box_left,center_x_at_line_set,oline_box_right,extra_oline_box_right,qb_x_last_line_set,qb_y_last_line_set,x,y,s,a,o,dir,line_set_window_number,moving,had_rush_attempt,absolute_yardline_number,initial_alignment,dx_outside_oline,dx_inside_oline,oline_side,motion_crosses_over_oline,oline_side_first,oline_side_last_presnap,motion_enters_oline,moving_leftish,moving_rightish,moving_left,direction_group_id,n_direction_changes,same_motion_dir,motion_dir_first,motion_dir_last,dir_smoothed,contains_motion_frame,qb_x,qb_y,dx,dy,behind_center_at_snap,farthest_back_at_snap,first_motion_fid,ball_snap_fid,motion_crossing_qb,motion_crossing_qb_last,motion_crossing_qb_first,motion_group,motion_sub_group,dx_dy_ratio,x_2_sec_after_snap,y_2_sec_after_snap,x_loc_at_ball_snap,ball_snap_before_passing_center
0,2022090800_101,53079.0,52,BEFORE_SNAP,line_set,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.62,43.21,0.03,0.16,91.34,91.79,0,False,0,48,,,3.16,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.67,46.41,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,Jet,Standard,10.153846,39.33,47.81,26.33,True
1,2022090800_101,53079.0,53,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.6,43.21,0.07,0.57,95.49,160.58,0,False,0,48,,,3.18,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.66,46.4,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,Jet,Standard,10.153846,39.33,47.81,26.33,True
2,2022090800_101,53079.0,54,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.59,43.21,0.2,1.24,100.55,175.41,0,False,0,48,,,3.19,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.65,46.39,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,Jet,Standard,10.153846,39.33,47.81,26.33,True
3,2022090800_101,53079.0,55,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.55,43.22,0.41,1.64,105.78,178.76,0,False,0,48,,,3.23,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.65,46.39,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,Jet,Standard,10.153846,39.33,47.81,26.33,True
4,2022090800_101,53079.0,56,BEFORE_SNAP,,False,FB,26.04,26.04,29.46,32.78,32.78,29.54,46.49,29.49,43.22,0.68,1.9,112.18,178.05,0,False,0,48,,,3.29,within-oline,True,outside-left,outside-left,False,True,False,1,,0.0,right-all,right,right,,False,29.67,46.39,1.32,0.13,False,False,100,106,behind-qb,behind-qb,behind-qb,Jet,Standard,10.153846,39.33,47.81,26.33,True


In [36]:
cols = ['game_play_id', 'nfl_id', 'first_motion_fid','oline_box_left','oline_box_right',
        'oline_side_first','same_motion_dir','dx_dy_ratio',
        'motion_group','motion_sub_group']
df_motion_all_frames = (
    df_motion.query('motion_frame')[cols].drop_duplicates(['game_play_id'])
    .merge(
        df_tracking[['game_play_id','nfl_id','frame_id','event_new','frame_type',
                     'absolute_yardline_number','x','y','s','a','dir']],
        on=['game_play_id','nfl_id'],
        how='left'
    )
)
x_mins = (
    df_motion_all_frames
    .query('y <= absolute_yardline_number - 1 and frame_id >= first_motion_fid')
    [['game_play_id','x']]
    .sort_values('x')
    .drop_duplicates('game_play_id', keep='first')
    .rename(columns={'x':'x_motion_min'})
)
x_maxs = (
    df_motion_all_frames
    .query('y <= absolute_yardline_number - 1 and frame_id >= first_motion_fid')
    [['game_play_id','x']]
    .sort_values('x')
    .drop_duplicates('game_play_id', keep='last')
    .rename(columns={'x':'x_motion_max'})
)
x_at_ball_snap = (
    df_motion_all_frames
    .query('frame_type=="SNAP"')
    [['game_play_id','x']]
    .rename(columns={'x':'x_at_ball_snap'})
)
df_motion_all_frames = df_motion_all_frames.merge(x_mins, on='game_play_id', how='left')
df_motion_all_frames = df_motion_all_frames.merge(x_maxs, on='game_play_id', how='left')
df_motion_all_frames = df_motion_all_frames.merge(x_at_ball_snap, on='game_play_id', how='left')
df_motion_all_frames['x_motion_max'] = df_motion_all_frames['x_motion_max'].fillna(df_motion_all_frames['x_at_ball_snap'])
del x_mins, x_maxs

In [37]:
qry = df_motion_all_frames.copy()
condense_gids = qry[(
    qry['motion_group'].isna() &
    (qry['dx_dy_ratio'] > 3) &
    (
        (
            (qry['oline_side_first'] == 'outside-left') &
            (qry['same_motion_dir'] == 'right-all') & 
            (qry['x_motion_max'] < qry['oline_box_left'] + 1)
        ) |
        (
            (qry['oline_side_first'] == 'outside-right') &
            (qry['same_motion_dir'] == 'left-all') &
            (qry['x_motion_min'] > qry['oline_box_right'] - 1)
        )
    )
)].game_play_id.unique()

df_motion.loc[df_motion['game_play_id'].isin(condense_gids), 'motion_group'] = 'Condense'
df_motion.loc[df_motion['game_play_id'].isin(condense_gids), 'motion_sub_group'] = 'Standard'

In [38]:
df_motion.drop_duplicates('game_play_id').motion_group.value_counts()

Jet                89
Split-Flow Over    30
Condense           30
Orbit              28
Fly                23
Yo-Yo              19
Name: motion_group, dtype: int64

In [53]:
df_motion.query('motion_group.isna()').game_play_id.unique()

array(['2022090800_2551', '2022091101_3315', '2022091101_3569',
       '2022091101_521', '2022091102_2065', '2022091102_2783',
       '2022091102_343', '2022091102_3981', '2022091103_1037',
       '2022091103_4462', '2022091104_2974', '2022091104_3016',
       '2022091104_4334', '2022091105_1008', '2022091105_4352',
       '2022091105_667', '2022091106_3002', '2022091107_3163',
       '2022091107_469', '2022091107_959', '2022091108_676',
       '2022091109_1266', '2022091109_2481', '2022091109_4075',
       '2022091109_462', '2022091110_1047', '2022091110_219',
       '2022091110_994', '2022091111_100', '2022091112_2072',
       '2022091112_2225', '2022091112_3416', '2022091112_3492',
       '2022091112_3551', '2022091112_3589', '2022091112_715',
       '2022091112_891', '2022091113_1170', '2022091113_1318',
       '2022091113_1436', '2022091113_2494', '2022091113_3642',
       '2022091113_3722', '2022091200_1299', '2022091200_2613',
       '2022091200_446'], dtype=object)

In [40]:
df_motion.query('motion_group=="Condense"').game_play_id.unique()

array(['2022090800_343', '2022091100_2984', '2022091101_3336',
       '2022091101_3961', '2022091102_1029', '2022091102_1363',
       '2022091102_1954', '2022091102_3022', '2022091102_800',
       '2022091104_1115', '2022091104_128', '2022091104_3980',
       '2022091104_604', '2022091105_2153', '2022091105_3413',
       '2022091108_1196', '2022091108_2027', '2022091108_3523',
       '2022091108_3561', '2022091111_1819', '2022091111_2257',
       '2022091111_839', '2022091112_1123', '2022091113_1196',
       '2022091113_2009', '2022091113_2398', '2022091113_3173',
       '2022091113_3211', '2022091113_510', '2022091200_2067'],
      dtype=object)

In [51]:
gpid = "2022091112_2072"
plot_play_with_speed(
    df_tracking.query('first_line_set_fid <= frame_id <= ball_snap_fid + 20 and game_play_id==@gpid'),
    gpid, 
    every_other_frame=True, 
    event_col='event_new',
    plot_motion=True,
    highlight_lineman=True,
    highlight_qb=True,
    show_motion_frames=True
)