In [1]:
import os
import sys
from os.path import join
import json

from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
import nfl_data_py as nfl

# Make the repository's local `py/` directory importable: the notebook is
# expected to run from a sub-directory one level below the project root.
ROOT_DIR = os.path.abspath(join(os.getcwd(), '..'))
sys.path.insert(0, join(ROOT_DIR, 'py'))

# Project-local imports; these only resolve after the sys.path tweak above.
import util
from plot.plot_simple import plot_play_with_speed

# Lift the pandas display limits so frames render with every row and column.
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, None)

# `paths.json` is resolved relative to the current working directory.
with open("paths.json", 'r') as paths_file:
    paths = json.load(paths_file)

PROCESSED_DATA_PATH = paths['processed_data']

In [3]:
WEEK = 1

# Week-specific tables are read from <processed>/wk<WEEK>/; the player and team
# tables are read from the top of the processed-data directory.
week_dir = join(PROCESSED_DATA_PATH, f'wk{WEEK}')

df_tracking = pd.read_pickle(join(week_dir, 'tracking_final.pkl'))
df_game = pd.read_pickle(join(week_dir, 'games_final.pkl'))
df_play = pd.read_pickle(join(week_dir, 'play_final.pkl'))
df_player_play = pd.read_pickle(join(week_dir, 'player_play_final.pkl'))

df_player = pd.read_pickle(join(PROCESSED_DATA_PATH, 'players.pkl'))
df_team = pd.read_pickle(join(PROCESSED_DATA_PATH, 'teams.pkl'))

In [4]:
# Keep only offensive rows from pre-snap frames, starting at each play's first
# line_set frame (everything before `first_line_set_fid` is dropped).
cols = [
    'game_play_id', 'frame_id','frame_type', 'event_new', 'nfl_id', 'position_by_loc', 
    'extra_oline_box_left', 'oline_box_left', 'center_x_at_line_set', 'oline_box_right', 
    'extra_oline_box_right', 'x', 'y', 's', 'a', 'o', 'dir', 'motion_player'
]
df_motion_and_shifts = df_tracking.query(
    'frame_id >= first_line_set_fid and ' +
    'frame_type=="BEFORE_SNAP" and ' +
    'offense'
)[cols].copy()

# A line_set window is the run of frames between consecutive line_set events.
# Number the distinct line_set frames of each play 0, 1, 2, ... in frame order.
df_motion_and_shifts = df_motion_and_shifts.sort_values(['game_play_id','frame_id'])
line_set_windows = (
    df_motion_and_shifts
    .query('event_new == "line_set"')  # Only consider 'line_set' events
    .drop_duplicates(['game_play_id', 'frame_id'])  # One row per (play, frame)
    .assign(line_set_window_number=lambda x: x.groupby('game_play_id').cumcount())
)

# Attach the window number to every row of a line_set frame; all other frames
# get NaN here and are filled in below.
df_motion_and_shifts = df_motion_and_shifts.merge(
    line_set_windows[['game_play_id', 'frame_id', 'line_set_window_number']],
    on=['game_play_id', 'frame_id'],
    how='left'  # Merge without dropping rows from the original DataFrame
)

# Drop plays without at least one occurrence of motion_player.
motion_gids = (
    df_motion_and_shifts
    .query('motion_player')
    .game_play_id
    .unique()
    .tolist()
)
# .copy() so the column assignment below writes to an independent frame rather
# than to a filtered view (avoids SettingWithCopyWarning).
df_motion_and_shifts = df_motion_and_shifts[df_motion_and_shifts.game_play_id.isin(motion_gids)].copy()

# Carry each line_set window number forward to the frames that follow it.
# The fill is done per play so a number can never leak from the end of one play
# into the start of the next; `.ffill()` replaces the deprecated
# `fillna(method='ffill')`.
# NOTE(review): if a play's earliest kept frames are not labelled "line_set",
# they stay NaN and `astype(int)` raises instead of silently inheriting the
# previous play's window number.
df_motion_and_shifts['line_set_window_number'] = (
    df_motion_and_shifts
    .groupby('game_play_id')['line_set_window_number']
    .ffill()
    .astype(int)
)

In [5]:
MOVING_THRESHOLD = 1.0
N_FRAMES_NOT_MOVING = 10

# Register the `progress_apply` family on pandas objects.
tqdm.pandas()

# Rows of the motion player only, each flagged as "moving" when its speed `s`
# is at or above MOVING_THRESHOLD.
# A 3-frame centred rolling mean of `s` was tried as a smoothing step and is
# currently disabled; the raw `s` values are used.
df_motion = (
    df_motion_and_shifts
    .query('motion_player')
    .assign(moving=lambda frame: frame['s'] >= MOVING_THRESHOLD)
)

# Step 3: Create motion_frame column
def find_motion_frames(group, n_frames_not_moving=None):
    """Flag the frames that belong to the final stretch of motion in `group`.

    Starting from the last row whose ``moving`` flag is True and walking
    backwards, the motion is extended over earlier moving rows as long as the
    pause separating them is shorter than ``n_frames_not_moving`` rows. Every
    row from the start of that stretch through the last row of the group is
    flagged, so short pauses inside the stretch and any non-moving rows after
    the last moving row are included. If no row is moving, nothing is flagged.

    Assumes the rows of `group` are already in frame order.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to evaluate; must contain a boolean ``moving`` column.
    n_frames_not_moving : int, optional
        Number of consecutive non-moving rows that ends the backward walk.
        Defaults to the module-level ``N_FRAMES_NOT_MOVING``.

    Returns
    -------
    pandas.DataFrame
        A copy of `group` with an added boolean ``motion_frame`` column. The
        input frame is left untouched, because functions passed to
        ``groupby.apply`` should not mutate the object they receive.
    """
    if n_frames_not_moving is None:
        n_frames_not_moving = N_FRAMES_NOT_MOVING
    # A pause needs at least one non-moving row, so thresholds below 1 act like 1.
    min_pause = max(n_frames_not_moving, 1)

    moving = group['moving'].to_numpy(dtype=bool)
    motion_frame = np.zeros(len(moving), dtype=bool)

    moving_idx = np.flatnonzero(moving)
    if moving_idx.size:
        # Number of non-moving rows between each pair of consecutive moving rows.
        pauses = np.diff(moving_idx) - 1
        long_pauses = np.flatnonzero(pauses >= min_pause)
        # The stretch starts right after the latest pause that is long enough,
        # or at the very first moving row when there is no such pause.
        start = moving_idx[long_pauses[-1] + 1] if long_pauses.size else moving_idx[0]
        motion_frame[start:] = True

    return group.assign(motion_frame=motion_frame)

# Run find_motion_frames once per play (game_play_id), with a tqdm progress bar;
# group_keys=False keeps the group label out of the result's index.
motion_by_play = df_motion.groupby('game_play_id', group_keys=False)
df_motion = motion_by_play.progress_apply(find_motion_frames)

100%|██████████| 266/266 [00:00<00:00, 2225.79it/s]


In [6]:
# Copy the per-frame motion flag onto every tracking row of that (play, frame).
# Any motion_frame column left over from an earlier run of this cell is dropped
# first, so the merge does not produce suffixed duplicate columns.
# NOTE(review): this assumes df_motion has at most one row per
# (game_play_id, frame_id); duplicate keys would multiply tracking rows.
df_tracking = (
    df_tracking
    .drop(columns='motion_frame', errors='ignore')
    .merge(
        df_motion[['game_play_id','frame_id','motion_frame']],
        on=['game_play_id','frame_id'],
        how='left'
    )
)

# Frames with no match come back as NaN, which makes the merged column object
# dtype. Convert through the nullable boolean dtype and finish with a plain
# bool column instead of relying on fillna's deprecated silent downcasting.
df_tracking['motion_frame'] = (
    df_tracking['motion_frame']
    .astype('boolean')
    .fillna(False)
    .astype(bool)
)

In [7]:
# Reduce df_motion to the motion player's motion frames. The guard makes the
# cell re-runnable: after the first run `motion_player` is no longer a column.
if 'motion_player' in df_motion.columns:
    cols = ['game_play_id', 'frame_id', 'event_new',
        'position_by_loc', 'extra_oline_box_left', 'center_x_at_line_set',
        'extra_oline_box_right', 'x', 'y', 's', 'a', 'o', 'dir',
        'line_set_window_number','moving']
    df_motion = df_motion.query('motion_player and motion_frame')[cols]

# Bring in the play-level yardline once.
if 'absolute_yardline_number' not in df_motion.columns:
    df_motion = df_motion.merge(
        df_play[['game_play_id','absolute_yardline_number']],
        on='game_play_id',
        how='left'
    )

# First remaining row of each play, i.e. the first motion frame given the
# current row order.
df_motion_first_frame = df_motion.drop_duplicates('game_play_id', keep='first').copy()

# Classify the first frame as "Off-Line Y": a TE/RB/FB positioned
#   * less than 3 units outside either edge of the extra O-line box in x, and
#   * between 1 and 4 units below absolute_yardline_number in y.
first_x = df_motion_first_frame['x']
first_y = df_motion_first_frame['y']
box_left = df_motion_first_frame['extra_oline_box_left']
box_right = df_motion_first_frame['extra_oline_box_right']
yardline = df_motion_first_frame['absolute_yardline_number']

is_backfield_position = df_motion_first_frame['position_by_loc'].isin(['TE','RB','FB'])
just_outside_left = (first_x < box_left) & (first_x > box_left - 3)
just_outside_right = (first_x > box_right) & (first_x < box_right + 3)
in_depth_band = (first_y < yardline - 1) & (first_y > yardline - 4)

df_motion_first_frame['initial_alignment'] = np.where(
    is_backfield_position & (just_outside_left | just_outside_right) & in_depth_band,
    'Off-Line Y',
    None
)

# Attach the label to the matching first-frame row of df_motion; every other
# row gets a missing value. Dropping a label left by an earlier run keeps the
# cell re-runnable.
df_motion = df_motion.drop(columns='initial_alignment', errors='ignore').merge(
    df_motion_first_frame[['game_play_id','frame_id','initial_alignment']], 
    on=['game_play_id','frame_id'],
    how='left'
)

# Distance in x outside the extra O-line box. np.nan (not None) marks rows that
# are inside the box so the column stays a numeric float dtype instead of
# object.
df_motion['dx_outside_oline'] = np.select(
    [
        df_motion['x'] < df_motion['extra_oline_box_left'],
        df_motion['x'] > df_motion['extra_oline_box_right'],
    ],
    [
        df_motion['extra_oline_box_left'] - df_motion['x'],
        df_motion['x'] - df_motion['extra_oline_box_right'],
    ],
    default=np.nan
)

df_motion.head()

Unnamed: 0,game_play_id,frame_id,event_new,position_by_loc,extra_oline_box_left,center_x_at_line_set,extra_oline_box_right,x,y,s,a,o,dir,line_set_window_number,moving,absolute_yardline_number,initial_alignment,dx_outside_oline
0,2022090800_101,100,,FB,26.04,29.46,32.78,24.61,45.55,1.32,2.54,66.04,349.25,1,True,48,Off-Line Y,1.43
1,2022090800_101,101,,FB,26.04,29.46,32.78,24.78,45.53,1.82,3.07,58.22,351.97,1,True,48,,1.26
2,2022090800_101,102,,FB,26.04,29.46,32.78,25.0,45.5,2.36,3.55,43.23,353.48,1,True,48,,1.04
3,2022090800_101,103,,FB,26.04,29.46,32.78,25.26,45.47,2.83,3.48,34.58,354.23,1,True,48,,0.78
4,2022090800_101,104,,FB,26.04,29.46,32.78,25.57,45.44,3.3,3.3,28.64,354.9,1,True,48,,0.47


In [8]:
# Plays whose first motion frame was classified as "Off-Line Y".
is_off_line_y = df_motion['initial_alignment'] == "Off-Line Y"
df_motion.loc[is_off_line_y, 'game_play_id'].unique()

array(['2022090800_101', '2022090800_2043', '2022090800_2072',
       '2022090800_2163', '2022090800_2506', '2022090800_529',
       '2022091100_1413', '2022091100_2491', '2022091100_2720',
       '2022091100_2741', '2022091100_870', '2022091101_3569',
       '2022091102_1286', '2022091102_4019', '2022091102_86',
       '2022091103_1084', '2022091104_1094', '2022091104_3016',
       '2022091106_1425', '2022091106_1945', '2022091106_3277',
       '2022091106_359', '2022091106_883', '2022091107_1099',
       '2022091107_1144', '2022091107_115', '2022091107_1717',
       '2022091107_2113', '2022091107_2551', '2022091107_3163',
       '2022091107_68', '2022091109_1729', '2022091109_3463',
       '2022091109_3752', '2022091110_1047', '2022091110_1092',
       '2022091110_1494', '2022091110_219', '2022091110_3331',
       '2022091110_3797', '2022091110_3821', '2022091110_994',
       '2022091111_166', '2022091111_430', '2022091111_818',
       '2022091113_1436', '2022091113_1984', '202209111

In [9]:
gpid = '2022091106_883'

# Limit the plotted frames to the span from the first line_set frame through
# 10 frames past the ball-snap frame.
plot_window = df_tracking.query('frame_id >= first_line_set_fid and frame_id <= ball_snap_fid + 10')

plot_options = dict(
    event_col='event_new',
    highlight_lineman=True,
    highlight_qb=True,
    show_motion_frames=True,
)
plot_play_with_speed(plot_window, gpid, **plot_options)