## Load necessary libraries

In [2]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import csv
import os
os.chdir('/content/drive/My Drive/Colab Notebooks/big data bowl 2025/data')

Mounted at /content/drive


## Load Datasets

In [2]:
games = pd.read_csv('games.csv')
player_plays = pd.read_csv('player_play.csv')
plays = pd.read_csv('plays.csv', quoting=csv.QUOTE_NONE, on_bad_lines='skip')
players = pd.read_csv('players.csv')

In [32]:
week2 = pd.read_csv('tracking_week_2.csv')
week3 = pd.read_csv('tracking_week_3.csv')
week4 = pd.read_csv('tracking_week_4.csv')
week5 = pd.read_csv('tracking_week_5.csv')
week6 = pd.read_csv('tracking_week_6.csv')
week7 = pd.read_csv('tracking_week_7.csv')
week8 = pd.read_csv('tracking_week_8.csv')
week9 = pd.read_csv('tracking_week_9.csv')

In [3]:
week1 = pd.read_csv('tracking_week_1.csv')

## Generating tracking dataset with observations indexed by game, play, and time.
Entries are only kept if there was a motion man that exceeded 8 mph at the moment of the snap because we are looking at high speed motion types at the snap. Contains information about the center and the WR, TE, or RB which satisfied the motion requirement for the play to be kept. Play sample size was redcued to 2546 after this step

In [57]:
players = players.loc[:,['nflId','height', 'weight', 'position']]

Using week 1 as a test before creating an iterative process

In [5]:
week1 = week1.merge(players, how='left', left_on='nflId', right_on='nflId') # Obtain the position of the player from player.csv, useful for filtering

In [7]:
week1 = week1[week1['position'].isin(['WR', 'RB', 'TE', 'C'])] # Filter observations based on position (only these are relevant to create the final dataframe)

In [9]:
week1['speed'] = week1['s'] * 2.04545 # Feature engineer speed in MPH for easier interpretation

Finding game and play Id indices for the motion man and the center

In [16]:
playerInMotionAtSnap = week1[((week1['speed'] > 8) & (week1['frameType'] == 'SNAP'))].drop_duplicates(subset=['gameId', 'playId', 'nflId']) # Boolean mask to filter for speed at the moment of the snap
playerInMotionAtSnap = playerInMotionAtSnap.loc[:,['gameId', 'playId', 'nflId']] # Only need to keep the indice information
center = week1[(week1['position'] == 'C')].drop_duplicates(subset=['gameId', 'playId', 'nflId']) # Obtain a dataframe of only centers
center = center.loc[:,['gameId', 'playId', 'nflId']] # Similarly reduce
playerInMotionAtSnap = playerInMotionAtSnap.merge(center, how='left', left_on=['gameId', 'playId'], right_on=['gameId', 'playId']) # Merge motion man dataframe and center dataframe to find corresponding nflIds

In [17]:
playerInMotionAtSnap.shape

(400, 4)

In [18]:
playerInMotionAtSnap.drop_duplicates(subset= ['gameId', 'playId'], inplace=True) # Duplicates come from some motion players having two "centers" on their offensive line on the play
playerInMotionAtSnap.shape

(343, 4)

In [19]:
playerInMotionAtSnap.head() # nflId_x is the motion man Id while nflId_y is the center Id

Unnamed: 0,gameId,playId,nflId_x,nflId_y
0,2022091200,64,47803.0,43537.0
1,2022091200,85,46096.0,43537.0
2,2022091200,401,52423.0,52491.0
3,2022091200,446,52454.0,52491.0
4,2022091200,643,47847.0,43537.0


In [20]:
playerInMotionAtSnap = pd.melt(playerInMotionAtSnap, id_vars=['gameId', 'playId'], value_vars=['nflId_x', 'nflId_y'], value_name='nflId') # Pivot wider to obtain all the unique Ids needed
playerInMotionAtSnap.head()

Unnamed: 0,gameId,playId,variable,nflId
0,2022091200,64,nflId_x,47803.0
1,2022091200,85,nflId_x,46096.0
2,2022091200,401,nflId_x,52423.0
3,2022091200,446,nflId_x,52454.0
4,2022091200,643,nflId_x,47847.0


In [21]:
playerInMotionAtSnap.drop(columns=['variable'], inplace=True) # Drop the redundant column

In [22]:
week1 = pd.merge(week1, playerInMotionAtSnap, how='inner', left_on=['gameId', 'playId', 'nflId'], right_on=['gameId', 'playId', 'nflId']) # Merge back with the original dataframe, only keeping entries with the relvant game, play, and nfl Ids

In [23]:
week1.shape # Shape of this resulting dataframe

(110914, 22)

In [24]:
week1.head() # What it looks like

Unnamed: 0,gameId,playId,nflId,displayName,frameId,frameType,time,jerseyNumber,club,playDirection,...,s,a,dis,o,dir,event,height,weight,position,speed
0,2022091200,64,43537.0,Austin Blythe,1,BEFORE_SNAP,2022-09-13 00:16:03.5,63.0,SEA,right,...,1.52,0.76,0.16,108.72,95.09,huddle_break_offense,6-3,300.0,C,3.109084
1,2022091200,64,43537.0,Austin Blythe,2,BEFORE_SNAP,2022-09-13 00:16:03.6,63.0,SEA,right,...,1.67,0.91,0.17,113.33,92.89,,6-3,300.0,C,3.415902
2,2022091200,64,43537.0,Austin Blythe,3,BEFORE_SNAP,2022-09-13 00:16:03.7,63.0,SEA,right,...,1.88,0.81,0.19,97.21,92.27,,6-3,300.0,C,3.845446
3,2022091200,64,43537.0,Austin Blythe,4,BEFORE_SNAP,2022-09-13 00:16:03.8,63.0,SEA,right,...,2.07,0.61,0.21,90.33,92.71,,6-3,300.0,C,4.234082
4,2022091200,64,43537.0,Austin Blythe,5,BEFORE_SNAP,2022-09-13 00:16:03.9,63.0,SEA,right,...,2.2,0.34,0.23,91.91,93.16,,6-3,300.0,C,4.49999


In [29]:
week1_pivot = week1.pivot(index=['gameId', 'playId', 'time'], columns='position', values=['speed', 'x', 'y', 'dir', 'o', 'frameType', 'height', 'weight',
                                                                                       'playDirection', 'dis', 'a', 's', 'event', 'displayName', 'nflId']) # Pivot wider so center and motion player info is in the same row

In [30]:
week1_pivot.shape # Shape of this resulting dataframe

(55457, 60)

In [31]:
week1_pivot.head() # What it looks like

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,speed,speed,speed,speed,x,x,x,x,y,y,...,event,event,displayName,displayName,displayName,displayName,nflId,nflId,nflId,nflId
Unnamed: 0_level_1,Unnamed: 1_level_1,position,C,RB,TE,WR,C,RB,TE,WR,C,RB,...,TE,WR,C,RB,TE,WR,C,RB,TE,WR
gameId,playId,time,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
2022090800,80,2022-09-09 00:24:24.5,0.040909,0.081818,,,79.76,81.74,,,29.44,39.42,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2022090800,80,2022-09-09 00:24:24.6,0.040909,0.081818,,,79.75,81.74,,,29.44,39.42,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2022090800,80,2022-09-09 00:24:24.7,0.040909,0.061364,,,79.75,81.73,,,29.43,39.42,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2022090800,80,2022-09-09 00:24:24.8,0.040909,0.061364,,,79.74,81.73,,,29.43,39.43,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2022090800,80,2022-09-09 00:24:24.9,0.040909,0.081818,,,79.73,81.72,,,29.43,39.44,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,


In [64]:
week1_pivot[week1_pivot['frameType'].iloc[:,0] == 'SNAP'] # Verifying we have the correct number of plays after the operations

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,speed,speed,speed,speed,x,x,x,x,y,y,...,event,event,displayName,displayName,displayName,displayName,nflId,nflId,nflId,nflId
Unnamed: 0_level_1,Unnamed: 1_level_1,position,C,RB,TE,WR,C,RB,TE,WR,C,RB,...,TE,WR,C,RB,TE,WR,C,RB,TE,WR
gameId,playId,time,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
2022090800,80,2022-09-09 00:24:33.2,0.470454,10.881794,,,79.7,85.83,,,29.45,31.54,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2022090800,212,2022-09-09 00:28:20.5,1.309088,,14.604513,,44.18,,46.68,,29.36,,...,ball_snap,,Mitch Morse,,Dawson Knox,,42392.0,,47879.0,
2022090800,299,2022-09-09 00:33:31,1.002271,,,14.215878,26.24,,,23.39,23.59,,...,,ball_snap,Brian Allen,,,Cooper Kupp,46180.0,,,44881.0
2022090800,393,2022-09-09 00:35:59.1,0.081818,,,12.293154,51.23,,,48.76,23.76,,...,,ball_snap,Brian Allen,,,Cooper Kupp,46180.0,,,44881.0
2022090800,438,2022-09-09 00:36:55.8,0.122727,,,13.786333,62.26,,,59.29,23.65,,...,,ball_snap,Brian Allen,,,Ben Skowronek,46180.0,,,53678.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022091200,3048,2022-09-13 02:32:01.6,0.654544,,,14.931785,90.54,,,93.34,29.85,,...,,ball_snap,Lloyd Cushenberry,,,Jerry Jeudy,52491.0,,,52423.0
2022091200,3077,2022-09-13 02:32:43.5,1.677269,,,13.397698,86.59,,,89.25,29.62,,...,,ball_snap,Lloyd Cushenberry,,,K.J. Hamler,52491.0,,,52454.0
2022091200,3382,2022-09-13 02:42:03.5,0.531817,,,11.249975,18.45,,,20.63,29.88,,...,,ball_snap,Lloyd Cushenberry,,,Courtland Sutton,52491.0,,,46109.0
2022091200,3491,2022-09-13 02:48:05,0.265909,12.518154,,,41.68,35.82,,,24.08,27.25,...,,,Austin Blythe,Rashaad Penny,,,43537.0,46096.0,,


## Iterative step to apply above process on the other eight weeks of data and concatenate together

In [33]:
tracking = week1_pivot.copy()
week_list = [week2, week3, week4, week5, week6, week7, week8, week9]
for week in week_list:
  week = week.merge(players, how='left', left_on='nflId', right_on='nflId')
  week = week[week['position'].isin(['WR', 'RB', 'TE', 'C'])]
  week['speed'] = week['s'] * 2.04545
  playerInMotionAtSnap = week[((week['speed'] > 8) & (week['frameType'] == 'SNAP'))].drop_duplicates(subset=['gameId', 'playId', 'nflId'])
  playerInMotionAtSnap = playerInMotionAtSnap.loc[:,['gameId', 'playId', 'nflId']]
  center = week[(week['position'] == 'C')].drop_duplicates(subset=['gameId', 'playId', 'nflId'])
  center = center.loc[:,['gameId', 'playId', 'nflId']]
  playerInMotionAtSnap = playerInMotionAtSnap.merge(center, how='left', left_on=['gameId', 'playId'], right_on=['gameId', 'playId'])
  playerInMotionAtSnap.drop_duplicates(subset= ['gameId', 'playId'], inplace=True)
  print(playerInMotionAtSnap.shape)
  playerInMotionAtSnap = pd.melt(playerInMotionAtSnap, id_vars=['gameId', 'playId'], value_vars=['nflId_x', 'nflId_y'], value_name='nflId')
  playerInMotionAtSnap.drop(columns=['variable'], inplace=True)
  week = pd.merge(week, playerInMotionAtSnap, how='inner', left_on=['gameId', 'playId', 'nflId'], right_on=['gameId', 'playId', 'nflId'])
  week_pivot = week.pivot(index=['gameId', 'playId', 'time'], columns='position', values = ['speed', 'x', 'y', 'dir', 'o', 'frameType', 'height', 'weight',
                                                                                       'playDirection', 'dis', 'a', 's', 'event', 'displayName', 'nflId'])
  tracking = pd.concat([tracking, week_pivot])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week['speed'] = week['s'] * 2.04545


(275, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week['speed'] = week['s'] * 2.04545


(281, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week['speed'] = week['s'] * 2.04545


(263, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week['speed'] = week['s'] * 2.04545


(279, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week['speed'] = week['s'] * 2.04545


(264, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week['speed'] = week['s'] * 2.04545


(286, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week['speed'] = week['s'] * 2.04545


(301, 4)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  week['speed'] = week['s'] * 2.04545


(254, 4)


In [35]:
tracking.to_csv('tracking.csv') # Save dataframe

## Creating dataframe uniquely indexed by game and play Id only
Contains important variables like nflId, displayName, position of motion man, speed of motion man at snap, type of motion, lateral distance traveled, direction of motion (short or long)

Types of Motion
1. Jet - ball snapped before receiver crosses center
2. Fly - ball snapped after receiver crosses center
3. Glide In - Receiver breaks towards center but never crosses him
4. Glide Out - Receiver breaks away from center
5. Return - Receiver crosses center twice, moving toward original starting place at snap

**Entries not identified with one of these types of motion are dropped**

This section used to re-load the tracking dataset in to prepare for final cleaning

In [3]:
tracking = pd.read_csv('tracking.csv', header= [0,1], index_col=[0,1,2])
tracking.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,speed,speed,speed,speed,x,x,x,x,y,y,...,event,event,displayName,displayName,displayName,displayName,nflId,nflId,nflId,nflId
Unnamed: 0_level_1,Unnamed: 1_level_1,position,C,RB,TE,WR,C,RB,TE,WR,C,RB,...,TE,WR,C,RB,TE,WR,C,RB,TE,WR
gameId,playId,time,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
2022090800,80,2022-09-09 00:24:24.5,0.040909,0.081818,,,79.76,81.74,,,29.44,39.42,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2022090800,80,2022-09-09 00:24:24.6,0.040909,0.081818,,,79.75,81.74,,,29.44,39.42,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2022090800,80,2022-09-09 00:24:24.7,0.040909,0.061364,,,79.75,81.73,,,29.43,39.42,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2022090800,80,2022-09-09 00:24:24.8,0.040909,0.061364,,,79.74,81.73,,,29.43,39.43,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2022090800,80,2022-09-09 00:24:24.9,0.040909,0.081818,,,79.73,81.72,,,29.43,39.44,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,


In [4]:
tracking.reset_index(inplace=True)
tracking['time'] = pd.to_datetime(tracking['time'], format='ISO8601')
tracking.head()

Unnamed: 0_level_0,gameId,playId,time,speed,speed,speed,speed,x,x,x,...,event,event,displayName,displayName,displayName,displayName,nflId,nflId,nflId,nflId
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,C,RB,TE,WR,C,RB,TE,...,TE,WR,C,RB,TE,WR,C,RB,TE,WR
0,2022090800,80,2022-09-09 00:24:24.500,0.040909,0.081818,,,79.76,81.74,,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
1,2022090800,80,2022-09-09 00:24:24.600,0.040909,0.081818,,,79.75,81.74,,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
2,2022090800,80,2022-09-09 00:24:24.700,0.040909,0.061364,,,79.75,81.73,,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
3,2022090800,80,2022-09-09 00:24:24.800,0.040909,0.061364,,,79.74,81.73,,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,
4,2022090800,80,2022-09-09 00:24:24.900,0.040909,0.081818,,,79.73,81.72,,...,,,Mitch Morse,Devin Singletary,,,42392.0,47857.0,,


In [5]:
tracking.set_index(['gameId', 'playId', 'time'], inplace=True)

Used as test to make sure group by operation does not take too long

In [19]:
small_df = tracking.loc[(2022090800, 80): (2022090800, 212), :]

In [20]:
small_df.shape

(313, 60)

custom function applied to each unique group (play) in the tracking dataset

In [11]:
def custom_evaluation_function(group):
  nflid = group['nflId'].dropna(axis=1).iloc[0, -1] # Keep nfl id of motion player
  displayname = group['displayName'].dropna(axis=1).iloc[0, -1] # Keep display name of motion player
  position = group['nflId'].dropna(axis=1).columns[-1] # Keep position of motion player
  offense_movement_direction = group[('playDirection', 'C')].iloc[0] # Keep the offensive movement direction (right or left)
  atsnap = group[group['frameType'].iloc[:,0] == 'SNAP'] # The observation of the play correspnding to the moment of the snap
  if atsnap.shape[0] == 0: # Discard play if it does not have this
    return
  speed = atsnap['speed', position].iloc[0] # Keep the speed of the man in motion at the snap
  atsnap.reset_index(inplace=True)
  atmotion = group[group[('event', position)] == 'man_in_motion'] # The observation of the play correspnding to the moment the motion started
  if atmotion.shape[0] == 0: # Discard play if it does not have this
    return
  atmotion.reset_index(inplace=True)
  distance = distance_motion(atsnap, atmotion, position) # Returns the lateral distance traveled with a helper function
  if distance == 0: # Discard play if this is unidentifiable
    return
  direction = direction_motion(atsnap, position) # Returns whether the motion was direction toward the short or long side of the field
  if direction == 0: # Discard play if this is unidentifiable
    return
  motion_class = motion_type(group, position, atsnap['time'].iloc[0], atmotion['time'].iloc[0]) # Returns the type of motion with a helper function
  if motion_class == 0: # Discard play if this is unidentifiable
    return

  # Return a row of all the information gathered
  return pd.Series({
      'nflId': nflid,
      'displayName': displayname,
      'position': position,
      'speed at snap': speed,
      'lateral distance traveled': distance,
      'offense movement direction': offense_movement_direction,
      'motion direction': direction,
      'motion type': motion_class
  })

custom helper function for assessing the type of motion

In [12]:
def distance_motion(snap, motion, position):
  distance = abs(snap['y', position].iloc[0] - motion['y', position].iloc[0]) # Finds the difference in the y coordinate of the motion man at the start of the motion and the snap
  return distance

custom helper function for assessing direction of motion

In [13]:
def direction_motion(frame, position):
  side = ''
  center_coordinate = frame[('y', 'C')].iloc[0] # Find center coordinate
  motion_orientation = frame[('dir', position)].iloc[0] # Find the direction of motion at the snap

  # Depending on which side of the field the center is on, use the direction of motion player momentum to determine if he is moving toward short or long side of the field
  if (motion_orientation > 270) | (motion_orientation < 90):
    if center_coordinate > 26.65:
      side = 'short'
    else:
      side = 'long'
  else:
      if center_coordinate > 26.65:
        side = 'long'
      else:
        side = 'short'
  return side

Custom helper function for assessing type of motion

In [14]:
from datetime import datetime
from datetime import timedelta

In [39]:
def motion_type(frame, position, snaptime, motiontime):
  frame = frame.reset_index()
  frame.set_index('time', inplace=True)
  postsnap_time = snaptime + timedelta(seconds=1)
  frame = frame.loc[motiontime:postsnap_time] # Filter dataframe to contain only timestamps from the start of the motion until two seconds after the snap
  if frame.shape[0] == 0: # Return empty if the dataframe is empty (no play)
    return 0
  center_coordinate = frame[('y', 'C')].iloc[0] # Get center coordinate
  starting_motion_coord = frame[('y', position)].iloc[0] # Get the starting coordinate of the motion player
  is_bottom = starting_motion_coord < center_coordinate # Determine which side of the center the motion player started
  out_counter = 0 # Keeps track of movement away from the center
  cross = 0 # Keeps track of whether player crosses the center
  cross_id = 0 # Keeps track of when the player crossed the center
  snap_id = 0 # Keeps tracking of when the ball was snapped

  # Iterate through all timestamps from the moment the player goes into motion until two seconds after the ball is snapped
  for id, coord in enumerate(frame[('y', position)]):

    # For each play, record any important information if it occurs
    if is_bottom:
      if (starting_motion_coord > coord):
        out_counter += 1
      elif (coord > center_coordinate):
        cross += 1
        is_bottom = not is_bottom
        cross_id = id
    else:
      if (starting_motion_coord < coord):
        out_counter += 1
      elif (coord < center_coordinate):
        cross += 1
        is_bottom = not is_bottom
        cross_id = id

    # If player has been moving away from the center until the time of snap and he has not crossed the center, deem the motion a glide out
    if (frame[('event', position)].iloc[id] == 'ball_snap'):
      snap_id = id
      if ((out_counter > 10) & (cross == 0)):
        return 'glide out'

  # Otherwise, wait until the end of the iteration to make judgement about motion type
  if (cross == 0): # If motion player has not crossed center yet, glide in
    return 'glide in'
  elif ((cross == 1) & (cross_id < snap_id)): # If motion player crossed center before snap, fly
    return 'fly'
  elif ((cross == 1) & (cross_id > snap_id)): # If motion player crossed center after snap, jet
    return 'jet'
  elif (cross == 2): # If motion player crosses twice, return
    return 'return'
  else: # Otherwise, we consider unknown and not a part of our desired sample
    return 'unknown'

In [40]:
tracking_cleaned = tracking.groupby(['gameId', 'playId']).apply(custom_evaluation_function)
tracking_cleaned

Unnamed: 0_level_0,Unnamed: 1_level_0,nflId,displayName,position,speed at snap,lateral distance traveled,offense movement direction,motion direction,motion type
gameId,playId,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022090800,80,47857.0,Devin Singletary,RB,10.881794,3.64,left,short,return
2022090800,212,47879.0,Dawson Knox,TE,14.604513,5.79,left,short,fly
2022090800,299,44881.0,Cooper Kupp,WR,14.215877,4.04,right,long,jet
2022090800,393,,,,,,,,
2022090800,438,53678.0,Ben Skowronek,WR,13.786333,3.86,right,long,jet
...,...,...,...,...,...,...,...,...,...
2022110700,1577,52500.0,Devin Duvernay,WR,14.952239,6.67,left,short,jet
2022110700,2182,33130.0,DeSean Jackson,WR,12.497699,5.76,right,long,fly
2022110700,2210,54604.0,Isaiah Likely,TE,13.581788,8.04,right,long,fly
2022110700,2286,54604.0,Isaiah Likely,TE,12.784062,9.11,right,long,fly


In [41]:
tracking_cleaned.dropna(axis=0, inplace=True)

In [42]:
tracking_cleaned.shape

(2023, 8)

In [43]:
print(sum(tracking_cleaned['motion type'] == 'glide out'))
print(sum(tracking_cleaned['motion type'] == 'glide in'))
print(sum(tracking_cleaned['motion type'] == 'jet'))
print(sum(tracking_cleaned['motion type'] == 'fly'))
print(sum(tracking_cleaned['motion type'] == 'return'))
print(sum(tracking_cleaned['motion type'] == 'unknown'))

74
220
961
661
42
65


In [44]:
tracking_cleaned = tracking_cleaned.reset_index()
tracking_cleaned = tracking_cleaned[tracking_cleaned['motion type'] != 'unknown']

In [45]:
tracking_cleaned.head()

Unnamed: 0,gameId,playId,nflId,displayName,position,speed at snap,lateral distance traveled,offense movement direction,motion direction,motion type
0,2022090800,80,47857.0,Devin Singletary,RB,10.881794,3.64,left,short,return
1,2022090800,212,47879.0,Dawson Knox,TE,14.604513,5.79,left,short,fly
2,2022090800,299,44881.0,Cooper Kupp,WR,14.215877,4.04,right,long,jet
3,2022090800,438,53678.0,Ben Skowronek,WR,13.786333,3.86,right,long,jet
4,2022090800,617,44985.0,Isaiah McKenzie,WR,11.86361,5.41,left,short,fly


In [46]:
tracking_cleaned.to_csv('tracking_cleaned.csv', index=False, header=True)

In [47]:
player_play = pd.read_csv('player_play.csv')
plays = pd.read_csv('plays.csv', quoting=csv.QUOTE_NONE, on_bad_lines='skip')

In [48]:
print(tracking_cleaned.columns)
print(player_play.columns)
print(plays.columns)

Index(['gameId', 'playId', 'nflId', 'displayName', 'position', 'speed at snap',
       'lateral distance traveled', 'offense movement direction',
       'motion direction', 'motion type'],
      dtype='object')
Index(['gameId', 'playId', 'nflId', 'teamAbbr', 'hadRushAttempt',
       'rushingYards', 'hadDropback', 'passingYards', 'sackYardsAsOffense',
       'hadPassReception', 'receivingYards', 'wasTargettedReceiver',
       'yardageGainedAfterTheCatch', 'fumbles', 'fumbleLost',
       'fumbleOutOfBounds', 'assistedTackle', 'forcedFumbleAsDefense',
       'halfSackYardsAsDefense', 'passDefensed', 'quarterbackHit',
       'sackYardsAsDefense', 'safetyAsDefense', 'soloTackle', 'tackleAssist',
       'tackleForALoss', 'tackleForALossYardage', 'hadInterception',
       'interceptionYards', 'fumbleRecoveries', 'fumbleRecoveryYards',
       'penaltyYards', 'penaltyNames', 'wasInitialPassRusher',
       'causedPressure', 'timeToPressureAsPassRusher',
       'getOffTimeAsPassRusher', 'inMotion

In [58]:
player_play = player_play.merge(players, how='left', left_on='nflId', right_on='nflId')

In [60]:
pbp = player_play.merge(tracking_cleaned, how='left', left_on=['gameId', 'playId'], right_on=['gameId', 'playId'])

In [66]:
pbp.rename(columns={'displayName': 'motion player name', 'position_x':'non motion position', 'position_y': 'motion player position',
                    'nflId_x':'non motion nflId', 'nflId_y': 'motion player position'}, inplace=True)

In [69]:
pbp.to_csv('pbp.csv', index=False, header=True)