In [1]:
import pandas as pd 
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import math

In [2]:
# Reading in all competiton data 

games = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/games.csv")
player_play = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/player_play.csv")
players = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/players.csv")
plays = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/plays.csv")

# Tracking data
tracking_w1 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_1.csv")
tracking_w2 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_2.csv")
tracking_w3 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_3.csv")
tracking_w4 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_4.csv")
tracking_w5 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_5.csv")
tracking_w6 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_6.csv")
tracking_w7 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_7.csv")
tracking_w8 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_8.csv")
tracking_w9 = pd.read_csv("/kaggle/input/nfl-big-data-bowl-2025/tracking_week_9.csv")

In [3]:
# This function takes in a dataframe and returns that dataframe filtered for plays where there is a shift or motion
def filter_dataset(data, player_play):
    m1 = player_play['shiftSinceLineset'] == 1
    m2 = player_play['motionSinceLineset'] == 1
    m3 = player_play['inMotionAtBallSnap'] == 1
    
    filter_data = player_play.loc[m1 | m2 | m3].reset_index(drop=True)
    filter_codes = [str(filter_data['gameId'][i]) + "-" + str(filter_data['playId'][i]) for i in range(len(filter_data))]

    data['play_code'] = [str(data['gameId'][i]) + "-" + str(data['playId'][i]) for i in range(len(data))]
    final = data[data['play_code'].isin(filter_codes)].reset_index(drop=True)
  

    return final

In [4]:
# A function that can visualize offense alignment 

def generate_graph(data, playId, gameId, players):
    t1 = data['gameId'] == gameId
    t2 = data['playId'] == playId
    t3 = data['frameType'] == 'BEFORE_SNAP'

    tmp = data.loc[t1 & t2 & t3] #[['displayName','x', 'y','nflId']]
    tmp2 = tmp.merge(players, left_on=['nflId'], right_on=['nflId'])
    final = tmp2.groupby(['displayName_x','position']).agg({'x':lambda x: list(x), 'y':lambda y: list(y)}).reset_index()
    final = final[final['position'].isin(['WR', 'RB', 'TE', 'T', 'G', 'QB', 'C', 'FB'])].reset_index(drop=True)

    for i in range(len(final)):
        label = final['position'][i] + "_" + str(i)
        plt.plot(final['x'][i], final['y'][i], label=label)

    plt.xlim(0,120)
    plt.ylim(0,53.3)
    plt.legend()
    plt.show()

In [5]:
# This function add a feature that determines the y-coordinate of a player when the line is set
def add_features(data):
    line_set_y  = []
    
    for i in range(len(data)):
        try:
            ls = data["event"][i].index("line_set")
            if ls < 0:
                ls = 0
            
            line_set_y.append(data['y'][i][ls])
        except:
            line_set_y.append(0)

    data['line_set_y'] = line_set_y 

    return data

In [6]:
# This function determines the reciever alignment when the line is set, before any motions and before the ball is snapped
def get_reciever_alignment(data):
    linemen = data[data['position'].isin(["C","G","T"])]
    skills = data[~data['position'].isin(["C", "G", "T"])].reset_index(drop=True)
    mx = np.max(linemen['line_set_y'])
    mi = np.min(linemen['line_set_y'])

    below, above = 0, 0
    for i in range(len(skills)):
        if skills['line_set_y'][i] < mi:
            below += 1
        elif skills['line_set_y'][i] > mx:
            above += 1


    return str(below) + "x" + str(above)

In [7]:
# This function uses the helper functions above to create the final dataset that can be used in the analysis notebook.
def convert_data(df, plays):
    final_df = pd.DataFrame()
    game_id, play_id, ps_reciever_align = [],[],[]
    
    t = df[df['frameType']=='BEFORE_SNAP']
    track = t.groupby(['gameId', 'playId', 'nflId'])[['frameId','frameType','playDirection','x','y','s','a','dis','o','dir','event']].agg(lambda x: list(x)).reset_index()
    track = track.merge(players, left_on=['nflId'], right_on=['nflId'])
    track = track.merge(player_play, left_on=['nflId','gameId','playId'], right_on=['nflId','gameId','playId'])
    
    for i in tqdm(range(len(plays))):        
        m1 = track['gameId'] == plays['gameId'][i]
        m2 = track['playId'] == plays['playId'][i]

        data = track.loc[m1 & m2].reset_index(drop=True)

        if len(data) == 0:
            continue

        else:
            offense = ['WR', 'RB', 'TE', 'FB','T','G','QB', 'C']
            defense = ['CB', 'OLB', 'DE', 'DT', 'ILB', 'FS', 'SS', 'NT', 'MLB', 'DB', 'LB']
            side = []
            
            for j in range(len(data['position'])):
                if data['position'][j] in offense:
                    side.append('O')
                elif data['position'][j] in defense:
                    side.append('D')
                else:
                    side.append('unknown')
            
            data['side'] = side
            data2 = data[data['side'] == 'O'].reset_index(drop=True)
            data3 = add_features(data2)
            data4 = data3[['line_set_y','position']]
            ps_reciever_align.append(get_reciever_alignment(data4))
            game_id.append(plays['gameId'][i])
            play_id.append(plays['playId'][i])

    
    final_df['game_id'] = game_id 
    final_df['play_id'] = play_id 
    final_df['initial_alignment'] = ps_reciever_align

    
    return final_df

In [8]:
tracking_w1 = filter_dataset(tracking_w1, player_play)
tracking_w2 = filter_dataset(tracking_w2, player_play)
tracking_w3 = filter_dataset(tracking_w3, player_play)
tracking_w4 = filter_dataset(tracking_w4, player_play)
tracking_w5 = filter_dataset(tracking_w5, player_play)
tracking_w6 = filter_dataset(tracking_w6, player_play)
tracking_w7 = filter_dataset(tracking_w7, player_play)
tracking_w8 = filter_dataset(tracking_w8, player_play)
tracking_w9 = filter_dataset(tracking_w9, player_play)

In [9]:
w1 = convert_data(tracking_w1, plays)
w2 = convert_data(tracking_w2, plays)
w3 = convert_data(tracking_w3, plays)
w4 = convert_data(tracking_w4, plays)
w5 = convert_data(tracking_w5, plays)
w6 = convert_data(tracking_w6, plays)
w7 = convert_data(tracking_w7, plays)
w8 = convert_data(tracking_w8, plays)
w9 = convert_data(tracking_w9, plays)

100%|██████████| 16124/16124 [00:19<00:00, 827.65it/s]
100%|██████████| 16124/16124 [00:18<00:00, 867.98it/s]
100%|██████████| 16124/16124 [00:22<00:00, 728.60it/s]
100%|██████████| 16124/16124 [00:18<00:00, 851.51it/s]
100%|██████████| 16124/16124 [00:18<00:00, 873.85it/s]
100%|██████████| 16124/16124 [00:18<00:00, 869.34it/s]
100%|██████████| 16124/16124 [00:18<00:00, 891.48it/s]
100%|██████████| 16124/16124 [00:18<00:00, 870.88it/s]
100%|██████████| 16124/16124 [00:17<00:00, 908.50it/s]


In [10]:
all_tracking = pd.concat([w1,w2,w3,w4,w5,w6,w7,w8,w9])

In [11]:
play_data = pd.merge(left=plays, right=all_tracking, how='left', left_on=['gameId', 'playId'], right_on=['game_id', 'play_id'])

In [12]:
# A shift from 1x3 to 3x1 was also identified but was not used for the analysis
m1 = play_data['initial_alignment'] == "1x3"
m2 = play_data['receiverAlignment'] == "3x1"
m3 = play_data['initial_alignment'] == "3x1"
m4 = play_data['receiverAlignment'] == "1x3"
motion1 = play_data.loc[(m1 & m2) | (m3 & m4)].reset_index(drop=True)

m5 = play_data['initial_alignment'] == "1x3"
m6 = play_data['receiverAlignment'] == "1x3"
m7 = play_data['initial_alignment'] == "3x1"
m8 = play_data['receiverAlignment'] == "3x1"
motion1_control = play_data.loc[(m5 & m6) | (m7 & m8)].reset_index(drop=True)

# A shift from 1x3 or 3x1 to 2x2 is identified and stored in a dataframe to use for further analysis
m9 = play_data['initial_alignment'] == "1x3"
m10 = play_data['receiverAlignment'] == "2x2"
m11 = play_data['initial_alignment'] == "3x1"
m12 = play_data['receiverAlignment'] == "2x2"
motion2 = play_data.loc[(m9 & m10) | (m11 & m12)].reset_index(drop=True)

m13 = play_data['initial_alignment'] == "2x2"
m14 = play_data['receiverAlignment'] == "2x2"
motion2_control = play_data.loc[~((m9 & m10) | (m11 & m12))].reset_index(drop=True)
motion2_control2 = play_data.loc[m13 & m14]

In [13]:
# The datasets are stored in csv files to be used in the "BDB25-Analysis" notebook
motion1.to_csv("motion1.csv", index=False)
motion1_control.to_csv("motion1_control.csv", index=False)
motion2.to_csv("motion2.csv", index=False)
motion2_control.to_csv("motion2_control.csv", index=False)
motion2_control2.to_csv("motion2_control2.csv")