In [56]:

import numpy as np
import pandas as pd

import pickle

intermediate_data = pickle.load(open("../data/nfl-big-data-bowl-2024/_play_by_play_intermediate.pkl", "rb"))


In [57]:
test_play = intermediate_data.iloc[0]
print(test_play.keys())
tracking_data = test_play["player_tracking"]
ball_carrier_id = test_play["play_features"].loc["ballCarrierId"]
ball_carrier_tracking_data = tracking_data.loc[:, ball_carrier_id]


dict_keys(['game_id', 'play_id', 'player_tracking', 'event_timeseries', 'players_on_the_field', 'play_features', 'tackle_successful', 'yards_after_contact'])


In [75]:
def get_relative_features_to_ball_carrier(group):
    group_ = group.T.droplevel(0, axis=1)
    dist = np.sqrt(np.square(group_.loc[:, ['x', 'y']] - ball_carrier_tracking_data.loc[:, ['x', 'y']]).sum(axis=1))
    
    delta_angle = ball_carrier_tracking_data.loc[:, 'dir'] - group_.loc[:, 'dir']

    player_speed = group_.loc[:, 's']
    ball_carrier_speed = ball_carrier_tracking_data.loc[:, 's']
    rel_spd = np.sqrt(player_speed ** 2 + ball_carrier_speed ** 2 - 2 * player_speed * ball_carrier_speed * np.cos(delta_angle))

    player_accel = group_.loc[:, 'a']
    ball_carrier_accel = ball_carrier_tracking_data.loc[:, 'a']
    rel_accel = np.sqrt(player_accel ** 2 + ball_carrier_accel ** 2 - 2 * player_accel * ball_carrier_accel * np.cos(delta_angle))
    
    return pd.DataFrame({
        "distance": dist,
        "relative_speed": rel_spd,
        "relative_acceleration": rel_accel,
        "delta_angle": delta_angle,
        "cosine_similarity": np.cos(delta_angle),
    })

non_ball_carriers = tracking_data.T.groupby(level=0).filter(lambda g: g.name != ball_carrier_id)
geometric_features = non_ball_carriers.groupby(level=0).apply(get_relative_features_to_ball_carrier)
geometric_features


Unnamed: 0,Unnamed: 1,distance,relative_speed,relative_acceleration,delta_angle,cosine_similarity
35472.0,0,4.240814,6.255264,3.332859,153.53,-0.917900
35472.0,1,4.471532,6.467044,4.004275,160.74,-0.868427
35472.0,2,4.800021,6.663768,4.336356,166.06,-0.902864
35472.0,3,5.204652,6.795110,4.853028,172.40,-0.925820
35472.0,4,5.678424,6.713280,5.328899,179.56,-0.882700
...,...,...,...,...,...,...
53532.0,15,2.951491,11.578340,2.817839,317.90,-0.825820
53532.0,16,2.432879,4.241660,1.507858,320.87,0.909985
53532.0,17,1.952562,12.350827,3.014634,-34.26,-0.956067
53532.0,18,1.550290,12.325017,4.070641,-27.94,-0.944629


In [88]:
onfield = test_play['players_on_the_field']
play_features = test_play['play_features']
offense = onfield.loc[(onfield["club"] == play_features["possessionTeam"]) & (onfield["nflId"] != play_features["ballCarrierId"]), "nflId"]
defense = onfield.loc[onfield["club"] == play_features["defensiveTeam"], "nflId"]

offense_feature_groups = geometric_features.loc[pd.IndexSlice[offense]].groupby(level=0)
defense_feature_groups = geometric_features.loc[pd.IndexSlice[defense]].groupby(level=0)

offense_features = pd.concat([g.reset_index(drop=True) for _, g in offense_feature_groups], axis=1, keys=offense_feature_groups.groups.keys())
defense_features =  pd.concat([g.reset_index(drop=True) for _, g in defense_feature_groups], axis=1, keys=defense_feature_groups.groups.keys())
offense_raw = tracking_data.loc[:, pd.IndexSlice[offense]]
defense_raw = tracking_data.loc[:, pd.IndexSlice[defense]]


In [90]:
offense_raw


Unnamed: 0_level_0,35472.0,35472.0,35472.0,35472.0,35472.0,35472.0,42392.0,42392.0,42392.0,42392.0,...,52536.0,52536.0,52536.0,52536.0,53522.0,53522.0,53522.0,53522.0,53522.0,53522.0
Unnamed: 0_level_1,x,y,s,a,o,dir,x,y,s,a,...,s,a,o,dir,x,y,s,a,o,dir
0,82.42,26.13,2.43,0.37,196.42,140.67,81.75,28.39,1.18,2.36,...,3.43,3.38,19.41,311.87,86.28,33.25,1.57,3.75,20.01,118.94
1,82.58,25.96,2.34,0.93,203.85,138.66,81.71,28.53,1.5,2.17,...,3.22,2.99,15.54,315.91,86.38,33.18,1.1,3.75,11.69,128.49
2,82.73,25.79,2.2,1.39,209.5,136.4,81.65,28.7,1.8,1.69,...,3.07,2.54,12.06,319.75,86.44,33.12,0.72,3.68,359.65,146.54
3,82.87,25.66,1.96,1.87,214.57,133.71,81.6,28.92,2.15,1.52,...,2.92,2.3,9.05,323.94,86.44,33.06,0.53,3.51,347.62,189.5
4,82.99,25.55,1.62,2.42,222.35,130.47,81.53,29.16,2.45,1.33,...,2.83,1.98,6.72,327.64,86.41,33.02,0.66,3.04,336.17,230.74
5,83.09,25.48,1.19,2.8,234.72,125.05,81.46,29.44,2.75,1.08,...,2.75,1.88,3.59,331.84,86.34,32.99,0.89,2.35,326.75,248.85
6,83.16,25.45,0.83,2.89,246.19,114.56,81.37,29.76,3.08,0.95,...,2.67,1.56,359.13,334.34,86.22,32.96,1.17,1.97,312.54,259.14
7,83.21,25.45,0.47,2.88,265.22,91.69,81.28,30.1,3.36,0.75,...,2.64,1.15,356.36,335.96,86.07,32.95,1.46,1.61,312.54,265.54
8,83.23,25.47,0.32,2.69,280.09,35.37,81.17,30.44,3.53,0.58,...,2.65,0.77,354.64,337.12,85.89,32.95,1.75,1.4,307.71,271.29
9,83.23,25.51,0.51,2.5,284.57,354.43,81.06,30.81,3.75,0.8,...,2.69,0.33,351.33,336.95,85.69,32.98,1.98,1.17,303.63,275.79


In [82]:
geometric_features.loc[pd.IndexSlice[[46076], :]]


KeyError: '[46076] not in index'