In [270]:
import itertools
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

In [2]:
# Read the CSV files
games = pd.read_csv("data/nfl-big-data-bowl-2024/games.csv")
players = pd.read_csv("data/nfl-big-data-bowl-2024/players.csv")

# Convert height from a "feet-inches" string to total inches.
# expand=False makes str.extract return a Series, so the sum is a plain column
# rather than a one-column DataFrame.
height_feet = players['height'].str.extract(r'(\d+)', expand=False).astype(int)
height_inches = players['height'].str.extract(r'-(\d+)', expand=False).astype(int)
players['height'] = height_feet * 12 + height_inches

# Select columns
players = players[['displayName', 'nflId', 'height', 'weight', 'position']]

plays = pd.read_csv("data/nfl-big-data-bowl-2024/plays.csv")
tackles = pd.read_csv("data/nfl-big-data-bowl-2024/tackles.csv")

# Read and combine tracking data for all weeks
tracking = pd.concat([pd.read_csv(f"data/nfl-big-data-bowl-2024/tracking_week_{week}.csv") for week in range(1, 10)])

# Ball rows are the tracking rows without an nflId. The column list needs a comma
# between "frameId" and "playId": adjacent string literals are concatenated, which
# would ask for a non-existent "frameIdplayId" column.
ball_tracking = (
    tracking.loc[tracking['nflId'].isna(), ["gameId", "frameId", "playId", "x", "y"]]
    .rename(columns={"x": "ball_x", "y": "ball_y"})
)

In [148]:
# Inspect the ball-only rows (tracking rows with no nflId), with x/y renamed to ball_x/ball_y.
ball_tracking

Unnamed: 0,gameId,frameId,playId,ball_x,ball_y
484,2022090800,1,56,85.050003,33.810001
485,2022090800,2,56,83.150002,34.830002
486,2022090800,3,56,81.739998,35.590000
487,2022090800,4,56,80.139999,36.450001
488,2022090800,5,56,79.290001,36.930000
...,...,...,...,...,...
1150018,2022110700,40,3787,26.219999,19.680000
1150019,2022110700,41,3787,26.320000,19.610001
1150020,2022110700,42,3787,26.389999,19.559999
1150021,2022110700,43,3787,26.450001,19.520000


In [395]:
def euclidean_distance(x1, y1, x2, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2).

    Only arithmetic operators and ``np.sqrt`` are used, so scalars, NumPy arrays
    and pandas Series are all handled element-wise.
    """
    delta_x = x1 - x2
    delta_y = y1 - y2
    return np.sqrt(delta_x**2 + delta_y**2)

class play:
    """Tracking, ball and tackle data for one play, plus grid-based feature extraction.

    Construction slices the notebook-level ``plays``, ``tracking``, ``ball_tracking``
    and ``tackles`` frames down to a single ``(gameId, playId)`` pair; the per-frame
    methods additionally join the notebook-level ``players`` frame. All of those
    frames must exist before the class is used.

    Attributes:
        play: rows of ``plays`` matching the game/play ids.
        ball_carry_id: ``ballCarrierId`` of the first matching ``plays`` row.
        tracking_df: rows of ``tracking`` for this play (players and ball).
        ball_track: rows of ``ball_tracking`` for this play (``ball_x``/``ball_y``).
        tackle_oppurtunities: rows of ``tackles`` for this play (spelling kept
            because it is a public attribute name).
        tackle_attempts: result of :meth:`get_tackle_attempt_frames`.
    """

    # Extent of the grid in tracking coordinates. The width is 54 rather than 53.3
    # so that ceil(FIELD_WIDTH / N) always leaves a cell for the largest y values.
    FIELD_LENGTH = 120
    FIELD_WIDTH = 54

    # Position codes treated as offense; every other player is labelled "Defense".
    # "T" (offensive tackle) replaces the previous "OLB" entry, which is an outside
    # linebacker, i.e. a defender.
    # NOTE(review): confirm against the position codes present in players.csv.
    OFFENSIVE_POSITIONS = ("QB", "T", "TE", "WR", "G", "RB", "C", "FB")

    # Column order of the last axis returned by get_grid_features(matrix_form=True).
    GRID_FEATURE_NAMES = (
        'off_density', 'def_density', 'ballcarrier_density',
        'off_velocity_mean', 'off_velocity_sum', 'off_velocity_std',
        'def_velocity_mean', 'def_velocity_sum', 'def_velocity_std',
        'ballcarrier_velocity',
        'off_acc_mean', 'off_acc_sum', 'off_acc_std',
        'def_acc_mean', 'def_acc_sum', 'def_acc_std',
        'ballcarrier_acc',
        'off_distance_mean', 'off_distance_sum', 'off_distance_std',
        'def_distance_mean', 'def_distance_sum', 'def_distance_std',
        'ballcarrier_distance',
    )

    def __init__(self, game_id, play_id):
        """Collect every frame that belongs to ``(game_id, play_id)``.

        Raises:
            KeyError: if ``plays`` has no row for the given ids.
        """
        self.play = plays.query("gameId == @game_id & playId ==  @play_id")
        if self.play.empty:
            raise KeyError(f"no play found for gameId={game_id}, playId={play_id}")
        self.ball_carry_id = self.play.ballCarrierId.iloc[0]
        self.tracking_df = tracking.query("gameId == @game_id & playId ==  @play_id")
        self.ball_track = ball_tracking.query("gameId == @game_id & playId ==  @play_id")
        self.tackle_oppurtunities = tackles.query("gameId == @game_id & playId ==  @play_id")
        self.tackle_attempts = self.get_tackle_attempt_frames()

    # ------------------------------------------------------------------ helpers

    def _grid_shape(self, N):
        """Return (cells along x, cells along y) for square cells of side ``N``."""
        return math.ceil(self.FIELD_LENGTH / N), math.ceil(self.FIELD_WIDTH / N)

    def _cell_index(self, x, y, N, shape):
        """Map a coordinate to a grid cell, clamping positions outside the grid.

        Clamping keeps x == 120 (or any out-of-range tracking sample) from raising
        IndexError, and stops negative coordinates wrapping to the far side.
        """
        row = min(max(int(x / N), 0), shape[0] - 1)
        col = min(max(int(y / N), 0), shape[1] - 1)
        return row, col

    def _label_player_types(self, positions):
        """Add a ``type`` column ("Offense", "Defense" or "Ball") to a merged frame.

        Rows without an ``nflId`` are the ball; everything else is classified by
        its ``position`` code via OFFENSIVE_POSITIONS.
        """
        positions['type'] = np.where(positions['position'].isin(self.OFFENSIVE_POSITIONS),
                                     "Offense", "Defense")
        positions.loc[positions['nflId'].isna(), 'type'] = "Ball"
        return positions

    def _ball_x_at(self, frame_id):
        """Return the ball's x coordinate at ``frame_id`` (KeyError if absent)."""
        matches = self.ball_track.loc[self.ball_track.frameId == frame_id, "ball_x"]
        if matches.empty:
            raise KeyError(f"no ball tracking row for frameId={frame_id}")
        return matches.iloc[0]

    @staticmethod
    def _prepare_group(group):
        """Bundle a player subset with cos/sin of its heading (cell-independent)."""
        heading = group['dir'] * (math.pi / 180)
        return group, np.cos(heading), np.sin(heading)

    @staticmethod
    def _cell_kinematics(prepared, x_low, y_low, N):
        """Return (count in cell, speed toward centre, accel toward centre, distance).

        "Toward" is the heading unit vector dotted with the un-normalised offset to
        the cell centre, so it grows with distance as well as with alignment.
        """
        group, cos_dir, sin_dir = prepared
        centre_x = x_low + N/2
        centre_y = y_low + N/2
        # Bounds are inclusive on both sides, so a player exactly on a shared edge
        # is counted in both neighbouring cells.
        in_cell = ((group['x'] <= x_low + N) & (group['x'] >= x_low) &
                   (group['y'] <= y_low + N) & (group['y'] >= y_low))
        toward = cos_dir * (centre_x - group['x']) + sin_dir * (centre_y - group['y'])
        distance = np.sqrt((group['x'] - centre_x)**2 + (group['y'] - centre_y)**2)
        return int(in_cell.sum()), group['s'] * toward, group['a'] * toward, distance

    @staticmethod
    def _summary(values):
        """Return [mean, sum, population std] of a Series."""
        return [np.mean(values), np.sum(values), np.std(values)]

    # --------------------------------------------------------------- public API

    def get_tackle_attempt_frames(self):
        """Find, for each player listed in ``tackles``, the frame closest to the ball.

        Returns:
            DataFrame with ``nflId``, ``frameId``, ``tackle_x`` and ``tackle_y``
            (the ball position at that frame). A player can appear more than once
            if several frames tie for the minimum distance.
        """
        tacklers = self.tackle_oppurtunities.nflId.unique()
        tackler_tracking = self.tracking_df.query("nflId in @tacklers").merge(
            self.ball_track, on = ["gameId", "playId", "frameId"], how = "left")
        tackler_tracking['distance_from_ball'] = euclidean_distance(
            tackler_tracking['x'], tackler_tracking['y'],
            tackler_tracking['ball_x'], tackler_tracking['ball_y'])
        tackler_tracking['min_distance_from_ball'] = (
            tackler_tracking.groupby('nflId')['distance_from_ball'].transform('min'))
        closest = tackler_tracking.query("min_distance_from_ball == distance_from_ball")
        return closest[["nflId", "frameId", "ball_x", "ball_y"]].rename(
            {"ball_x" : "tackle_x", "ball_y" : "tackle_y"}, axis = 1
        )

    def get_tackle_attempt_matrix(self, N):
        """Return a 0/1 grid marking the cells that contain a tackle attempt.

        Args:
            N: side length of a square grid cell.

        Returns:
            Array of shape ``(ceil(120/N), ceil(54/N))``, the same grid as
            :meth:`get_grid_features`, with 1 in each cell holding an attempt.
        """
        shape = self._grid_shape(N)
        tackles_attempt_mat = np.zeros(shape)
        attempts = self.tackle_attempts[["tackle_x", "tackle_y"]].dropna()
        for tackle_x, tackle_y in zip(attempts.tackle_x, attempts.tackle_y):
            tackles_attempt_mat[self._cell_index(tackle_x, tackle_y, N, shape)] = 1
        return tackles_attempt_mat

    def get_yards_after_t(self, frame_id):
        """Return the ball's x displacement from ``frame_id`` to the last frame.

        The sign is flipped when ``playDirection`` is "left", so a positive value
        always means movement in the direction of play.

        Raises:
            KeyError: if the ball has no tracking row at ``frame_id``.
        """
        end_of_play_x = self._ball_x_at(self.ball_track.frameId.max())
        current_x = self._ball_x_at(frame_id)
        gained = end_of_play_x - current_x
        if self.tracking_df.playDirection.iloc[0] == "left":
            return -gained
        return gained

    def get_current_positions_matrix(self, frame_id, N = 3):
        """Rasterise one frame into a ``(ceil(120/N), ceil(54/N), 3)`` array.

        Channels are 0: occupied flag, 1: weight of the occupant (NaN for the
        ball, which has no ``players`` row), 2: 1 if the occupant is labelled
        "Defense". When several rows fall into one cell, the last one wins.

        Args:
            frame_id: frame to rasterise.
            N: cell side length; the default 3 matches the previous hard-coded
                grid. The y axis now has ceil(54/N) cells instead of
                int(53.3/N), which avoids the IndexError for large y values.
        """
        current_positions = self._label_player_types(
            self.tracking_df.query("frameId == @frame_id")[["nflId", "x", "y"]]
            .merge(players, on = "nflId", how = "left"))
        shape = self._grid_shape(N)
        tackle_matrix = np.zeros(shape + (3,))
        for x, y, weight, kind in zip(current_positions.x, current_positions.y,
                                      current_positions.weight, current_positions.type):
            row, col = self._cell_index(x, y, N, shape)
            tackle_matrix[row, col, 0] = 1
            tackle_matrix[row, col, 1] = weight
            tackle_matrix[row, col, 2] = 1 if kind == "Defense" else 0
        return tackle_matrix

    def refine_tracking(self, frame_id):
        """Return one frame of tracking joined with player attributes and a role label.

        ``type`` is "Ball", "Carrier" (the play's ball carrier), "Offense" or
        "Defense".
        """
        current_positions = self._label_player_types(
            self.tracking_df.query("frameId == @frame_id").merge(players, on = "nflId", how = "left"))
        current_positions.loc[current_positions.nflId == self.ball_carry_id, 'type'] = "Carrier"
        return current_positions[['nflId', 'x', 'y', 's', 'a', 'dis', 'o', 'dir', 'height', 'weight', 'type']]

    def plot_tackle_attempt_matrix(self, frame_id = 1):
        """Scatter-plot the occupied cells of :meth:`get_current_positions_matrix`.

        Marker size is the occupant's weight; circles are defenders, squares are
        everything else.
        """
        data = self.get_current_positions_matrix(frame_id)
        presence = data[:, :, 0] == 1
        size = data[:, :, 1]
        shape = data[:, :, 2] == 1

        fig, ax = plt.subplots()

        # np.argwhere walks the occupied cells in row-major order.
        for i, j in np.argwhere(presence):
            dot_size = size[i, j]
            if np.isnan(dot_size):
                dot_size = None  # the ball has no weight; use matplotlib's default size
            dot_shape = 'o' if shape[i, j] else 's'  # 'o' for circle, 's' for square
            ax.scatter(j, i, s=dot_size, marker=dot_shape)

        # Set axis labels and show the plot
        ax.set_xlabel('X-axis')
        ax.set_ylabel('Y-axis')
        plt.show()

    def get_grid_features(self, frame_id, N, matrix_form = True):
        """Compute per-cell density and kinematic summaries for one frame.

        For every ``N`` x ``N`` cell the offense (excluding the carrier), the
        defense and the ball carrier are summarised by: count inside the cell,
        speed and acceleration projected toward the cell centre, and distance to
        the cell centre (mean / sum / std for the two groups, the single value
        for the carrier). Feature order is GRID_FEATURE_NAMES.

        NOTE(review): the projection treats ``dir`` as an angle with cos along x
        and sin along y; confirm this matches the tracking data's convention.

        Args:
            frame_id: frame to featurise.
            N: integer cell side length.
            matrix_form: if True return an array of shape
                ``(ceil(120/N), ceil(54/N), 24)``; otherwise a DataFrame with one
                row per cell, a ``grid_id`` column ("x_low y_low") and the 24
                feature columns (default RangeIndex).

        Raises:
            IndexError: if the ball carrier has no row in this frame.
        """
        df = self.refine_tracking(frame_id = frame_id)

        # These subsets and their trigonometry do not depend on the cell, so they
        # are computed once instead of once per grid cell.
        off_df = df[(df['type'] == "Offense") & (df['nflId'] != self.ball_carry_id)]
        def_df = df[df['type'] == "Defense"]
        ball_df = df[df['nflId'] == self.ball_carry_id]
        if ball_df.empty:
            raise IndexError(f"ball carrier {self.ball_carry_id} has no tracking row at frameId={frame_id}")
        offense = self._prepare_group(off_df)
        defense = self._prepare_group(def_df)
        carrier = self._prepare_group(ball_df)

        x_lows = list(range(0, self.FIELD_LENGTH, N))
        y_lows = list(range(0, self.FIELD_WIDTH, N))
        return_mat = np.zeros((len(x_lows), len(y_lows), len(self.GRID_FEATURE_NAMES)))
        records = []
        for x_idx, x_low in enumerate(x_lows):
            for y_idx, y_low in enumerate(y_lows):
                off_n, off_v, off_a, off_d = self._cell_kinematics(offense, x_low, y_low, N)
                def_n, def_v, def_a, def_d = self._cell_kinematics(defense, x_low, y_low, N)
                ball_n, ball_v, ball_a, ball_d = self._cell_kinematics(carrier, x_low, y_low, N)

                values = (
                    [off_n, def_n, ball_n]
                    + self._summary(off_v) + self._summary(def_v) + [ball_v.values[0]]
                    + self._summary(off_a) + self._summary(def_a) + [ball_a.values[0]]
                    + self._summary(off_d) + self._summary(def_d) + [ball_d.values[0]]
                )
                if matrix_form:
                    return_mat[x_idx, y_idx, :] = values
                else:
                    records.append({'grid_id': f"{x_low} {y_low}",
                                    **dict(zip(self.GRID_FEATURE_NAMES, values))})
        if matrix_form:
            return return_mat
        # Build the table once rather than concatenating a frame per cell.
        return pd.DataFrame(records, columns=['grid_id', *self.GRID_FEATURE_NAMES])

    def get_training_data(self, frame_id, N):
        """Return ``{"features": grid features, "label": tackle-attempt grid}`` for one frame."""
        feature_mat = self.get_grid_features(frame_id = frame_id, N = N, matrix_form = True)
        label = self.get_tackle_attempt_matrix(N = N)
        return {"features" : feature_mat, "label" : label}

class TackleAttemptDataset:
    """Map-style dataset pairing feature grids with tackle-attempt label grids.

    Args:
        images: sequence of per-sample feature arrays.
        labels: sequence of per-sample label arrays, aligned with ``images``.

    Raises:
        ValueError: if ``images`` and ``labels`` differ in length.
    """

    def __init__(self, images, labels):
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, got {len(images)} and {len(labels)}")
        self.images = images
        self.labels = labels
        self.num_samples = len(images)

    def __len__(self):
        """Return the number of (image, label) pairs."""
        return self.num_samples

    def __getitem__(self, idx):
        """Return sample ``idx`` as a pair of float32 tensors."""
        # torch.tensor always copies and honours dtype. The legacy
        # torch.FloatTensor(x) constructor treats an integer argument as a size
        # and returns uninitialised memory.
        image = torch.tensor(self.images[idx], dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return image, label

In [396]:
# First pass: one snapshot per play, taken at frame 10 on a 10-yard grid.

images = []
labels = []
for row in tqdm(range(len(plays))):
    play_row = plays.iloc[row]
    play_object = play(game_id=play_row.gameId, play_id=play_row.playId)
    # get_training_data computes the feature grid first and the label grid second.
    sample = play_object.get_training_data(frame_id = 10, N = 10)
    images.append(sample["features"])
    labels.append(sample["label"])
train_data = TackleAttemptDataset(images = images, labels = labels)

  0%|          | 10/12486 [00:12<4:30:00,  1.30s/it]


KeyboardInterrupt: 

In [393]:
# Manual rebuild of the 10-yard tackle-attempt grid for a single play, used as a spot check.
x = play(game_id=2022091103, play_id=3126)

# NOTE(review): math.ceil(53.3/10) evaluates to 6, yet the saved IndexError reports an
# axis of size 5, so the recorded output presumably predates this line - re-run to confirm.
tackles_attempt_mat = np.zeros((int(120/10), math.ceil(53.3/10)))
# `item` stays defined at notebook scope after the loop and is inspected in a separate cell.
for item in list(zip(x.tackle_attempts.tackle_x, x.tackle_attempts.tackle_y)):
    tackles_attempt_mat[int(item[0]/10), int(item[1]/10)] = 1

IndexError: index 5 is out of bounds for axis 1 with size 5

In [390]:
# `item` is the loop variable left over from the tackle-grid loop; item[1] is its tackle_y, here in 10-yard units.
item[1]/10

5.09199981689453

In [392]:
# Number of 10-yard cells needed along y when the extent is rounded up to 54.
math.ceil(54/10)

6

In [394]:
# Truncating instead of rounding up yields 5 cells, so a y index of 5 would be out of range.
int(53.3/10)

5

In [132]:
# Sanity check of the defense-flag expression: any string other than exactly "Defense" gives 0.
1 if "d" == "Defense" else 0

0

In [225]:
# Scratch: load torch_geometric's KarateClub dataset.
# NOTE(review): `dataset` is rebound by a later cell; nothing in the visible cells reads this value.
from torch_geometric.datasets import KarateClub

dataset = KarateClub()

  from .autonotebook import tqdm as notebook_tqdm


In [227]:
# Scratch: load the Planetoid 'Cora' dataset, using the current directory as its root.
# NOTE(review): Tensor and GCNConv are imported but not used in the visible cells.
import torch
from torch import Tensor
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid

# Rebinds `dataset`, replacing the KarateClub instance created in the earlier scratch cell.
dataset = Planetoid(root='.', name='Cora')


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


[(0, 0),
 (0, 5),
 (0, 10),
 (0, 15),
 (0, 20),
 (0, 25),
 (0, 30),
 (0, 35),
 (0, 40),
 (0, 45),
 (0, 50),
 (5, 0),
 (5, 5),
 (5, 10),
 (5, 15),
 (5, 20),
 (5, 25),
 (5, 30),
 (5, 35),
 (5, 40),
 (5, 45),
 (5, 50),
 (10, 0),
 (10, 5),
 (10, 10),
 (10, 15),
 (10, 20),
 (10, 25),
 (10, 30),
 (10, 35),
 (10, 40),
 (10, 45),
 (10, 50),
 (15, 0),
 (15, 5),
 (15, 10),
 (15, 15),
 (15, 20),
 (15, 25),
 (15, 30),
 (15, 35),
 (15, 40),
 (15, 45),
 (15, 50),
 (20, 0),
 (20, 5),
 (20, 10),
 (20, 15),
 (20, 20),
 (20, 25),
 (20, 30),
 (20, 35),
 (20, 40),
 (20, 45),
 (20, 50),
 (25, 0),
 (25, 5),
 (25, 10),
 (25, 15),
 (25, 20),
 (25, 25),
 (25, 30),
 (25, 35),
 (25, 40),
 (25, 45),
 (25, 50),
 (30, 0),
 (30, 5),
 (30, 10),
 (30, 15),
 (30, 20),
 (30, 25),
 (30, 30),
 (30, 35),
 (30, 40),
 (30, 45),
 (30, 50),
 (35, 0),
 (35, 5),
 (35, 10),
 (35, 15),
 (35, 20),
 (35, 25),
 (35, 30),
 (35, 35),
 (35, 40),
 (35, 45),
 (35, 50),
 (40, 0),
 (40, 5),
 (40, 10),
 (40, 15),
 (40, 20),
 (40, 25),
 (40, 

In [256]:
## Retry