In [1]:
from matplotlib import pyplot as plt
from scipy.stats import linregress
import numpy as np
from sklearn import datasets
import pandas as pd
import hvplot.pandas
import holoviews as hv

In [2]:
matchups_df = pd.read_csv('data/nfl-big-data-bowl-2024/matchups.csv')
matchups_df.head()

Unnamed: 0,matchup,gameId,week,home,away
0,LA_BUF,2022090800,1,LA,BUF
1,ATL_NO,2022091100,1,ATL,NO
2,CAR_CLE,2022091101,1,CAR,CLE
3,CHI_SF,2022091102,1,CHI,SF
4,CIN_PIT,2022091103,1,CIN,PIT


In [3]:
players_basic_df = pd.read_csv('data/nfl-big-data-bowl-2024/players_basic.csv')
players_basic_df.head()

Unnamed: 0,position,displayName
0,QB,Tom Brady
1,T,Jason Peters
2,QB,Aaron Rodgers
3,TE,Marcedes Lewis
4,QB,Matt Ryan


In [4]:
positions_all = players_basic_df['position'].unique()
print(positions_all)

['QB' 'T' 'TE' 'WR' 'DE' 'NT' 'SS' 'FS' 'G' 'OLB' 'DT' 'CB' 'RB' 'C' 'ILB'
 'MLB' 'FB' 'LS' 'DB']


In [5]:
positions_off = ['QB','T','TE','WR','G','FB','RB','C']
positions_def = ['DE','NT','SS','FS','OLB','DT','CB','ILB','MLB','DB']

In [6]:
tracking_df = pd.read_csv('data/nfl-big-data-bowl-2024/tracking_week_2.csv')
tracking_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,2022091500,55,40011.0,Travis Kelce,1,2022-09-15 20:16:32.700000,87.0,KC,left,87.2,24.9,0.0,0.0,0.0,263.11,138.55,
1,2022091500,55,40011.0,Travis Kelce,2,2022-09-15 20:16:32.799999,87.0,KC,left,87.2,24.9,0.0,0.0,0.0,263.11,142.54,
2,2022091500,55,40011.0,Travis Kelce,3,2022-09-15 20:16:32.900000,87.0,KC,left,87.2,24.9,0.0,0.0,0.0,262.47,143.82,
3,2022091500,55,40011.0,Travis Kelce,4,2022-09-15 20:16:33.000000,87.0,KC,left,87.2,24.9,0.0,0.0,0.0,262.47,149.71,
4,2022091500,55,40011.0,Travis Kelce,5,2022-09-15 20:16:33.099999,87.0,KC,left,87.2,24.9,0.01,0.15,0.0,262.47,309.38,


In [7]:
tracking_df = pd.merge(players_basic_df, tracking_df, how='left', left_on='displayName', right_on='displayName')

In [8]:
def clean_data(tracking_df):
    # Replace missing values with 0 in columns: 'jerseyNumber', 'nflId'
    tracking_df = tracking_df.fillna({'jerseyNumber': 0, 'nflId': 0})
    # Change column type to string for column: 'nflId'
    tracking_df = tracking_df.astype({'nflId': 'string'})
    # Replace all instances of ".0" with "" in column: 'nflId'
    tracking_df['nflId'] = tracking_df['nflId'].str.replace(".0", "", case=False, regex=False)
    # Change column type to string for column: 'jerseyNumber'
    tracking_df = tracking_df.astype({'jerseyNumber': 'string'})
    # Replace all instances of ".0" with "" in column: 'jerseyNumber'
    tracking_df['jerseyNumber'] = tracking_df['jerseyNumber'].str.replace(".0", "", case=False, regex=False)
    # Change column type to string for columns: 'club', 'playDirection', 'event', 'displayName', 'time'
    tracking_df = tracking_df.astype({'club': 'string', 'playDirection': 'string','event': 'string','displayName': 'string','gameId': 'string',})
    # Reduce the floats to two decimal points
    tracking_df[['x', 'y', 's', 'a', 'dis']] = tracking_df[['x', 'y', 's', 'a', 'dis']].round(2)
    # Change column type to datetime64[ns] for column: 'time'
    tracking_df = tracking_df.astype({'time': 'datetime64[ns]'})
    return tracking_df

tracking_df_clean = clean_data(tracking_df.copy())
tracking_df_clean.head()


Unnamed: 0,position,displayName,gameId,playId,nflId,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event
0,QB,Tom Brady,2022091804.0,378.0,25511,1.0,2022-09-18 13:13:57.900000,12,TB,right,33.26,27.1,0.0,0.0,0.0,93.07,77.36,
1,QB,Tom Brady,2022091804.0,378.0,25511,2.0,2022-09-18 13:13:58.000000,12,TB,right,33.27,27.1,0.0,0.0,0.0,92.08,78.2,
2,QB,Tom Brady,2022091804.0,378.0,25511,3.0,2022-09-18 13:13:58.099999,12,TB,right,33.27,27.09,0.0,0.0,0.0,91.37,79.0,
3,QB,Tom Brady,2022091804.0,378.0,25511,4.0,2022-09-18 13:13:58.200000,12,TB,right,33.26,27.09,0.0,0.0,0.0,91.37,80.26,
4,QB,Tom Brady,2022091804.0,378.0,25511,5.0,2022-09-18 13:13:58.299999,12,TB,right,33.26,27.09,0.0,0.0,0.0,91.37,82.45,


In [9]:
tracking_df_clean=pd.merge(tracking_df_clean, players_basic_df, how='left', left_on='displayName', right_on='displayName')
tracking_df_clean.head()

Unnamed: 0,position_x,displayName,gameId,playId,nflId,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event,position_y
0,QB,Tom Brady,2022091804.0,378.0,25511,1.0,2022-09-18 13:13:57.900000,12,TB,right,33.26,27.1,0.0,0.0,0.0,93.07,77.36,,QB
1,QB,Tom Brady,2022091804.0,378.0,25511,2.0,2022-09-18 13:13:58.000000,12,TB,right,33.27,27.1,0.0,0.0,0.0,92.08,78.2,,QB
2,QB,Tom Brady,2022091804.0,378.0,25511,3.0,2022-09-18 13:13:58.099999,12,TB,right,33.27,27.09,0.0,0.0,0.0,91.37,79.0,,QB
3,QB,Tom Brady,2022091804.0,378.0,25511,4.0,2022-09-18 13:13:58.200000,12,TB,right,33.26,27.09,0.0,0.0,0.0,91.37,80.26,,QB
4,QB,Tom Brady,2022091804.0,378.0,25511,5.0,2022-09-18 13:13:58.299999,12,TB,right,33.26,27.09,0.0,0.0,0.0,91.37,82.45,,QB


In [10]:
catalog_play = tracking_df_clean[['gameId', 'playId']].copy()
tracking_df_clean['playId'].value_counts()

56.0      6527
550.0     4025
1515.0    3916
3127.0    3814
2685.0    3784
          ... 
1172.0     242
3066.0     242
176.0      242
2437.0     242
400.0      242
Name: playId, Length: 1197, dtype: int64

In [34]:
def clean_data(tracking_df_clean):    # Filter rows based on column: 'event'
    tracking_df_clean = tracking_df_clean[tracking_df_clean['frameId'] == 6]
    tracking_df_clean = tracking_df_clean.rename(columns={'position_x': 'position'})
    tracking_df_clean = tracking_df_clean[tracking_df_clean['event'] == "ball_snap"]
    tracking_df_clean = tracking_df_clean[tracking_df_clean['playId'] == 3127]
    #tracking_df_clean = tracking_df_clean[tracking_df_clean['gameId'] == "2022091801.0"]
    return tracking_df_clean

single_frame_play = clean_data(tracking_df_clean.copy())
single_frame_play.head()

Unnamed: 0,position,displayName,gameId,playId,nflId,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event,position_y
125211,ILB,C.J. Mosley,2022091801.0,3127.0,41243,6.0,2022-09-18 15:22:00.099999,57,NYJ,left,27.61,26.48,0.78,0.22,0.08,76.03,20.44,ball_snap,ILB
132251,G,Joel Bitonio,2022091801.0,3127.0,41264,6.0,2022-09-18 15:22:00.099999,75,CLE,left,33.12,21.73,0.73,1.45,0.08,262.55,302.66,ball_snap,G
137765,SS,Lamarcus Joyner,2022091801.0,3127.0,41270,6.0,2022-09-18 15:22:00.099999,29,NYJ,left,17.73,27.53,0.36,0.35,0.03,66.22,202.72,ball_snap,SS
152169,DT,Justin Ellis,2022091805.0,3127.0,41336,6.0,2022-09-18 15:31:27.599999,71,NYG,left,84.37,30.56,0.39,1.38,0.04,88.91,67.58,ball_snap,DT
229284,OLB,Kwon Alexander,2022091801.0,3127.0,42467,6.0,2022-09-18 15:22:00.099999,9,NYJ,left,28.79,32.47,0.69,0.13,0.07,96.32,45.83,ball_snap,OLB


In [24]:
single_frame_play.hvplot.scatter(
    x="x",
    y="y",
    xlim=(0, 120),  # Specify the boundaries for the x-axis
    ylim=(0, 53.3),   # Specify the boundaries for the y-axis
    by="club",
    hover_cols=["displayName", "club", "position"]
)



bokeh backend could not plot any Elements in the Overlay.

:NdOverlay   [club]

In [13]:
def clean_data(tracking_df_clean):    # Filter rows based on column: 'event'
    #tracking_df_clean = tracking_df_clean[tracking_df_clean['frameId'] == 21]
    #tracking_df_clean = tracking_df_clean[tracking_df_clean['event'] == "ball_snap"]
    tracking_df_clean = tracking_df_clean[tracking_df_clean['playId'] == 764]
    return tracking_df_clean

anime_frame_play = clean_data(tracking_df_clean.copy())
anime_frame_play.head()

Unnamed: 0,position_x,displayName,gameId,playId,nflId,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event,position_y
258,QB,Tom Brady,2022091804.0,764.0,25511,1.0,2022-09-18 13:30:29.500000,12,TB,right,31.14,23.81,0.0,0.0,0.0,85.81,89.31,,QB
259,QB,Tom Brady,2022091804.0,764.0,25511,2.0,2022-09-18 13:30:29.599999,12,TB,right,31.14,23.82,0.0,0.0,0.0,85.81,77.97,,QB
260,QB,Tom Brady,2022091804.0,764.0,25511,3.0,2022-09-18 13:30:29.700000,12,TB,right,31.14,23.81,0.0,0.0,0.0,85.23,83.08,,QB
261,QB,Tom Brady,2022091804.0,764.0,25511,4.0,2022-09-18 13:30:29.799999,12,TB,right,31.14,23.81,0.0,0.0,0.0,85.23,77.18,,QB
262,QB,Tom Brady,2022091804.0,764.0,25511,5.0,2022-09-18 13:30:29.900000,12,TB,right,31.13,23.81,0.0,0.0,0.0,85.23,112.49,,QB
