In [85]:
# Competition Link: https://www.kaggle.com/competitions/nfl-big-data-bowl-2024/data

    # games.csv
    # players.csv
    # plays.csv
    # tackles.csv

    # tracking_week_N.csv (weeks 1-9) (2022 NFL season)
    
# Other Allowable Sources:
    # ProFootballRef
    # NFLVerse

In [86]:
# General Packages
import pandas as pd
import numpy as np
from scipy.interpolate import make_interp_spline
import seaborn as sns
from datetime import date
import datetime
import missingno as msno
import os
import pickle
from tqdm import tqdm
import dataframe_image as dfi
import warnings
from matplotlib import animation
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches
import ipywidgets as widgets

# PDF Packages
from reportlab.pdfgen.canvas import Canvas
from reportlab.lib import colors
from reportlab.lib.colors import red
from reportlab.lib.colors import black
from reportlab.lib.units import inch
from PyPDF2 import PdfMerger

# Modeling Packages
import xgboost as xg
from sklearn.metrics import mean_squared_error

# My Packages
import cfbd_api_updater

# Package Options
# Widen the pandas display limits so wide/long frames are shown without truncation.
pd.set_option('display.max_columns',200)
pd.set_option('display.max_rows',1000)
# NOTE(review): this silences every warning for the whole kernel session,
# including deprecation warnings from pandas/matplotlib — consider narrowing it.
warnings.filterwarnings("ignore")
%matplotlib inline

def create_football_field(linenumbers=True,
                          endzones=True,
                          highlight_line=False,
                          highlight_line_number=50,
                          highlighted_name='Line of Scrimmage',
                          fifty_is_los=False,
                          figsize=(12, 6.33)):
    """
    Draw an empty football field on a new matplotlib figure.

    The field occupies the rectangle (0, 0)-(120, 53.3) in plot units; the
    two endzones cover x in [0, 10] and [110, 120].

    Parameters
    ----------
    linenumbers : bool
        Draw the yard-line numbers (10 ... 50 ... 10) along both sidelines.
    endzones : bool
        Shade the two endzones. Also restricts the hash marks to the area
        between the endzones.
    highlight_line : bool
        Draw a labelled yellow vertical line at ``highlight_line_number``.
    highlight_line_number : int or float
        Position of the highlighted line, measured from the left goal line
        (10 is added internally to account for the left endzone).
    highlighted_name : str
        Label printed next to the highlighted line.
    fifty_is_los : bool
        Draw a gold line at midfield labelled as the player yardline at snap.
    figsize : tuple
        Figure size passed to ``plt.subplots``.

    Returns
    -------
    (fig, ax)
        The matplotlib figure and axes the field was drawn on.
    """
    # The notebook imports matplotlib.patches as `mpatches`; the bare name
    # `patches` is never bound, so it must not be used here.
    field = mpatches.Rectangle((0, 0), 120, 53.3, linewidth=0.1,
                               edgecolor='r', facecolor='darkgreen', zorder=0)

    fig, ax = plt.subplots(1, figsize=figsize)
    ax.add_patch(field)

    # One continuous zig-zag polyline that traces every 10-unit line plus the
    # outer border of the field.
    ax.plot([10, 10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60, 70, 70, 80,
             80, 90, 90, 100, 100, 110, 110, 120, 0, 0, 120, 120],
            [0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3,
             53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 53.3, 0, 0, 53.3],
            color='white')
    if fifty_is_los:
        ax.plot([60, 60], [0, 53.3], color='gold')
        ax.text(62, 50, '<- Player Yardline at Snap', color='gold')

    # Endzones
    if endzones:
        endzone_style = dict(linewidth=0.1, edgecolor='r', facecolor='blue',
                             alpha=0.2, zorder=0)
        # Rectangle takes (xy, width, height): both endzones are 10 wide.
        ax.add_patch(mpatches.Rectangle((0, 0), 10, 53.3, **endzone_style))
        ax.add_patch(mpatches.Rectangle((110, 0), 10, 53.3, **endzone_style))

    ax.set_xlim(0, 120)
    ax.set_ylim(-5, 58.3)
    ax.axis('off')

    if linenumbers:
        for x in range(20, 110, 10):
            # Numbers count up to midfield and back down; subtract the
            # 10-unit endzone offset to get the printed yard number.
            numb = x if x <= 50 else 120 - x
            ax.text(x, 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,
                    color='white')
            # Far-sideline copy, rotated so it reads from the opposite side.
            ax.text(x - 0.95, 53.3 - 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,
                    color='white', rotation=180)

    hash_range = range(11, 110) if endzones else range(1, 120)

    # Four short tick marks per unit: both sidelines and both inner hash rows.
    for x in hash_range:
        ax.plot([x, x], [0.4, 0.7], color='white')
        ax.plot([x, x], [53.0, 52.5], color='white')
        ax.plot([x, x], [22.91, 23.57], color='white')
        ax.plot([x, x], [29.73, 30.39], color='white')

    if highlight_line:
        hl = highlight_line_number + 10  # shift past the left endzone
        ax.plot([hl, hl], [0, 53.3], color='yellow')
        ax.text(hl + 2, 50, '<- {}'.format(highlighted_name),
                color='yellow')
    return fig, ax




In [87]:
# Load games.csv into games_df and display it.
games_df = pd.read_csv('../data/raw/kaggle/games.csv')
games_df

Unnamed: 0,gameId,season,week,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,homeFinalScore,visitorFinalScore
0,2022090800,2022,1,09/08/2022,20:20:00,LA,BUF,10,31
1,2022091100,2022,1,09/11/2022,13:00:00,ATL,NO,26,27
2,2022091101,2022,1,09/11/2022,13:00:00,CAR,CLE,24,26
3,2022091102,2022,1,09/11/2022,13:00:00,CHI,SF,19,10
4,2022091103,2022,1,09/11/2022,13:00:00,CIN,PIT,20,23
5,2022091104,2022,1,09/11/2022,13:00:00,DET,PHI,35,38
6,2022091105,2022,1,09/11/2022,13:00:00,HOU,IND,20,20
7,2022091106,2022,1,09/11/2022,13:00:00,MIA,NE,20,7
8,2022091107,2022,1,09/11/2022,13:00:00,NYJ,BAL,9,24
9,2022091109,2022,1,09/11/2022,13:00:00,WAS,JAX,28,22


In [88]:
players_df = pd.read_csv('../data/raw/kaggle/players.csv')

# Convert the 'feet-inches' height string (e.g. '6-4') into total inches.
# Vectorized replacement for a row-by-row loop; the new `height` column is
# appended last and stored as float.
players_df = (
    players_df
    .drop(columns='height')
    .assign(
        height=players_df['height']
        .astype(str)
        .str.split('-', expand=True)
        .pipe(lambda parts: parts[0].astype(int) * 12 + parts[1].astype(int))
        .astype(float)
    )
    .reset_index(drop=True)
)
players_df[['displayName','nflId','position','height','weight']]

Unnamed: 0,displayName,nflId,position,height,weight
0,Tom Brady,25511,QB,76.0,225
1,Jason Peters,29550,T,76.0,328
2,Aaron Rodgers,29851,QB,74.0,225
3,Marcedes Lewis,30842,TE,78.0,267
4,Matt Ryan,33084,QB,76.0,217
...,...,...,...,...,...
1678,Ryder Anderson,55200,DT,78.0,266
1679,Jake Hummel,55212,ILB,72.0,230
1680,Prince Emili,55239,DT,74.0,300
1681,Ja'Marcus Ingram,55240,CB,73.0,185


In [89]:
plays_df = pd.read_csv('../data/raw/kaggle/plays.csv')

# Distance from the line of scrimmage to the end zone being attacked:
#   ball on the offense's own side -> 100 - yardlineNumber
#   otherwise                      -> yardlineNumber
# A missing yardlineSide compares unequal, so it falls into the second branch.
# Vectorized replacement for a row-by-row loop; the result is stored as float.
plays_df['YardsFromScoring'] = np.where(
    plays_df['possessionTeam'] == plays_df['yardlineSide'],
    100 - plays_df['yardlineNumber'],
    plays_df['yardlineNumber'],
).astype(float)

plays_df[['gameId','playId','ballCarrierId','quarter','down','yardsToGo','possessionTeam','defensiveTeam','yardlineSide','yardlineNumber','gameClock','preSnapHomeScore','preSnapVisitorScore','offenseFormation','defendersInTheBox','YardsFromScoring']]

Unnamed: 0,gameId,playId,ballCarrierId,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,gameClock,preSnapHomeScore,preSnapVisitorScore,offenseFormation,defendersInTheBox,YardsFromScoring
0,2022100908,3537,48723,4,1,10,ATL,TB,ATL,41,7:52,21,7,SHOTGUN,7.0,59.0
1,2022091103,3126,52457,4,1,10,PIT,CIN,PIT,34,7:38,14,20,SHOTGUN,7.0,66.0
2,2022091111,1148,42547,2,2,5,LV,LAC,LV,30,8:57,10,3,I_FORM,6.0,70.0
3,2022100212,2007,46461,3,2,10,DEN,LV,DEN,37,13:12,19,16,SINGLEBACK,6.0,63.0
4,2022091900,1372,47857,2,1,10,BUF,TEN,TEN,35,8:33,7,7,I_FORM,7.0,35.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12481,2022100204,123,43293,1,1,10,DAL,WAS,WAS,39,13:31,0,0,SINGLEBACK,6.0,39.0
12482,2022091200,3467,46189,4,1,10,SEA,DEN,SEA,30,6:08,17,16,SINGLEBACK,7.0,70.0
12483,2022101605,3371,44860,4,1,10,CIN,NO,CIN,41,9:35,26,21,SHOTGUN,6.0,59.0
12484,2022100207,2777,52449,3,1,10,IND,TEN,TEN,34,2:02,17,24,SHOTGUN,6.0,34.0


In [90]:
# Load tackles.csv into tackles_df and display it.
tackles_df = pd.read_csv('../data/raw/kaggle/tackles.csv')
tackles_df # KEY FIELD = ['gameId','playId','nflId']

# TARGETS FOR MODEL -> p(tackle), p(assist), p(forcedFumble), p(pff_missedTackle)

# PREDICTORS:
    # Tracking Data -> x, y, s, a, dis, o, dir
        # Engineered -> closest_def_distance, block_engaged, distance_to_ball, open_path_to_ball_carrier, 
                      # free_blocker_w_ball_carrier
        
    # Player-Level Data -> height, weight, position
    
    # Game-Context Data -> 
    
    # Play-Context Data -> quarter, offenseFormation, defendersInTheBox, YardsFromScoring
    
    # Venue-Context Data -> domed_stadium, temperature, raining, sunset_during_game, orientation_of_field
        # SOURCE: Use Stadium Loc Lat/Lon on Google Mixed w/ Weather API

Unnamed: 0,gameId,playId,nflId,tackle,assist,forcedFumble,pff_missedTackle
0,2022090800,101,42816,1,0,0,0
1,2022090800,393,46232,1,0,0,0
2,2022090800,486,40166,1,0,0,0
3,2022090800,646,47939,1,0,0,0
4,2022090800,818,40107,1,0,0,0
...,...,...,...,...,...,...,...
17421,2022091113,2494,43533,0,0,0,1
17422,2022092502,3510,42406,0,0,0,1
17423,2022091113,3642,43478,0,0,0,1
17424,2022091901,3578,42431,0,0,0,1


In [91]:
# Load only the week-1 tracking file; the other weeks are not loaded here.
tracking_df = pd.read_csv('../data/raw/kaggle/tracking_week_1.csv')

In [92]:
def basic_play_plot(tracking_df, plays_df, games_df, game_id, play_id):
    """
    Show an interactive, frame-by-frame viewer for a single play.

    Draws the field with ``create_football_field`` and, for each frame,
    plots the home players, the away players and the football. An
    ``ipywidgets`` Play control steps through the frames of the play.

    Parameters
    ----------
    tracking_df : pandas.DataFrame
        Tracking rows; uses columns gameId, playId, frameId, club,
        displayName, x, y. Rows whose ``club`` is ``'football'`` are the ball.
    plays_df : pandas.DataFrame
        Play-level rows; uses gameId, playId, quarter, down, yardsToGo,
        possessionTeam, yardlineSide, yardlineNumber, gameClock,
        preSnapHomeScore, preSnapVisitorScore, playDescription.
    games_df : pandas.DataFrame
        Game-level rows; uses gameId, gameDate, homeTeamAbbr, visitorTeamAbbr.
    game_id, play_id
        Identifiers of the play to display.

    Returns
    -------
    None
        The widget is displayed as a side effect.

    Notes
    -----
    Reads ``../data/interim/team_id_listing.csv`` for the home team's
    city/state and both teams' colours. Raises ``IndexError`` (from
    ``.iat[0]``) if the game, play or team cannot be found.
    """
    ordinals = {1: '1st', 2: '2nd', 3: '3rd', 4: '4th'}

    # Tracking rows for this play only.
    in_play = (tracking_df['gameId'] == game_id) & (tracking_df['playId'] == play_id)
    df = tracking_df.loc[in_play, ['frameId', 'club', 'displayName', 'x', 'y']].reset_index(drop=True)

    play_df = plays_df[(plays_df['gameId'] == game_id) & (plays_df['playId'] == play_id)].reset_index(drop=True)
    game_df = games_df[games_df['gameId'] == game_id]

    _date = str(game_df['gameDate'].iat[0])

    _down = play_df['down'].iat[0]
    _quarter = play_df['quarter'].iat[0]
    _yards_to_go = play_df['yardsToGo'].iat[0]
    _clocktime = play_df['gameClock'].iat[0]

    _off_team = play_df['possessionTeam'].iat[0]

    _yardline = play_df['yardlineNumber'].iat[0]
    _yardline_side = play_df['yardlineSide'].iat[0]

    _home_score = play_df['preSnapHomeScore'].iat[0]
    _away_score = play_df['preSnapVisitorScore'].iat[0]

    _home_team = game_df['homeTeamAbbr'].iat[0]
    _away_team = game_df['visitorTeamAbbr'].iat[0]

    _play_desc = play_df['playDescription'].iat[0]

    # Team metadata (venue location + colours) comes from one file; read it once.
    team_df = pd.read_csv('../data/interim/team_id_listing.csv')
    home_info = team_df[team_df['TeamAbbr'] == _home_team]
    away_info = team_df[team_df['TeamAbbr'] == _away_team]

    _venue_city = home_info['TeamCity'].iat[0]
    _venue_state = home_info['TeamState'].iat[0]
    _venue_loc = f"{str(_venue_city)}, {str(_venue_state)}"

    _home_color = home_info['Primary_Color_Hex_Code'].iat[0]
    _away_color = away_info['Primary_Color_Hex_Code'].iat[0]
    _home_color_2 = home_info['Secondary_Color_Hex_Code'].iat[0]
    _away_color_2 = away_info['Secondary_Color_Hex_Code'].iat[0]

    # Values outside 1-4 (e.g. an overtime quarter) are shown unchanged.
    _down = ordinals.get(_down, _down)
    _quarter = ordinals.get(_quarter, _quarter)

    plt_title = f"{_home_score}  {_home_team}     {_away_team}  {_away_score}"
    plt_subtitle = f"{_date} | {_venue_loc} | {_quarter}  {_clocktime} | {_off_team} Ball | {_down} & {_yards_to_go} @ {_yardline_side} {_yardline}"

    df_ball = df[df['club'] == 'football']

    def play_plot(i=1):
        """Draw frame ``i`` of the play on a fresh field."""
        fig, ax = create_football_field()

        marker_kwargs = {'marker': 'o', 'linestyle': 'None'}

        frame = df[df['frameId'] == i]

        # One marker per player (first row per displayName in this frame).
        # Players with no row in this frame are simply not drawn.
        home_now = frame[frame['club'] == _home_team].drop_duplicates('displayName')
        away_now = frame[frame['club'] == _away_team].drop_duplicates('displayName')
        ball_now = frame[frame['club'] == 'football'].head(1)

        ax.plot(home_now['x'], home_now['y'], ms=10,
                markerfacecolor=_home_color, markeredgecolor=_home_color_2,
                **marker_kwargs)
        ax.plot(away_now['x'], away_now['y'], ms=10,
                markerfacecolor=_away_color, markeredgecolor=_away_color_2,
                **marker_kwargs)
        ax.plot(ball_now['x'], ball_now['y'], ms=6,
                markerfacecolor='#594716', markeredgecolor='white',
                **marker_kwargs)

        ax.set_title(f"{plt_subtitle}", fontsize=12)
        fig.suptitle(f"{plt_title}", fontsize=18)

        fig.text(0.5, 0.14,
                 f"{_play_desc}",
                 horizontalalignment="center")

        home_patch = mpatches.Patch(color=_home_color, label=_home_team)
        away_patch = mpatches.Patch(color=_away_color, label=_away_team)
        fig.legend(handles=[home_patch, away_patch], loc='lower center')

        plt.show()

        return

    # The Play widget steps `i` from frame 1 through the last frame that has a ball row.
    widgets.interact(play_plot,
                     i=widgets.Play(min=1, max=df_ball['frameId'].max()))

    return

In [93]:
# Render the interactive viewer for one play, using the tracking data loaded above.
basic_play_plot(tracking_df = tracking_df, 
                plays_df = plays_df,
                games_df = games_df,
                game_id = 2022091102,
                play_id = 3449)

interactive(children=(Play(value=1, description='i', max=64, min=1), Output()), _dom_classes=('widget-interact…

In [94]:
# WAYYYY DOWN THE LINE... FrameFill for Milliseconds (100 fills between frames)

In [23]:
# PROJECT IDEA:
    # Create a Model Suite Including Animated DataViz & Dynamic Leaderboards To Track a Plethora of Tackling-Based Models


# MODEL IDEAS:
    # p(tackle)                                           -> determine each DEF player's p(tackle) for each frame
    # p(assist)                                           -> determine each DEF player's p(assist) for each frame
    # p(forced_fumble)                                    -> determine each DEF player's p(forced_fumble) for each frame
    # p(missed_tackle)                                    -> determine each DEF player's p(missed tackle) for each frame of play
    
    # E(Yds Gained At LOS)                                -> determine OFF e(Yds) based on data collected as carrier crosses LOS (runs only)
    # E(YAC)                                              -> determine OFF e(Yds) based on data collected as receiver catches the ball
    # E(Yds at First Contact)

# METRIC IDEAS:
    # PlayerDef
        # TVOE (Tackle Value Over Expectation)            [p(tackle) model]
        # YSTK (Yds Saved By Tackle)                      [E(Yds Gained At LOS) model]
        # FVOE (Fumble Value Over Expectation)            [p(forced fumble) model]
        # DTAV (Defensive Tackle Assist Value)            [p(assist) model]
        # MTAF (Missed Tackle Avoidance Frequency)        [p(missed_tackle) model]
        
    
    # PlayerOff
        # OTEV (Offensive Tackle Elusiveness Value)       [p(missed_tackle) model]
        # Oline Generating Low-EV 
    
    # TmDef
        # Agg_YSTK (exc Garbage Time)                     [E(Yds Gained At LOS) model]
        # Agg_TVOE (exc Garbage Time)                     [p(tackle) model]
        
    # TmOff
        # Agg_YSTK (exc Garbage Time)                     [E(Yds Gained At LOS) model]
