In [1]:
# add local directory to import path
import os
import sys
# Resolve the current working directory to an absolute path and register it
# on the import path exactly once, so local modules can be imported.
module_path = os.path.abspath('.')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
#### --- Standard imports ------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import HTML

# local import
import nflplot

In [3]:
# Reload the local nflplot module (importlib.reload re-executes the module's
# code) - presumably so that edits made to nflplot.py after the first import
# are picked up without restarting the kernel.
from importlib import reload
reload(nflplot)

<module 'nflplot' from 'C:\\Users\\pschl\\Documents\\data_science\\kaggle_nfl_bdb_2022\\nflplot.py'>

**IMPORT DATA - This notebook assumes that the source data is in a "csv" subfolder relative to this notebook**

In [4]:
# Load the source tables from the "csv" subfolder (paths are relative to the
# notebook's working directory):
#   track_df - tracking file for the season selected by `year`
#   play_df  - contents of plays.csv
#   game_df  - contents of games.csv
year = 2018
track_df = pd.read_csv(f'csv/tracking{year}.csv')
play_df = pd.read_csv('csv/plays.csv')
game_df = pd.read_csv('csv/games.csv')

In [5]:
# PFF scouting data; supplies the kickType, gunners and vises columns that the
# punt-formation cells in this notebook read.
pff_df = pd.read_csv('csv/PFFScoutingData.csv')

## Event Types

In [6]:
# Play-level view: number of plays for each special-teams play type.
play_df.loc[:, 'specialTeamsPlayType'].value_counts()

Kickoff        7843
Punt           5991
Extra Point    3488
Field Goal     2657
Name: specialTeamsPlayType, dtype: int64

In [7]:
# Distribution of special-teams results, restricted to punt plays.
(
    play_df
    .loc[play_df['specialTeamsPlayType'].eq('Punt'), 'specialTeamsResult']
    .value_counts()
)

Return                      2286
Fair Catch                  1640
Downed                       829
Out of Bounds                586
Touchback                    383
Muffed                       154
Non-Special Teams Result      74
Blocked Punt                  39
Name: specialTeamsResult, dtype: int64

In [8]:
# Event counts for punt plays: keep a single tracking row per
# (gameId, playId, frameId), attach the punt plays, then tally the events.
(
    track_df
    .groupby(['gameId', 'playId', 'frameId'])
    .head(1)
    .merge(
        play_df[play_df['specialTeamsPlayType'] == 'Punt'],
        on=['gameId', 'playId'],
        how='inner',
    )
    .loc[:, 'event']
    .value_counts()
)

None                        225312
ball_snap                     2136
punt                          2127
punt_received                  855
tackle                         707
first_contact                  621
fair_catch                     586
punt_land                      582
out_of_bounds                  378
punt_downed                    331
line_set                       153
touchback                      144
fumble_offense_recovered        53
punt_muffed                     52
fumble                          36
fumble_defense_recovered        31
punt_fake                       23
punt_blocked                    20
touchdown                       15
punt_play                       13
man_in_motion                   12
run                             12
penalty_flag                    11
pass_forward                    10
kick_received                    9
pass_arrived                     9
pass_outcome_caught              7
snap_direct                      4
pass_outcome_incompl

## Punt Formations - distribution of gunner count, vise count, and gunner-deficit (# gunners - # vises)

### Gunner Count

In [9]:
# Total number of punt plays with data (rows whose kickType is N, R or A).
len(pff_df.loc[pff_df['kickType'].isin(['N', 'R', 'A'])])

5880

In [10]:
# Number of zero-gunner plays (max-protect): punt rows with no gunners listed.
int((pff_df['gunners'].isna() & pff_df['kickType'].isin(['N', 'R', 'A'])).sum())

18

In [11]:
# Distribution of gunner count on punt plays that list at least one gunner.
# Entries are ';'-separated, so the count is (number of ';') + 1.
(
    pff_df
    .loc[pff_df['kickType'].isin(['N', 'R', 'A']) & pff_df['gunners'].notna(), 'gunners']
    .map(lambda listed: listed.count(';') + 1)
    .value_counts()
    .sort_index()
)

1      13
2    5809
3      33
4       7
Name: gunners, dtype: int64

The results above show that the vast majority of punts use exactly 2 gunners, so any use of gunner count as a predictive feature should be investigated thoroughly before relying on it.

### Vise Count

In [12]:
# Number of zero-vise plays (block attempt): punt rows with no vises listed.
int((pff_df['vises'].isna() & pff_df['kickType'].isin(['N', 'R', 'A'])).sum())

40

In [13]:
# Distribution of vise count on punt plays that list at least one vise.
# Entries are ';'-separated, so the count is (number of ';') + 1.
(
    pff_df
    .loc[pff_df['kickType'].isin(['N', 'R', 'A']) & pff_df['vises'].notna(), 'vises']
    .map(lambda listed: listed.count(';') + 1)
    .value_counts()
    .sort_index()
)

1      61
2    3495
3    1746
4     531
5       7
Name: vises, dtype: int64

### Gunner deficit
A negative value means fewer gunners than vises.

In [14]:
def _count_listed_players(value):
    """Count the non-blank entries of a ';'-separated player list.

    Anything that is not a string (e.g. the float NaN pandas uses for a
    missing cell, or None) counts as zero players.
    """
    if not isinstance(value, str):
        return 0
    # Ignore blank entries so an empty string or a trailing ';' is not
    # mistaken for a player.
    return sum(1 for entry in value.split(';') if entry.strip())


def count_gunner_deficit(df):
    """Return the gunner deficit (# gunners - # vises) for one play.

    Parameters
    ----------
    df : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide ``'gunners'`` and ``'vises'`` entries, each either a
        ';'-separated string of players or a missing value.

    Returns
    -------
    int
        Number of gunners minus number of vises; negative means fewer
        gunners than vises.
    """
    return _count_listed_players(df['gunners']) - _count_listed_players(df['vises'])

In [15]:
# Distribution of the gunner deficit (# gunners - # vises) across punt plays.
(
    pff_df
    .loc[pff_df['kickType'].isin(['N', 'R', 'A'])]
    .apply(count_gunner_deficit, axis=1)
    .value_counts()
    .sort_index()
)

-3       4
-2     525
-1    1740
 0    3520
 1      70
 2      21
dtype: int64

## Relative position at catch
Rank the kicking team's players from closest to furthest from the returner at the time of the catch, then see which player makes the tackle.
* Distribution of order vs tackler
* Distribution of space to closest defender (X yds) vs. player that makes tackle (1st, 2nd, ...)

In [18]:
# Keep only punt plays whose special-teams result is a return, then count them.
puntreturn_play_df = play_df.loc[
    play_df['specialTeamsPlayType'].eq('Punt') & play_df['specialTeamsResult'].eq('Return'),
    :,
]
puntreturn_play_df.shape[0]

2286

In [19]:
# Inspect the first punt-return play to see the available play-level fields.
puntreturn_play_df.iloc[0]

gameId                                                           2018090600
playId                                                                  366
playDescription           (9:20) C.Johnston punts 56 yards to ATL 36, Ce...
quarter                                                                   1
down                                                                      4
yardsToGo                                                                 4
possessionTeam                                                          PHI
specialTeamsPlayType                                                   Punt
specialTeamsResult                                                   Return
kickerId                                                            45603.0
returnerId                                                            42450
kickBlockerId                                                           NaN
yardlineSide                                                            PHI
yardlineNumb

In [22]:
# Example play: list its tagged events (event != 'None'), one row per frame.
gameId, playId = 2018090600, 366
(
    track_df
    .loc[
        track_df['gameId'].eq(gameId)
        & track_df['playId'].eq(playId)
        & track_df['event'].ne('None')
    ]
    .groupby('frameId')
    .head(1)
    .loc[:, ['frameId', 'event']]
)

Unnamed: 0,frameId,event
12070916,11,ball_snap
12070938,33,punt
12070983,78,punt_received
12071005,100,first_contact
12071024,119,tackle


In [30]:
def get_frame_of_event(track_df, event_name):
    """Return the frameId of the first row tagged with ``event_name``.

    Parameters
    ----------
    track_df : pandas.DataFrame
        Tracking data for one play; must have ``event`` and ``frameId`` columns.
    event_name : str
        Event label to look up (e.g. ``'punt_received'``).

    Returns
    -------
    The ``frameId`` value of the first matching row, in row order.

    Raises
    ------
    IndexError
        If no row of ``track_df`` carries ``event_name``.
    """
    event_frames = track_df.loc[track_df['event'] == event_name, 'frameId']
    if event_frames.empty:
        # Same exception type as the bare ``.iloc[0]`` would raise, but with a
        # message that says which event was missing.
        raise IndexError(f'Event "{event_name}" not found in the tracking data.')
    return event_frames.iloc[0]

In [33]:
# Tracking rows of the example play only, then the frame of its
# 'punt_received' event.
test_df = track_df[track_df['gameId'].eq(gameId) & track_df['playId'].eq(playId)]
get_frame_of_event(test_df, 'punt_received')

78

In [82]:
def get_defender_distances_at_catch(track_df, kickerId, returnerId):
    """Rank the kicking team's players by distance to the returner at the catch.

    Parameters
    ----------
    track_df : pandas.DataFrame
        Tracking data for one play. Columns read: ``nflId``, ``team``,
        ``frameId``, ``event``, ``x``, ``y``, ``gameId``, ``playId``,
        ``displayName``, ``jerseyNumber``, ``position``.
    kickerId : number
        ``nflId`` of the kicker; the kicker's ``team`` value identifies the
        kicking team.
    returnerId : number
        ``nflId`` of the returner, who must be on the other team.

    Returns
    -------
    pandas.DataFrame
        One row per kicking-team player (the kicker included) at the frame of
        the ``'punt_received'`` event, sorted by ``dist_to_returner`` (distance
        in the x/y plane) with ``dist_order`` starting at 1 for the closest.

    Raises
    ------
    IndexError
        If the kicker is not in ``track_df`` or the play has no
        ``'punt_received'`` event.
    ValueError
        If the kicker's team is neither ``'home'`` nor ``'away'``, or the
        returner does not appear exactly once at the catch frame.
    """
    kicker_teams = track_df.loc[track_df['nflId'] == kickerId, 'team']
    if kicker_teams.empty:
        raise IndexError(f'Kicker with nflId {kickerId} not found in the tracking data.')
    kicking_team = kicker_teams.iloc[0]
    if kicking_team == 'home':
        receiving_team = 'away'
    elif kicking_team == 'away':
        receiving_team = 'home'
    else:
        raise ValueError(f'Expected kicking team to be "home" or "away". Value was "{kicking_team}".')

    # All tracking rows at the frame where the punt is received.
    catch_frame = get_frame_of_event(track_df, 'punt_received')
    at_catch = track_df[track_df['frameId'] == catch_frame]

    # Defenders are the kicking team's players at the catch.
    defender_df = at_catch[at_catch['team'] == kicking_team]

    # Exactly one returner row is needed: zero or several rows cannot be
    # broadcast against the (n_defenders, 2) position array.
    returner_rows = at_catch[(at_catch['team'] == receiving_team)
                             & (at_catch['nflId'] == returnerId)]
    if len(returner_rows) != 1:
        raise ValueError(
            f'Expected exactly one row for returner {returnerId} on the "{receiving_team}" team '
            f'at frame {catch_frame}. Found {len(returner_rows)}.'
        )
    returner_xy = returner_rows[['x', 'y']].to_numpy()  # shape (1, 2)

    # Euclidean distance from each defender to the returner.
    dist = np.linalg.norm(defender_df[['x', 'y']].to_numpy() - returner_xy, axis=1)

    # Build the result with assign() so no column is ever set on a frame
    # derived from a slice of track_df.
    id_columns = ['gameId', 'playId', 'nflId', 'displayName', 'jerseyNumber', 'position', 'team']
    return (
        defender_df[id_columns]
        .assign(dist_to_returner=dist)
        .sort_values('dist_to_returner', ascending=True)
        .reset_index(drop=True)
        .assign(dist_order=lambda ranked: ranked.index + 1)
    )
    
    
    

In [83]:
# Rank the kicking team's distances to the returner for the example play.
get_defender_distances_at_catch(test_df, kickerId=45603.0, returnerId=42450)

Unnamed: 0,gameId,playId,nflId,displayName,jerseyNumber,position,team,dist_to_returner,dist_order
0,2018090600,366,42515.0,D.J. Alexander,57.0,OLB,home,20.513159,1
1,2018090600,366,44979.0,Shelton Gibson,18.0,WR,home,23.104132,2
2,2018090600,366,44997.0,Nate Gerry,47.0,LB,home,24.871319,3
3,2018090600,366,32361.0,Corey Graham,24.0,FS,home,25.643791,4
4,2018090600,366,43497.0,Kamu Grugier-Hill,54.0,OLB,home,26.113822,5
5,2018090600,366,42942.0,Rick Lovato,45.0,LS,home,31.111941,6
6,2018090600,366,46194.0,Avonte Maddox,29.0,CB,home,33.178247,7
7,2018090600,366,40408.0,LaRoy Reynolds,50.0,MLB,home,33.189932,8
8,2018090600,366,45599.0,Corey Clement,30.0,RB,home,41.672288,9
9,2018090600,366,45603.0,Cameron Johnston,1.0,P,home,46.587831,10


## Depth of punt returner at snap
This could help determine whether short punts should be stratified separately, depending on the distribution.