# Punt plays for each season

Using the punt tracking data, the play data, and the PFF scouting data, we want to match each punt with the frame in which the ball hits the ground or goes out of bounds (if it goes straight OoB).

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Play-level data and the PFF scouting annotations.
play_data = pd.read_csv('./data/plays.csv')
pff_data = pd.read_csv('./data/PFFScoutingData.csv')

# Keep punt plays whose special-teams result is not in discard_results.
# Adding 'Blocked Punt' to this list would exclude blocked punts as well.
discard_results = ['Non-Special Teams Result']
is_punt = play_data['specialTeamsPlayType'] == 'Punt'
is_discarded = play_data['specialTeamsResult'].isin(discard_results)
punt_plays = play_data[is_punt & ~is_discarded]

# Restrict the PFF rows to the same (gameId, playId) pairs.
punt_keys = punt_plays[['gameId','playId']]
pff_punt = pff_data.merge(punt_keys, on=['gameId','playId'], how='inner')

# Integer-dividing gameId by 10**6 keeps its leading digits, so this counts plays per leading-digit group.
display((punt_plays['gameId'] // 10**6).value_counts())
print(f'Total plays: {len(punt_plays)}')

2018    2128
2019    2008
2020    1667
2021     114
Name: gameId, dtype: int64

Total plays: 5917


#### Load each year's tracking data and keep only the football for punt plays

In [None]:
def _load_punt_football_tracking(csv_path, plays, chunksize=10**5):
    """Read one tracking CSV and return the football rows belonging to ``plays``.

    The file is read in chunks of ``chunksize`` rows and only rows whose
    ``displayName`` is 'football' are kept, so the full file is never held in
    memory at once.

    Parameters
    ----------
    csv_path : str
        Path to a tracking CSV.
    plays : DataFrame
        Must contain ``gameId`` and ``playId``; only tracking rows matching one
        of these pairs are returned (inner join).
    chunksize : int
        Number of CSV rows read per chunk.

    Returns
    -------
    DataFrame
        Football tracking rows for the requested plays, with ``time`` parsed
        by ``pd.to_datetime``.
    """
    chunk_reader = pd.read_csv(csv_path, chunksize=chunksize)
    football_rows = pd.concat(
        (chunk.query("displayName == 'football'") for chunk in chunk_reader),
        ignore_index=True,
    )
    punt_rows = football_rows.merge(plays[['gameId','playId']], on=['gameId','playId'])
    punt_rows['time'] = pd.to_datetime(punt_rows['time'])
    return punt_rows


# One frame per tracking file; the three loads are identical apart from the path.
punt_fbtrack_18 = _load_punt_football_tracking('./data/tracking2018.csv', punt_plays)
punt_fbtrack_19 = _load_punt_football_tracking('./data/tracking2019.csv', punt_plays)
punt_fbtrack_20 = _load_punt_football_tracking('./data/tracking2020.csv', punt_plays)

# Combine all above tracking data, ordered by play and frame, and drop the
# columns that are not used for the football in the rest of the notebook.
punt_fbtrack = (
    pd.concat([punt_fbtrack_18, punt_fbtrack_19, punt_fbtrack_20])
    .sort_values(by=['gameId','playId','frameId'])
    .drop(columns=['o','dir','nflId','displayName','jerseyNumber','position','team'])
    .reset_index(drop=True)
)

## The goal is to find the first frame where the ball hits the ground after the punt

#### Useful functions that will be commonly used throughout the processing

In [None]:
# Create a table of events that occur during the punt play, removing a list of excluded events.
def event_table(df, exc_list=None):
    """Build a per-play table of the events that occur, in row order.

    Parameters
    ----------
    df : DataFrame
        Must contain ``gameId``, ``playId`` and ``event`` columns.
    exc_list : list-like of str, optional
        Event names to leave out. Rows whose event is the string 'None' are
        always left out. Defaults to excluding nothing else.

    Returns
    -------
    DataFrame
        Indexed by (gameId, playId); column k holds the k-th remaining event of
        the play (1-based), NaN where a play has fewer events.
    """
    excluded = [] if exc_list is None else exc_list
    keep = ~df['event'].isin(excluded) & (df['event'] != 'None')
    df_trim = df[keep].copy()

    # Running count of non-empty events within each play. A grouped cumsum keeps
    # the original row index, so it can be assigned directly; the previous
    # groupby.apply form collapsed to a wide DataFrame when only one play was
    # present and could not be assigned as a column.
    is_event = (df_trim['event'] != '').astype(int)
    df_trim['event_num'] = is_event.groupby([df_trim['gameId'], df_trim['playId']]).cumsum()

    return pd.pivot_table(
        df_trim,
        values='event',
        index=['gameId','playId'],
        columns='event_num',
        aggfunc=lambda x: ''.join(x),
    )


# Given a list of events that signal the landing frame of a play, pull out the landing frame.
# Further restriction of plays to a particular PFF Contact Type is accepted.
def find_landing_frame(df, accepted_events, contact_type = None):
    """Pick, for each play, the first row whose event is one of ``accepted_events``.

    Parameters
    ----------
    df : DataFrame
        Tracking rows with ``gameId``, ``playId``, ``frameId`` and ``event``.
        "First" means first in the row order of ``df``.
    accepted_events : list-like of str
        Event names that count as the landing frame.
    contact_type : list-like of str, optional
        When given, ``df`` is first restricted with ``select_contact_types``.

    Returns
    -------
    list
        ``[accept_frame, remaining_punts]``: the selected row for each play that
        has an accepted event, and the (gameId, playId) pairs of the plays that
        have none.
    """
    punts = df.copy()
    # Identity test: `!= None` is evaluated element-wise for array-like input,
    # which makes the `if` raise instead of applying the filter.
    if contact_type is not None:
        # Limit punt data to specific PFF Contact Type punts
        punts = select_contact_types(punts, contact_type)

    play_keys = ['gameId','playId']
    accept_frame = punts[punts['event'].isin(accepted_events)].drop_duplicates(subset=play_keys)

    # Left join: plays without an accepted frame get NaN in frameId_y.
    temp = punts.merge(accept_frame[play_keys + ['frameId']], on=play_keys, how='left')
    remaining_punts = temp[temp['frameId_y'].isna()][play_keys].drop_duplicates()

    return [accept_frame, remaining_punts]


# Pull the plays that match a particular PFF Contact Type
def select_contact_types(df, contact_type):
    """Return the rows of ``df`` that belong to plays with the given PFF kick contact types.

    Plays are looked up in the module-level ``pff_punt`` table: its rows whose
    ``kickContactType`` is in ``contact_type`` supply the (gameId, playId)
    pairs, which are then inner-merged with ``df`` on the columns they share.
    """
    wanted = pff_punt['kickContactType'].isin(contact_type)
    matching_plays = pff_punt.loc[wanted, ['gameId','playId']]
    return df.merge(matching_plays)
    
    


# Event names that can be passed as the exclusion list of event_table.
drop_event_list = [
    'ball_snap',
    'line_set',
    'safety',
    'pass_forward',
    'fumble',
    'tackle',
    'first_contact',
    'fumble_offense_recovered',
    'penalty_flag',
    'fumble_defense_recovered',
    'lateral',
    'pass_outcome_caught',
    'penalty_accepted',
    'play_action',
    'touchdown',
    'shift',
    'man_in_motion',
    'huddle_break_offense',
    'huddle_start_offense',
]
# display(event_table(punt_fbtrack,drop_event_list).apply(pd.Series.value_counts))

# a, b = find_landing_frame(punt_fbtrack,cc_events,['CC'])
# display(a)
# display(b)


# cc_events = ['fair_catch','punt_received','punt_land','punt_downed','punt_muffed']
# oob_events = ['out_of_bounds','punt_land','kick_received','punt_muffed','punt_received','fair_catch']
# bc_events = ['punt_received','fair_catch','punt_muffed']
# dez_events = ['punt_land','touchback']

### Separate DEZ and OOB from the remaining punts.

These plays need special handling: events that typically signal the ball landing may occur after the ball has already exited the field of play.

In [None]:
# Take the punts whose PFF contact type is OOB or DEZ and mark every frame in which
# the ball is outside the limits below. Only frames without an event (event == 'None')
# are relabelled, so existing events are never overwritten.
check_inbound_punts = select_contact_types(punt_fbtrack,['OOB','DEZ'])

ball_x = check_inbound_punts['x']
ball_y = check_inbound_punts['y']
past_x_limits = (ball_x < 10) | (ball_x > 110)        # Touchback
past_y_limits = (ball_y < 0) | (ball_y > 160/3)       # Out of Bound
has_no_event = check_inbound_punts['event'] == 'None'
check_inbound_punts.loc[(past_x_limits | past_y_limits) & has_no_event, 'event'] = 'out_bound'

# The first frame per play carrying any of these events is taken as the landing frame.
outbound_events = [
    'out_bound', 'out_of_bounds', 'punt_land', 'kick_received',
    'punt_muffed', 'punt_received', 'fair_catch', 'touchback',
]
outbound_plays, b = find_landing_frame(check_inbound_punts, outbound_events)
display(outbound_plays)
display(b)

## All remaining plays should have the ball land/caught in bounds

Using Hang Time from PFF data to find the frame where the ball lands will be the primary method we use.  There is some inconsistency with event labels in the tracking data, thus we do not rely on any particular event (such as punt_land etc).  However, we can see how well the hang time method corresponds to the nearest events in the tracking data.

Note: there is 1 play where the punt event is clearly mistimed.  We will manually exclude this play in the first part of the analysis.

In [None]:
# Look at non-OOB/DEZ punt plays
# Add hangTime from PFF data to the frame where there is a punt or autoevent_kickoff event in the tracking data
# This time gives a frame where the ball is recorded to land.
# Most of the times, this frame will not have an event listed, but it will be very close to a large number of possible events
in_plays = punt_fbtrack.merge(pff_punt[~pff_punt['kickContactType'].isin(['OOB','DEZ'])][['gameId','playId','hangTime']])
# Flag plays in which the ball has y < 0 or y > 160/3 in some frame before frame 30 (one flag row per play) ...
ball_data_oob = in_plays[((in_plays['y']<0)|(in_plays['y']>160/3))&(in_plays['frameId']<30)].drop_duplicates(subset=['gameId','playId'])
ball_data_oob['Drop'] = 'Yes'
# ... and remove every frame of those plays. The 'Drop' column stays in in_plays (NaN for the kept rows).
in_plays = in_plays.merge(ball_data_oob[['gameId','playId','Drop']],how='left')
in_plays.drop(in_plays[in_plays['Drop']=='Yes'].index,inplace=True)#.drop(columns=['Drop'],inplace=True)

# Last 'punt' / 'autoevent_kickoff' frame of each play, offset by the hang time converted to frames.
# The factor 10 presumably reflects 10 tracking frames per second with hangTime in seconds -- TODO confirm.
punt_frames = in_plays[in_plays['event'].isin(['punt','autoevent_kickoff'])].drop_duplicates(subset=['gameId','playId'],keep='last').copy()
punt_frames['landFrame'] = punt_frames['frameId'] + np.ceil(10*punt_frames['hangTime'])

# One play has the 'punt'-event frame clearly with significant error, so we exclude this event.
punt_frames.drop(punt_frames[(punt_frames['gameId']==2018112507)&(punt_frames['playId']==560)].index,inplace=True)

# Save the frames equal to the calculated landing frame
# (left merge: plays without a usable punt frame get landFrame = NaN and never match)
in_plays = in_plays.merge(punt_frames[['gameId','playId','landFrame']],how='left')
htfmpunt_plays = in_plays[in_plays['frameId']==in_plays['landFrame']].copy()
htfmpunt_plays['Found'] = 'Yes'

# Filter out the plays that cannot be found this way
# (in_plays_remain keeps all frames of the plays that have no 'Found' flag)
in_plays_remain = in_plays.merge(htfmpunt_plays[['gameId','playId','Found']],how='left')
in_plays_remain = in_plays_remain[in_plays_remain['Found'].isna()]
# Positional slice: removes the last three columns, which at this point are the
# helper columns added above (Drop, landFrame, Found).
htfmpunt_plays = htfmpunt_plays.iloc[:,:-3]
display(htfmpunt_plays)
display(in_plays_remain)

In [None]:
# Absolute distance, in frames, between each tracked frame and the hang-time landing frame.
in_plays['frames_from_land'] = np.abs(in_plays['frameId'] - in_plays['landFrame'])

# For every play keep the non-excluded event closest to the landing frame.
# Note that groupby(...).first() takes the first non-null value of each column.
exclude_list = ['None','penalty_flag','line_set','ball_snap','man_in_motion']
nearest_events = (
    in_plays[~in_plays['event'].isin(exclude_list)]
    .sort_values('frames_from_land')
    .groupby(['gameId','playId'])
    .first()
)

fig, ax = plt.subplots(figsize=(12,8))
for event_name, event_rows in nearest_events.groupby('event'):
    ax.plot(event_rows['frameId'], event_rows['landFrame'], '.', label=event_name)
# Dashed diagonal: event frame equal to the computed landing frame.
ax.plot(np.linspace(0,100,10), np.linspace(0,100,10), 'k--')
ax.set_xlim(20, 120)
ax.set_ylim(40, 100)
ax.set_xlabel('Frame of the nearest tracked event')
ax.set_ylabel('Landing frame from punt frame + hang time')
ax.set_title('Nearest tracking event vs. hang-time landing frame')
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
plt.show()

#### Look at plays with a large gap between the nearest event and the landing frame

In [None]:
# Frames that carry an event and have a defined distance to the landing frame,
# ordered within each play from nearest to farthest.
carries_event = in_plays['event'] != 'None'
has_distance = in_plays['frames_from_land'].notna()
ordered_events = (
    in_plays[carries_event & has_distance]
    .sort_values(['gameId','playId','frames_from_land'], ascending=True)
    .copy()
)
# Rank of each event within its play (0 = nearest to the landing frame).
ordered_events['order'] = ordered_events.groupby(['gameId','playId']).cumcount()

# One row per play, one column per rank, holding the event name.
nearest_events_to_land = pd.pivot_table(
    ordered_events,
    values='event',
    index=['gameId','playId'],
    columns='order',
    aggfunc=lambda x: ''.join(x),
)
nearest_events_to_land

In [None]:
# Nearest event per play (first non-null value of each column after the ordering above).
temp = ordered_events.groupby(['gameId','playId']).first()

# Plays whose nearest event is more than 15 frames away from the landing frame,
# joined back onto the play-level data for inspection.
far_from_land = temp[temp['frames_from_land'] > 15].reset_index([0,1])
large_time = punt_plays.merge(far_from_land[['gameId','playId','event','x','frameId','landFrame']])
large_time

## Remaining plays that land in bounds

Some plays have the ball entirely out of bounds, which we will remove for now.

In [None]:
# Split the unresolved plays by which piece of information is missing.
has_hang_time = in_plays_remain['hangTime'].notna()
has_land_frame = in_plays_remain['landFrame'].notna()

# No hang time at all.
no_ht = in_plays_remain[~has_hang_time].copy()
# Hang time present but no landing frame; the positional slice removes the last two columns.
no_lt = in_plays_remain[has_hang_time & ~has_land_frame].iloc[:,:-2].copy()
# Hang time and landing frame present, yet no tracked frame matched the landing frame.
no_frame = in_plays_remain[has_hang_time & has_land_frame].copy()

### No Landing Time

These events occur when no 'punt' event frame is found.  This can be easily resolved by adding operationTime and hangTime to the ball_snap frame.

In [None]:
# Attach the PFF operation time to every frame of the plays without a landing frame.
no_lt = no_lt.merge(pff_punt[['gameId','playId','operationTime']])

# Landing frame = ball_snap frame + (operationTime + hangTime) converted to frames.
# The times are read from snap_frames itself rather than from no_lt, so the result
# no longer depends on the two frames sharing an index.
# NOTE(review): this path rounds down (np.floor) while the punt-event path uses np.ceil -- confirm which is intended.
snap_frames = no_lt[no_lt['event']=='ball_snap'].copy()
snap_frames['landTime'] = snap_frames['frameId'] + np.floor(10*(snap_frames['operationTime']+snap_frames['hangTime']))

# Broadcast the computed frame back onto every row of the play (inner merge:
# plays without a ball_snap frame are dropped here).
no_lt = no_lt.merge(snap_frames[['gameId','playId','landTime']])

### Landing Frame after last frame of the tracking data

11 plays have tracking data end before the punt lands

3 of these plays are from the same game, with the data ending several frames before the landing frame

The remaining 8 plays end 3 or fewer frames early.

In [None]:
# Last tracked frame of each play whose landing frame was never reached.
no_frame_last = no_frame.drop_duplicates(subset=['gameId','playId'], keep='last')

# Number of frames between the end of the tracking data and the landing frame.
frames_short = no_frame_last['landFrame'] - no_frame_last['frameId']
no_frame_last_keep = no_frame_last[frames_short <= 3]
no_frame_last_remove = no_frame_last[frames_short > 3]

In [None]:
# All frames of the kept plays, then show only the frames that carry an event.
kept_play_keys = no_frame_last_keep[['gameId','playId']]
no_frame_keepevents = no_frame.merge(kept_play_keys)
no_frame_keepevents.loc[no_frame_keepevents['event'] != 'None']

### No Hang Time

42 Total events

6 are actual punts which were deflected.  We can use the kick length and the events in the tracking data to find where the ball lands.  Of these plays, 4 have tracking events that can be used. The remaining two can be found manually.

gameId = 2018102111
playId = 3651
ball bounces on frame 38.

gameId = 2020100401
playId = 211
ball bounces on frame 54.


36 are blocked punts. For now, these plays will be discarded.

In [None]:
# Plays without hang time, annotated with the PFF contact type and play-level details.
no_ht_kct = (
    no_ht
    .merge(pff_punt[['gameId','playId','kickContactType']])
    .drop(columns=['hangTime','landFrame','Found'])
    .merge(punt_plays[['gameId','playId','playDescription','specialTeamsResult','kickLength']])
)

# Number of such plays per special-teams result (one row per play).
one_row_per_play = no_ht_kct.drop_duplicates(subset=['gameId','playId'])
one_row_per_play['specialTeamsResult'].value_counts()

In [None]:
# Manually identified bounce frames for the plays that have no usable tracking event.
# They are keyed by play so the assignment does not depend on row order.
manual_landings = pd.DataFrame({
    'gameId':  [2018102111, 2020100401],
    'playId':  [3651, 211],
    'frameId': [38, 54],
})

not_blocked = no_ht_kct[no_ht_kct['specialTeamsResult'] != 'Blocked Punt']
no_ht_punts, unfound = find_landing_frame(not_blocked, ['fair_catch','punt_received','punt_land'])

# Every play left without a landing event must have a manual frame; fail loudly otherwise
# instead of attaching a frame to the wrong play.
unfound_keys = unfound
unfound = unfound_keys.merge(manual_landings, on=['gameId','playId'], how='inner')
if len(unfound) != len(unfound_keys):
    raise ValueError('Some plays without a landing event have no manually recorded landing frame')

# Pull the tracking row at the manual frame and append it to the event-based landings.
no_ht_punts = pd.concat([no_ht_punts, no_ht_kct.merge(unfound)])

#### Blocked punts

In [None]:
# Tracking rows of the blocked punts, without the helper / play-level columns.
is_blocked = no_ht_kct['specialTeamsResult'] == 'Blocked Punt'
blocked_punts = no_ht_kct[is_blocked].drop(columns=['Drop','kickContactType','specialTeamsResult','kickLength'])

# Number the remaining events of each play (0, 1, ...) after ignoring the ones listed here.
ignored_events = ['None','punt','punt_blocked','line_set','penalty_flag','man_in_motion']
check_block = blocked_punts[~blocked_punts['event'].isin(ignored_events)].copy()
check_block['event_num'] = check_block.groupby(['gameId','playId']).cumcount()

# Frames of event 0 and event 1 of each play become columns 0 and 1 on every row of the play.
event_frames = pd.pivot_table(
    check_block, values='frameId', index=['gameId','playId'], columns='event_num'
)[[0,1]].reset_index([0,1])
blocked_punts = blocked_punts.merge(event_frames)

# Keep the frames lying between those two events (inclusive).
between_events = (blocked_punts['frameId'] >= blocked_punts[0]) & (blocked_punts['frameId'] <= blocked_punts[1])
block_frames = blocked_punts[between_events]

# Attach the ball's x of the first kept frame of each play as 'first_x'.
first_x = (
    block_frames.groupby(['gameId','playId'])['x']
    .first()
    .reset_index([0,1])
    .rename({'x':'first_x'}, axis=1)
)
block_frames = block_frames.merge(first_x)
#block_frames.drop_duplicates(subset=['gameId','playId'])

In [None]:
# Displacement in x relative to the first kept frame, sign-flipped when playDirection is 'left'.
direction_sign = np.where(block_frames['playDirection'] == 'left', -1, 1)
block_frames['x_dist_from_first'] = (block_frames['x'] - block_frames['first_x']) * direction_sign
#block_frames#[block_frames['x_dist_from_first']>0]

### Miscellaneous

In [None]:
# Ball position at the hang-time landing frame, one row per play, joined with the
# play-level data and the PFF contact type.
landing_rows = (
    in_plays[in_plays['frameId']==in_plays['landFrame']][['gameId','playId','x','playDirection']]
    .drop_duplicates(subset=['gameId','playId'])
)
test = punt_plays.merge(landing_rows).merge(pff_punt[['gameId','playId','kickContactType']])

# -1 for plays going left, +1 otherwise.
direction_sign = (-1)**(test['playDirection']=='left')

# Expected landing x: absolute yardline plus the kick length along the play direction.
test['a'] = test['absoluteYardlineNumber'] + test['kickLength']*direction_sign

# Transform all play into right-directional data (left plays are mirrored about x = 60)
test['x'] = 60 - (60-test['x'])*direction_sign
test['a'] = 60 - (60-test['a'])*direction_sign

fig, ax = plt.subplots(figsize=(12,8))
for contact_type, contact_rows in test.groupby('kickContactType'):
    ax.plot(contact_rows['a'], contact_rows['x'], '.', label=contact_type)
# Dashed diagonal: tracked landing x equal to the expected landing x.
ax.plot(np.linspace(0,120,10), np.linspace(0,120,10), 'k--')
ax.set_xlim(35, 115)
ax.set_ylim(20, 115)
ax.set_xlabel('Expected landing x (absoluteYardlineNumber + kickLength)')
ax.set_ylabel('Tracked ball x at the hang-time landing frame')
ax.set_title('Expected vs. tracked landing position by PFF kick contact type')
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
plt.show()

## Combine all data on landing frame

In [None]:
# Row counts of the five sources that contribute a landing frame.
snap_based_landings = no_lt[no_lt['frameId']==no_lt['landTime']]
landing_counts = [
    len(outbound_plays),
    len(htfmpunt_plays),
    len(snap_based_landings),
    len(no_frame_last_keep),
    len(no_ht_punts),
]
print(*landing_counts)

# Counts of the groups that are left out.
excluded_counts = [
    len(ball_data_oob),
    len(no_frame_last_remove.drop_duplicates(subset=['gameId','playId'])),
    len(blocked_punts.drop_duplicates(subset=['gameId','playId'])),
]
print(*excluded_counts)

In [None]:
# Stack the landing rows found by each method and keep the shared tracking columns.
landing_sources = [
    outbound_plays,
    htfmpunt_plays,
    no_lt[no_lt['frameId']==no_lt['landTime']],
    no_frame_last_keep,
    no_ht_punts,
]
landFrame_data = pd.concat(landing_sources, ignore_index=True)
landFrame_data = landFrame_data[['time','x','y','frameId','event','gameId','playId','playDirection']]
landFrame_data = landFrame_data.merge(pff_punt[['gameId','playId','hangTime']])

# For plays without a PFF hang time, derive one from the tracking data:
# (landing frame - punt frame) / 10. The last 'punt' frame of a play is used, as in
# the hang-time step, so a play with several 'punt' frames does not duplicate rows.
missing_hang_time = landFrame_data.loc[landFrame_data['hangTime'].isna(), ['gameId','playId','frameId']]
punt_frame_noht = (
    punt_fbtrack[punt_fbtrack['event']=='punt']
    .drop_duplicates(subset=['gameId','playId'], keep='last')
    .merge(missing_hang_time, on=['gameId','playId'])
)
punt_frame_noht['hangTime'] = (punt_frame_noht['frameId_y'] - punt_frame_noht['frameId_x'])/10

# Prefer the PFF value and fall back to the derived one. When neither exists the
# hang time stays NaN instead of being reported as 0.
landFrame_data = landFrame_data.merge(
    punt_frame_noht[['gameId','playId','hangTime']], on=['gameId','playId'], how='left'
)
landFrame_data['hangTime'] = landFrame_data['hangTime_x'].combine_first(landFrame_data['hangTime_y'])
landFrame_data = landFrame_data.drop(columns=['hangTime_x','hangTime_y'])

# Reduced table that is written to disk, ordered by play.
landFrame_data_red = (
    landFrame_data[['gameId','playId','x','y','hangTime','frameId','playDirection']]
    .sort_values(by=['gameId','playId'], ascending=True, ignore_index=True)
)
landFrame_data_red

In [20]:
# Write the reduced landing-frame table to fb_land.csv in the working directory, without the index column.
landFrame_data_red.to_csv(path_or_buf='fb_land.csv', index=False)