# Fourth Down Project

Authors: Seth Billiau, Sarah Lucioni

In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 12 14:27:38 2020

@author: laurieshaw

See http://www.advancedfootballanalytics.com/index.php/home/research/game-strategy/120-4th-down-study
"""

import pandas as pd
import numpy as np
import scipy.signal as signal
import matplotlib.pyplot as plt

def non_parametric_smooth_with_nan(y,smooth_type='savgol',window=21):
    """Smooth the non-NaN entries of ``y`` while leaving NaN gaps in place.

    The non-NaN samples are extracted, filtered as one contiguous sequence,
    and written back to the positions they came from.

    Parameters
    ----------
    y : array_like
        Data to smooth; NaN marks missing values.
    smooth_type : str, optional
        Smoothing method. Only 'savgol' (Savitzky-Golay filter with
        polynomial order 2) is implemented; any other value returns the
        data unsmoothed.
    window : int, optional
        Savitzky-Golay window length, in samples.

    Returns
    -------
    numpy.ndarray
        A new float array with the same shape as ``y``. The input is never
        modified, so callers do not need to pass a copy.
    """
    polyorder = 2
    # np.array(...) always copies, so the caller's data is left untouched.
    smoothed = np.array(y, dtype=float)
    if smooth_type != 'savgol':
        return smoothed

    valid = ~np.isnan(smoothed)
    n_valid = int(np.count_nonzero(valid))
    if n_valid < window:
        # savgol_filter rejects a window longer than the data, so fall back
        # to the largest odd window that fits the available samples.
        window = n_valid if n_valid % 2 == 1 else n_valid - 1
        if window <= polyorder:
            # Too few samples to fit the polynomial: nothing to smooth.
            return smoothed

    smoothed[valid] = signal.savgol_filter(smoothed[valid], window, polyorder)
    return smoothed

def poly_smooth_with_nan(x,y,deg):
    """Fit a degree-``deg`` polynomial to the non-NaN points of ``y``.

    Parameters
    ----------
    x : numpy.ndarray
        Sample positions.
    y : numpy.ndarray
        Sample values; entries that are NaN are left out of the fit.
    deg : int
        Degree of the fitted polynomial.

    Returns
    -------
    tuple
        ``(yp, pfit, cov)``: the fitted polynomial evaluated at every entry
        of ``x`` (including positions where ``y`` was NaN), the polynomial
        coefficients from ``np.polyfit`` (highest power first), and the
        covariance matrix of those coefficients.
    """
    usable = np.logical_not(np.isnan(y))
    coefficients, coefficient_cov = np.polyfit(x[usable], y[usable], deg, cov=True)
    fitted_curve = np.poly1d(coefficients)
    return fitted_curve(x), coefficients, coefficient_cov


''' MAIN EXERCISE STARTS HERE '''
# Directory holding the project data files; this is a machine-specific
# absolute path and must be edited to run the notebook elsewhere.
DATADIR = '/Users/sethbilliau/Desktop/stat143/code/data/'
# Name of the play-by-play CSV inside DATADIR.
PBP_CSV_NAME = 'NFL_PbP_2009_2018_4thDownAnalysis.csv'

# Load the play-by-play table into a DataFrame.
nfl_pbp = pd.read_csv(DATADIR + PBP_CSV_NAME)

# Bare expression: renders the DataFrame when run as a notebook cell.
nfl_pbp
    

Unnamed: 0,game_id,home_team,away_team,posteam,posteam_type,defteam,yardline_100,game_date,game_seconds_remaining,game_half,...,two_point_attempt,field_goal_attempt,kickoff_attempt,punt_attempt,return_team,return_yards,penalty_team,penalty_yards,next_score_relative_to_posteam,next_score_index
0,2009091000,PIT,TEN,PIT,home,TEN,30.0,2009-09-10,3600.0,Half1,...,0.0,0.0,1.0,0.0,PIT,39.0,,,6.0,72
1,2009091000,PIT,TEN,PIT,home,TEN,58.0,2009-09-10,3593.0,Half1,...,0.0,0.0,0.0,0.0,,0.0,,,6.0,72
2,2009091000,PIT,TEN,PIT,home,TEN,53.0,2009-09-10,3556.0,Half1,...,0.0,0.0,0.0,0.0,,0.0,,,6.0,72
3,2009091000,PIT,TEN,PIT,home,TEN,56.0,2009-09-10,3515.0,Half1,...,0.0,0.0,0.0,0.0,,0.0,,,6.0,72
4,2009091000,PIT,TEN,PIT,home,TEN,56.0,2009-09-10,3507.0,Half1,...,0.0,0.0,0.0,1.0,,0.0,,,6.0,72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449366,2018121700,CAR,NO,CAR,home,NO,66.0,2018-12-17,63.0,Half2,...,0.0,0.0,0.0,0.0,,0.0,,,0.0,-1
449367,2018121700,CAR,NO,CAR,home,NO,66.0,2018-12-17,58.0,Half2,...,0.0,0.0,0.0,0.0,,0.0,,,0.0,-1
449368,2018121700,CAR,NO,CAR,home,NO,61.0,2018-12-17,38.0,Half2,...,0.0,0.0,0.0,0.0,,0.0,,,0.0,-1
449369,2018121700,CAR,NO,NO,away,CAR,39.0,2018-12-17,35.0,Half2,...,0.0,0.0,0.0,0.0,,0.0,,,0.0,-1


In [None]:
# Keep only the plays flagged as field goal attempts
field_goal_attempts = nfl_pbp[nfl_pbp['field_goal_attempt']==1]
# A kick is counted as made when the possession team's score after the play
# exceeds its score before the play
fg_made = field_goal_attempts.posteam_score_post > field_goal_attempts.posteam_score

# One slot per field position, 1-99 yards
field_position = np.arange(1,100)
field_goal_number_attempts = np.zeros(99)
# Start from NaN so positions without any attempt stay marked as "no data"
field_goal_success_rate = np.full(99, np.nan)
for slot, yards in enumerate(field_position):
    at_this_distance = field_goal_attempts.yardline_100==yards
    n_attempts = np.sum(at_this_distance)
    field_goal_number_attempts[slot] = n_attempts
    if field_goal_number_attempts[slot]>0:
        field_goal_success_rate[slot] = np.sum(fg_made[at_this_distance]) / n_attempts

# Non-parametric filter (applied to a copy so the raw rates are preserved)
field_goal_success_rate_smooth_noparam = non_parametric_smooth_with_nan(field_goal_success_rate.copy(),window=21)

# Piecewise polynomial fit: cubic up to the cut distance, zero beyond it
cut_distance = 49 # anything greater than this is set to zero
within_cut = field_position<=cut_distance
field_goal_success_rate_smooth_poly = np.zeros(99)
field_goal_success_rate_smooth_poly[within_cut] = poly_smooth_with_nan(
    field_position[within_cut],
    field_goal_success_rate[within_cut],
    deg=3,
)[0]

# Plot the raw rates together with both smoothed curves
fig,ax = plt.subplots()
ax.plot(field_position,field_goal_success_rate,'ro',alpha=0.4,label='data')
ax.plot(field_position,field_goal_success_rate_smooth_noparam,'r--',label='non-parametric smooth')
ax.plot(field_position,field_goal_success_rate_smooth_poly,'r:',label='piecewise poly smooth')
ax.set_xlabel('Field position (yards)')
ax.legend()
ax.set_ylabel('Proportion of field goals made')
ax.set_xlim(0,60)
fig.suptitle('Kicking success rate by field position')

    