In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm

## Intro

A common practice in football is to "ice" the kicker. Icing occurs when the opposing team takes a timeout prior to the field goal attempt, usually toward the end of a half. The thought behind this practice is to make the kicker think about the kick more, allowing nerves to affect the outcome. Sometimes the timeout is taken well before the snap, while some coaches prefer to take the timeout right when the ball is snapped. The question is: does icing actually impact the result of the kick? Below I use data from the 2009-18 NFL seasons. After some cleanup and data preparation, I build a logistic regression in order to determine if icing has an effect on kick result. The findings indicate icing reduces the probability of making the kick when controlling for other factors such as kick length and whether the stadium is a dome or not.

## Data Transformations

In [2]:
# df = pd.read_csv('data/nfl_play_by_play_2009_2018.csv')

df = pd.read_csv('data/data.csv')

df = df[[
    'play_id',
    'game_id',
    'home_team',
    'away_team',
    'posteam',
    'posteam_type',
    # 'defteam',
    # 'side_of_field',
    # 'yardline_100', # distance to endzone, larger further away
    'game_date',
    'quarter_seconds_remaining',
    # 'game_seconds_remaining',`
    # 'game_half',
    'qtr',
    'yrdln',
    'play_type',
    'field_goal_result',
    'kick_distance', # adds 18 yards to yardline for snap and endzone
    # 'home_timeouts_remaining',
    # 'away_timeouts_remaining',
    'timeout',
    'timeout_team',
    'posteam_timeouts_remaining', # timeouts remaining is post timeout taken
    'defteam_timeouts_remaining',
    # 'total_home_score',
    # 'total_away_score',
    'posteam_score',
    'defteam_score',
    'score_differential',
    # 'fg_prob',
    'field_goal_attempt',
    'kicker_player_name',
    'kicker_player_id'
]]

df['prev_play_type'] = df.groupby('game_id')['play_type'].shift(1)

df['prev_play_id'] = df.groupby('game_id')['play_id'].shift(1)
df['prev_play_timeout'] = df.groupby('game_id')['timeout'].shift(1)
df['prev_play_timeout_team'] = df.groupby('game_id')['timeout_team'].shift(1)

  df = pd.read_csv('data/data.csv')


In [3]:
# Filter to field goals in final minute of either half
fg = df.query('field_goal_attempt == 1 & qtr.isin([2, 4, 5]) & score_differential > -10 & score_differential < 10')

fg = fg.query('quarter_seconds_remaining < 60 | qtr == 5')

# Create boolean for if defensive team called timeout
fg['is_iced'] = np.where(fg.prev_play_timeout == 1 & (fg.prev_play_timeout_team != fg.posteam), 1, 0)

fg['is_fg_made'] = np.where(fg.field_goal_result == 'made', 1, 0)

# Don't need these columns after other transformations
fg.drop(columns=['timeout', 'timeout_team', 'field_goal_attempt', 'game_id'], inplace=True)

In [4]:
# Get dates
fg['game_date'] = pd.to_datetime(fg['game_date'].str.strip(), format='%Y-%m-%d')

fg['game_month'] = fg['game_date'].dt.month

In [5]:
# Add boolean for if game is played in dome

domes = {'TB': 0, 'JAC': 0, 'DEN': 0, 'ATL': 1, 'NE': 0, 'OAK': 0, 'SD': 0, 'TEN': 0, 'PHI': 0, 'GB': 0,
       'CHI': 0, 'DAL': 1, 'NYG': 0, 'MIA': 0, 'IND': 1, 'MIN': 1, 'SEA': 0, 'PIT': 0, 'CLE': 0,
       'NYJ': 0, 'CIN': 0, 'STL': 1, 'BAL': 0, 'KC': 0, 'BUF': 0, 'CAR': 0, 'ARI': 1, 'HOU': 1,
       'WAS': 0, 'NO': 1, 'SF': 0, 'DET': 1, 'JAX': 0, 'LA': 1, 'LAC': 1}

domes_df = pd.DataFrame(list(domes.items()), columns=['home_team', 'is_dome'])

fg = fg.merge(domes_df, on='home_team', how='inner')

In [6]:
fg.describe()

Unnamed: 0,play_id,quarter_seconds_remaining,qtr,kick_distance,posteam_timeouts_remaining,defteam_timeouts_remaining,posteam_score,defteam_score,score_differential,prev_play_id,prev_play_timeout,is_iced,is_fg_made,game_month,is_dome
count,1251.0,1251.0,1251.0,1248.0,1251.0,1251.0,1251.0,1251.0,1251.0,1251.0,1245.0,1251.0,1251.0,1251.0,1251.0
mean,2937.370104,61.230216,2.896882,40.183494,0.786571,1.379696,13.805755,14.011191,-0.205436,2914.68745,0.46988,0.218225,0.760991,10.279776,0.293365
std,1165.92081,150.815144,1.170196,11.072517,0.840959,1.021624,8.419492,8.645084,4.118855,1166.177738,0.499292,0.413206,0.426649,1.904957,0.455486
min,1530.0,0.0,2.0,18.0,0.0,0.0,0.0,0.0,-9.0,1508.0,0.0,0.0,0.0,1.0,0.0
25%,1992.5,4.0,2.0,32.0,0.0,1.0,7.0,7.0,-3.0,1971.5,0.0,0.0,1.0,10.0,0.0
50%,2198.0,8.0,2.0,41.0,1.0,1.0,13.0,13.0,0.0,2180.0,0.0,0.0,1.0,10.0,0.0
75%,4174.0,30.0,4.0,48.0,1.0,2.0,20.0,20.0,3.0,4151.5,1.0,0.0,1.0,11.0,1.0
max,5706.0,849.0,5.0,68.0,3.0,3.0,49.0,48.0,9.0,5683.0,1.0,1.0,1.0,12.0,1.0


In [7]:
fg.dtypes

play_id                                int64
home_team                             object
away_team                             object
posteam                               object
posteam_type                          object
game_date                     datetime64[ns]
quarter_seconds_remaining            float64
qtr                                    int64
yrdln                                 object
play_type                             object
field_goal_result                     object
kick_distance                        float64
posteam_timeouts_remaining           float64
defteam_timeouts_remaining           float64
posteam_score                        float64
defteam_score                        float64
score_differential                   float64
kicker_player_name                    object
kicker_player_id                      object
prev_play_type                        object
prev_play_id                         float64
prev_play_timeout                    float64
prev_play_

In [8]:
fg.head()

Unnamed: 0,play_id,home_team,away_team,posteam,posteam_type,game_date,quarter_seconds_remaining,qtr,yrdln,play_type,...,kicker_player_name,kicker_player_id,prev_play_type,prev_play_id,prev_play_timeout,prev_play_timeout_team,is_iced,is_fg_made,game_month,is_dome
0,4250,PIT,TEN,PIT,home,2009-09-10,634.0,5,TEN 15,field_goal,...,J.Reed,00-0020737,no_play,4233.0,1.0,PIT,0,1,9,0
1,3889,PIT,SD,PIT,home,2009-10-04,48.0,4,SD 28,field_goal,...,J.Reed,00-0020737,no_play,3866.0,1.0,PIT,0,1,10,0
2,2051,PIT,CLE,PIT,home,2009-10-18,10.0,2,CLE 13,field_goal,...,J.Reed,00-0020737,pass,2025.0,0.0,,0,1,10,0
3,2037,PIT,CIN,PIT,home,2009-11-15,16.0,2,CIN 17,field_goal,...,J.Reed,00-0020737,pass,2015.0,0.0,,0,1,11,0
4,1829,PIT,ATL,PIT,home,2010-09-12,22.0,2,ATL 37,field_goal,...,J.Reed,00-0020737,no_play,1806.0,1.0,ATL,1,0,9,0


In [9]:
fg.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1251 entries, 0 to 1250
Data columns (total 27 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   play_id                     1251 non-null   int64         
 1   home_team                   1251 non-null   object        
 2   away_team                   1251 non-null   object        
 3   posteam                     1251 non-null   object        
 4   posteam_type                1251 non-null   object        
 5   game_date                   1251 non-null   datetime64[ns]
 6   quarter_seconds_remaining   1251 non-null   float64       
 7   qtr                         1251 non-null   int64         
 8   yrdln                       1251 non-null   object        
 9   play_type                   1251 non-null   object        
 10  field_goal_result           1251 non-null   object        
 11  kick_distance               1248 non-null   float64     

## Data Exploration
Looking at the data, there are far fewer field goals attempted where the kicker was iced; however, those kicks are less successful, with a make rate of 68% vs 78% for kicks where the kicker wasn't iced. 

In [10]:
pd.crosstab(fg['is_iced'], fg['is_fg_made'], margins=True)

is_fg_made,0,1,All
is_iced,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,212,766,978
1,87,186,273
All,299,952,1251


In [11]:
pd.crosstab(fg['is_iced'], fg['is_fg_made'], normalize='index')

is_fg_made,0,1
is_iced,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.216769,0.783231
1,0.318681,0.681319


In [16]:
log_reg = sm.logit("is_fg_made ~ is_dome + kick_distance + is_iced + game_month + score_differential + posteam_type + qtr", data=fg).fit()

Optimization terminated successfully.
         Current function value: 0.453099
         Iterations 7


In [17]:
print(log_reg.summary())

                           Logit Regression Results                           
Dep. Variable:             is_fg_made   No. Observations:                 1248
Model:                          Logit   Df Residuals:                     1240
Method:                           MLE   Df Model:                            7
Date:                Sun, 16 Jul 2023   Pseudo R-squ.:                  0.1771
Time:                        21:13:43   Log-Likelihood:                -565.47
converged:                       True   LL-Null:                       -687.15
Covariance Type:            nonrobust   LLR p-value:                 7.163e-49
                           coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------
Intercept                5.9017      0.587     10.060      0.000       4.752       7.052
posteam_type[T.home]    -0.2656      0.148     -1.789      0.074      -0.557       0.025
is_dome     