# i. Import libraries and load the data

In [1]:
import re
from pathlib import Path
import os

from matplotlib.ticker import FuncFormatter
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import brier_score_loss, roc_auc_score
from sklearn.calibration import calibration_curve
from sklearn.preprocessing import PolynomialFeatures
import arviz as az
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pymc as pm
import arviz as az
import nfl_data_py as nfl

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

n_cores = os.cpu_count()

In [23]:
# Read the processed field-goal attempts, cast the iced-kicker flag to int, and attach
# each stadium's elevation. The left join keeps attempts whose stadium has no elevation row.
train_data_path = Path('../data/processed/field_goal_data.parquet')
fg_attempts = (
    pd.read_parquet(train_data_path)
    .astype({'iced_kicker': int})
    .merge(
        pd.read_csv('../data/processed/stadium_elevations.csv').loc[:, ['stadium_id', 'elevation_feet']],
        how='left',
        on='stadium_id',
    )
)
fg_attempts.head()

Unnamed: 0,season,week,game_id,play_id,game_date,start_time,time_of_day,qtr,quarter_seconds_remaining,game_seconds_remaining,stadium,stadium_id,is_home,is_indoor,roof,grass_surface,lighting_condition,posteam,defteam,posteam_score,defteam_score,score_differential,kicker_player_name,kicker_player_id,kicker_headshot_url,kicker_height,kicker_weight,kicker_years_exp,kicker_draft_number,kicker_age,kicker_rich_hill,temperature,chance_of_rain,snow_severity,wind_speed,wind_gust,yardline_100,tie_or_take_lead,to_stay_within_one_score,pressure_rating,iced_kicker,field_goal_result,elevation_feet
0,2010,1,2010_01_GB_PHI,660.0,2010-09-12,"9/12/10, 16:15:46",2010-09-12T20:37:49Z,1.0,268.0,2968.0,Lincoln Financial Field,PHI00,1,0,outdoors,1,0,PHI,GB,0.0,0.0,0.0,David Akers,00-0000108,https://static.www.nfl.com/image/private/f_aut...,70.0,200.0,13.0,300.0,35.759,0.5,69.0,0.0,0,4.0,4.0,27.0,1,0,0.0,0,made,33
1,2010,1,2010_01_GB_PHI,3652.0,2010-09-12,"9/12/10, 16:15:46",2010-09-12T23:14:33Z,4.0,346.0,346.0,Lincoln Financial Field,PHI00,1,0,outdoors,1,0,PHI,GB,17.0,27.0,-10.0,David Akers,00-0000108,https://static.www.nfl.com/image/private/f_aut...,70.0,200.0,13.0,300.0,35.759,0.5,69.0,0.0,0,4.0,4.0,5.0,0,1,1.0,0,made,33
2,2010,4,2010_04_WAS_PHI,865.0,2010-10-03,"10/3/10, 16:15:32",2010-10-03T20:54:58Z,2.0,900.0,2700.0,Lincoln Financial Field,PHI00,1,0,outdoors,1,0,PHI,WAS,0.0,14.0,-14.0,David Akers,00-0000108,https://static.www.nfl.com/image/private/f_aut...,70.0,200.0,13.0,300.0,35.817,0.5,63.0,0.0,0,13.0,13.0,31.0,0,0,0.0,0,made,33
3,2010,4,2010_04_WAS_PHI,1680.0,2010-10-03,"10/3/10, 16:15:32",2010-10-03T21:35:05Z,2.0,23.0,1823.0,Lincoln Financial Field,PHI00,1,0,outdoors,1,0,PHI,WAS,3.0,17.0,-14.0,David Akers,00-0000108,https://static.www.nfl.com/image/private/f_aut...,70.0,200.0,13.0,300.0,35.817,0.5,63.0,0.0,0,13.0,13.0,6.0,0,0,0.0,0,made,33
4,2010,5,2010_05_PHI_SF,986.0,2010-10-10,"10/10/10, 20:30:42",2010-10-11T01:09:05Z,2.0,705.0,2505.0,Candlestick Park,SFO00,-1,0,outdoors,1,2,PHI,SF,7.0,7.0,0.0,David Akers,00-0000108,https://static.www.nfl.com/image/private/f_aut...,70.0,200.0,13.0,300.0,35.836,0.5,71.0,0.0,0,10.0,10.0,14.0,1,0,0.0,0,made,35


In [3]:
# Raw (non-normalised) count of attempts per outcome label.
fg_attempts['field_goal_result'].value_counts()

made       13801
missed      2262
blocked      335
Name: field_goal_result, dtype: int64

In [15]:
# Linearity probe: squared Pearson correlation between the make indicator and successive
# integer powers of the raw yard line. Any result other than 'made' (including blocked
# attempts) is encoded as 0.
fg_made = (fg_attempts['field_goal_result'] == 'made').astype(int)
for degree in range(1, 6):
    ytg_poly = np.power(fg_attempts['yardline_100'], degree)
    r_squared = np.corrcoef(ytg_poly, fg_made)[0, 1] ** 2
    print(f"R-squared between yardline^{degree} and FG made: {r_squared:.4f}")

R-squared between yardline^1 and FG made: 0.1090
R-squared between yardline^2 and FG made: 0.1157
R-squared between yardline^3 and FG made: 0.1112
R-squared between yardline^4 and FG made: 0.1013
R-squared between yardline^5 and FG made: 0.0879


# ii. Load the trained model

In [101]:
# Load the saved posterior trace if the file is present; otherwise only report the
# missing path (in that case `trace` is not assigned by this cell).
model_path = Path('../models/trace_poly2_v1.nc')
if not model_path.exists():
    print(f"Model file {model_path} does not exist. Please check the path or run the model first.")
else:
    trace = az.from_netcdf(model_path)
    print("Trace loaded successfully.")

Trace loaded successfully.


# iii. Define the dataset of "replacement" kickers

In [36]:
# NOTE(review): `n_seasons_with_current_team` is not assigned in any cell visible in this
# notebook, so this display appears to rely on leftover kernel state — TODO restore or
# remove its definition.
n_seasons_with_current_team

Unnamed: 0,kicker_player_name,kicker_player_id,posteam,season,kicker_years_exp
144,David Akers,00-0000108,DET,2013,16.0
0,David Akers,00-0000108,PHI,2010,13.0
41,David Akers,00-0000108,SF,2011,14.0
97,David Akers,00-0000108,SF,2012,15.0
169,Kris Brown,00-0001980,LAC,2010,11.0
179,John Carney,00-0002655,NO,2010,23.0
441,Phil Dawson,00-0004091,ARI,2017,19.0
481,Phil Dawson,00-0004091,ARI,2018,20.0
185,Phil Dawson,00-0004091,CLE,2010,12.0
225,Phil Dawson,00-0004091,CLE,2011,13.0


In [52]:
# Scratch check of the streak expression on three non-adjacent seasons: no difference
# equals 1, so the cumulative count stays at 0 and the result is 1.
# Note the expression counts every adjacent-season pair in the series, so it equals the
# longest run only when the seasons form a single unbroken run.
pd.Series([2011,2013,2015]).sort_values().diff().eq(1).cumsum().max() + 1

1

In [60]:
# Seasons to load weekly rosters for: 2010 through 2024 (the range end is exclusive).
YEARS = range(2010, 2025)
# Directory used for the per-season roster parquet cache; created up front if missing.
data_root = Path('../data/raw/weekly_rosters')
data_root.mkdir(parents=True, exist_ok=True)

def load_or_cache_weekly_rosters(year):
    """Return the weekly roster table for one season, backed by a local parquet cache.

    Args:
        year: Season to load; also names the cache file ``<year>.parquet`` under ``data_root``.

    Returns:
        The roster DataFrame. It is read from the cache file when that file exists;
        otherwise it is fetched with ``nfl.import_weekly_rosters([year])``, written to
        the cache file (without the index), and returned.
    """
    cache_file = data_root / f"{year}.parquet"

    # Cache miss: download, persist, and hand back the fresh frame.
    if not cache_file.exists():
        print(f"Downloading weekly rosters for {year} from NFL API...")
        rosters = nfl.import_weekly_rosters([year])
        rosters.to_parquet(cache_file, index=False)
        print(f"Cached weekly rosters for {year} to {cache_file}")
        return rosters

    # Cache hit: read the stored parquet file.
    print(f"Loading weekly rosters for {year} from cache...")
    return pd.read_parquet(cache_file)
def load_all_weekly_rosters(years):
    """Load the weekly rosters for every season in ``years`` and stack them.

    Args:
        years: Iterable of seasons; each is passed to ``load_or_cache_weekly_rosters``.

    Returns:
        A single DataFrame with all seasons concatenated and a fresh 0..n-1 index.
    """
    season_frames = [load_or_cache_weekly_rosters(season) for season in years]

    print(f"Combining weekly rosters for {len(season_frames)} seasons...")
    all_rosters = pd.concat(season_frames, ignore_index=True)
    print(f"Total rows: {len(all_rosters):,}")

    return all_rosters
# Load every season in YEARS and keep only the rows whose position is "K".
weekly_rosters = load_all_weekly_rosters(YEARS).query('position=="K"')
# Preview the first rows of the filtered rosters.
weekly_rosters.head()

Loading weekly rosters for 2010 from cache...
Loading weekly rosters for 2011 from cache...
Loading weekly rosters for 2012 from cache...
Loading weekly rosters for 2013 from cache...
Loading weekly rosters for 2014 from cache...
Loading weekly rosters for 2015 from cache...
Loading weekly rosters for 2016 from cache...
Loading weekly rosters for 2017 from cache...
Loading weekly rosters for 2018 from cache...
Loading weekly rosters for 2019 from cache...
Loading weekly rosters for 2020 from cache...
Loading weekly rosters for 2021 from cache...
Loading weekly rosters for 2022 from cache...
Loading weekly rosters for 2023 from cache...
Loading weekly rosters for 2024 from cache...
Combining weekly rosters for 15 seasons...
Total rows: 610,092


Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
21,2010,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,70.0,200.0,,00-0000108,,,,,,,,,13.0,https://static.www.nfl.com/image/private/f_aut...,,17,REG,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,36.066
22,2010,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,70.0,200.0,,00-0000108,,,,,,,,,13.0,https://static.www.nfl.com/image/private/f_aut...,,9,REG,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,35.912
23,2010,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,70.0,200.0,,00-0000108,,,,,,,,,13.0,https://static.www.nfl.com/image/private/f_aut...,,16,REG,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,36.052
24,2010,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,70.0,200.0,,00-0000108,,,,,,,,,13.0,https://static.www.nfl.com/image/private/f_aut...,,13,REG,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,35.981
25,2010,PHI,K,,2,ACT,David Akers,David,Akers,1974-12-09,70.0,200.0,,00-0000108,,,,,,,,,13.0,https://static.www.nfl.com/image/private/f_aut...,,11,REG,A01,David,AKE551610,,3200414b-4555-1610-e0e6-a72c82e419e7,1997.0,1997.0,,,35.951


In [63]:
# Inspect the 2024 GB rows of the kicker rosters, ordered by player name and week.
weekly_rosters.query('season==2024 and team=="GB"').sort_values(['player_name', 'week'])

Unnamed: 0,season,team,position,depth_chart_position,jersey_number,status,player_name,first_name,last_name,birth_date,height,weight,college,player_id,espn_id,sportradar_id,yahoo_id,rotowire_id,pff_id,pfr_id,fantasy_data_id,sleeper_id,years_exp,headshot_url,ngs_position,week,game_type,status_description_abbr,football_name,esb_id,gsis_it_id,smart_id,entry_year,rookie_year,draft_club,draft_number,age
610036,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,1,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.502
610034,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,2,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.527
610041,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,3,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.546
610037,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,4,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.565
610038,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,5,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.585
610040,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,6,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.604
610039,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,7,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.623
610046,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,8,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.642
610033,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,9,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.661
610043,2024,GB,K,K,16.0,DEV,Alex Hale,Alex,Hale,1998-03-07,72.0,205.0,Oklahoma State,00-0039945,4361831.0,c0f5daf0-5505-11ef-b7e3-6370a6e73976,,18429.0,,,,12438.0,0.0,,,11,REG,P03,Alex,HAL031251,58041,32004841-4c03-1251-97e3-c8a0c955a05c,2024.0,2024.0,,,26.7


In [54]:
import pandas as pd

# One row per kicker/team/season, excluding kicker-seasons from 2023 onward whose
# kicker_years_exp is 2 or less, ordered by kicker, team and season.
filtered_fg = (
    fg_attempts
    .loc[:, ['kicker_player_name', 'kicker_player_id', 'posteam', 'season', 'kicker_years_exp']]
    .query('~(season >= 2023 and kicker_years_exp <= 2)')
    .drop_duplicates(subset=['kicker_player_id', 'posteam', 'season'])
    .sort_values(['kicker_player_id', 'posteam', 'season'])
)

# Longest streak of consecutive seasons per kicker/team
def longest_consecutive_streak(seasons):
    """Return the length of the longest run of consecutive seasons.

    The previous implementation counted every adjacent-season pair in the whole
    history (``diff().eq(1).cumsum().max() + 1``), so separate runs were added
    together: ``[2010, 2011, 2013, 2014]`` produced 3 instead of 2.

    Args:
        seasons: Series (or other 1-D sequence) of season numbers, in any order.
            Missing values and repeated seasons are ignored.

    Returns:
        int: Length of the longest unbroken run of seasons; 0 for empty input.
    """
    ordered = pd.Series(seasons).dropna().drop_duplicates().sort_values()
    if ordered.empty:
        return 0

    # A new run starts wherever the gap to the previous season is not exactly 1
    # (the first element's NaN difference also starts a run), so the cumulative
    # sum of those breaks labels each run with its own id.
    run_id = ordered.diff().ne(1).cumsum()

    # The size of the largest run is the longest streak.
    return int(run_id.value_counts().max())

# Streak length for every kicker/team pairing
successive_seasons = (
    filtered_fg
    .groupby(['kicker_player_name', 'kicker_player_id', 'posteam'])['season']
    .apply(longest_consecutive_streak)
    .rename('max_successive_seasons')
    .reset_index()
)

# Keep, for each kicker, the team row with the largest streak
kicker_max_streak = (
    successive_seasons
    .sort_values(by='max_successive_seasons', ascending=False)
    .drop_duplicates(subset='kicker_player_id', keep='first')
)

# =====================
# Per-kicker totals: seasons with an attempt, and attempts overall
# =====================

# Distinct seasons in which the kicker has at least one FG attempt
n_seasons_played = (
    fg_attempts
    .groupby(['kicker_player_name', 'kicker_player_id'])
    .agg(n_seasons_played=('season', 'nunique'))
    .reset_index()
)

# Number of FG attempt rows per kicker
total_fg_attempts = (
    fg_attempts
    .groupby(['kicker_player_name', 'kicker_player_id'])
    .size()
    .rename('total_fg_attempts')
    .reset_index()
)

# Join the streak table with the per-kicker totals, derive attempts per season,
# and order by total attempts (largest first) for review.
kicker_summary = (
    kicker_max_streak
    .merge(n_seasons_played, how='left', on=['kicker_player_name', 'kicker_player_id'])
    .merge(total_fg_attempts, how='left', on=['kicker_player_name', 'kicker_player_id'])
    .assign(
        fg_attempts_per_season=lambda summary: (
            summary['total_fg_attempts'] / summary['n_seasons_played']
        )
    )
    .sort_values('total_fg_attempts', ascending=False)
)

kicker_summary

Unnamed: 0,kicker_player_name,kicker_player_id,posteam,max_successive_seasons,n_seasons_played,total_fg_attempts,fg_attempts_per_season
0,Justin Tucker,00-0029597,BAL,13,13,518,39.846154
12,Matt Prater,00-0023853,DET,7,15,461,30.733333
9,Greg Zuerlein,00-0029621,LA,8,13,429,33.0
1,Mason Crosby,00-0025580,GB,13,14,414,29.571429
11,Nick Folk,00-0025565,NYJ,7,14,412,29.428571
16,Graham Gano,00-0026858,CAR,7,14,410,29.285714
20,Robbie Gould,00-0023252,CHI,6,13,385,29.615385
22,Ryan Succop,00-0026968,TEN,6,13,384,29.538462
4,Stephen Gostkowski,00-0024333,NE,10,11,370,33.636364
6,Brandon McManus,00-0029822,DEN,9,11,349,31.727273


In [92]:
# NOTE(review): `current_indices` is not assigned in any cell visible in this notebook,
# so this check appears to rely on leftover kernel state — TODO restore or remove.
max(current_indices.values())

387

In [90]:
# (rows, columns) of `dataset`.
# NOTE(review): the only visible assignment of `dataset` is in a cell further down, so
# this cell fails on a top-to-bottom run unless it is moved below that cell.
dataset.shape

(10742, 43)

In [95]:
# Imported in this cell so it does not depend on an import made in a different cell.
from collections import defaultdict

# For every kicker, collect (season, positional row index) pairs in row order.
kicker_season_map = defaultdict(list)
for i, (kicker_id, season) in enumerate(zip(dataset["kicker_player_id"], dataset["season"])):
    kicker_season_map[kicker_id].append((season, i))

# Composite "<kicker_player_id>_<season>" key, one per attempt row.
dataset["kicker_season"] = [
    f"{kicker_id}_{season}" for kicker_id, season in zip(dataset["kicker_player_id"], dataset["season"])
]

# Distinct kicker-season keys in order of first appearance.
unique_kicker_seasons = dataset["kicker_season"].unique()
n_kicker_seasons = len(unique_kicker_seasons)

# Vector mapping each attempt row to its kicker-season's position in the GRW.
# pd.factorize numbers the keys in order of first appearance, which is the same numbering
# as enumerating `unique_kicker_seasons`, but does it in one pass instead of one full
# boolean scan of the column per kicker-season.
kicker_season_grw_idx = pd.factorize(dataset["kicker_season"])[0].astype(int)

In [69]:
# Modelling frame: attempts that were either made or missed, from 2015 onward.
# `.copy()` comes last so `dataset` is an independent frame: later cells add columns to
# it, and copying before the final filter would leave `dataset` as a filtered selection
# of the intermediate copy instead.
dataset = (
    fg_attempts
    .query('field_goal_result.isin(["made", "missed"])')
    .query('season >= 2015') # Train on 10 seasons
    .copy()
)

from collections import defaultdict
# For every kicker, collect (season, positional row index) pairs in row order.
kicker_season_map = defaultdict(list)
for i, (kicker_id, season) in enumerate(zip(dataset["kicker_player_id"], dataset["season"])):
    kicker_season_map[kicker_id].append((season, i))

# Kickers in order of first appearance in `dataset`.
unique_kickers = list(kicker_season_map)
n_kickers = len(unique_kickers)

# Vector giving, for each attempt row, the position of its kicker in `unique_kickers`.
kicker_grw_idx = np.zeros(len(dataset), dtype=int)
for kicker_idx, kicker_id in enumerate(unique_kickers):
    row_positions = [data_idx for _, data_idx in kicker_season_map[kicker_id]]
    kicker_grw_idx[row_positions] = kicker_idx

In [73]:
# Display the per-attempt kicker index array.
kicker_grw_idx

array([  0,   0,   0, ..., 103, 103, 104])

In [81]:
# Number of entries in the posterior-mean `kicker_intercept` vector: averaged over `chain`,
# then over the first remaining axis (presumably `draw` — TODO confirm).
len(trace.posterior['kicker_intercept'].mean(dim='chain').mean(axis=0).values)

105

In [82]:
# Attach the posterior-mean `kicker_intercept` to `kicker_intercepts` by kicker id.
# The right-hand frame takes its key column from `kicker_intercepts` itself, so the
# posterior values are paired with kicker ids purely by row position.
# NOTE(review): this assumes the trace's kicker axis has the same length and order as the
# rows of `kicker_intercepts` — confirm against the index used when the model was fit.
# NOTE(review): the only visible assignment of `kicker_intercepts` is in a cell further
# down, so this cell fails on a top-to-bottom run unless it is moved below that cell.
kicker_intercepts.merge(
    pd.DataFrame({
        'kicker_player_id': kicker_intercepts['kicker_player_id'],
        'kicker_intercept': trace.posterior['kicker_intercept'].mean(dim='chain').mean(axis=0).values
    }), 
    on='kicker_player_id', 
    how='left'
)

Unnamed: 0,kicker_player_name,kicker_player_id,season,kicker_intercept
0,Phil Dawson,00-0004091,2015,0.083981
1,Adam Vinatieri,00-0016919,2015,0.091192
2,Shayne Graham,00-0019536,2015,0.100341
3,Sebastian Janikowski,00-0019646,2015,0.103018
4,Matt Bryant,00-0020578,2015,0.116914
5,Josh Brown,00-0021940,2015,0.114648
6,Josh Scobee,00-0022874,2015,0.10054
7,Nick Novak,00-0023096,2015,0.099735
8,Robbie Gould,00-0023252,2015,0.108928
9,Mike Nugent,00-0023482,2015,0.098798


In [119]:
# One row per distinct (name, id, season) among attempts from 2015 onward.
kicker_intercepts = fg_attempts.query('2015 <= season')[['kicker_player_name','kicker_player_id', 'season']].drop_duplicates()
# Composite "<kicker_player_id>_<season>" key used for the join below.
kicker_intercepts['kicker_season_idx'] = kicker_intercepts['kicker_player_id'].astype(str) + "_" + kicker_intercepts['season'].astype(str)

# Attach the posterior-mean `kicker_season_intercept` (averaged over `chain`, then over the
# first remaining axis). The right-hand frame takes its key column from `kicker_intercepts`
# itself, so posterior values are paired with kicker-seasons purely by row position.
# NOTE(review): the rows here come from `fg_attempts` without any filter on
# `field_goal_result`; this assumes the trace's kicker-season axis follows the same order
# and length — confirm against the index used when the model was fit.
kicker_intercepts = kicker_intercepts.merge(
    pd.DataFrame({
        'kicker_season_idx': kicker_intercepts['kicker_season_idx'],
        'kicker_intercept': trace.posterior['kicker_season_intercept'].mean(dim='chain').mean(axis=0).values
    }), 
    on='kicker_season_idx', 
    how='left'
)
# 2020 kicker-seasons ranked by intercept, highest first.
kicker_intercepts.query('season==2020').sort_values('kicker_intercept', ascending=False, ignore_index=True)

Unnamed: 0,kicker_player_name,kicker_player_id,season,kicker_season_idx,kicker_intercept
0,Jason Myers,00-0031492,2020,00-0031492_2020,0.227799
1,Graham Gano,00-0026858,2020,00-0026858_2020,0.22463
2,Mason Crosby,00-0025580,2020,00-0025580_2020,0.201492
3,Younghoe Koo,00-0033702,2020,00-0033702_2020,0.166808
4,Ryan Succop,00-0026968,2020,00-0026968_2020,0.146926
5,Cairo Santos,00-0031203,2020,00-0031203_2020,0.140099
6,Jason Sanders,00-0034794,2020,00-0034794_2020,0.124725
7,Chris Boswell,00-0031136,2020,00-0031136_2020,0.115135
8,Nick Folk,00-0025565,2020,00-0025565_2020,0.105546
9,Brandon McManus,00-0029822,2020,00-0029822_2020,0.088831


In [120]:
# One row per distinct (name, id, season) among attempts from 2015 onward.
kicker_season_ytg_slopes = fg_attempts.query('2015 <= season')[['kicker_player_name','kicker_player_id', 'season']].drop_duplicates()
# Composite "<kicker_player_id>_<season>" key used for the join below.
kicker_season_ytg_slopes['kicker_season_idx'] = kicker_season_ytg_slopes['kicker_player_id'].astype(str) + "_" + kicker_season_ytg_slopes['season'].astype(str)

# Attach the posterior-mean `kicker_ytg_slope` (averaged over `chain`, then over the first
# remaining axis). The right-hand frame takes its key column from this frame itself, so
# posterior values are paired with kicker-seasons purely by row position.
# NOTE(review): this assumes the trace's `kicker_ytg_slope` axis is indexed per kicker-season
# in the same order and length as these rows — confirm against the index used when the
# model was fit.
kicker_season_ytg_slopes = kicker_season_ytg_slopes.merge(
    pd.DataFrame({
        'kicker_season_idx': kicker_season_ytg_slopes['kicker_season_idx'],
        'kicker_season_ytg_slope': trace.posterior['kicker_ytg_slope'].mean(dim='chain').mean(axis=0).values
    }), 
    on='kicker_season_idx', 
    how='left'
)
# 2020 kicker-seasons ranked by yards-to-goal slope, highest first.
kicker_season_ytg_slopes.query('season==2020').sort_values('kicker_season_ytg_slope', ascending=False, ignore_index=True)

Unnamed: 0,kicker_player_name,kicker_player_id,season,kicker_season_idx,kicker_season_ytg_slope
0,Sergio Castillo,00-0031385,2020,00-0031385_2020,0.835618
1,Jon Brown,00-0032835,2020,00-0032835_2020,0.512944
2,Josh Lambo,00-0032087,2020,00-0032087_2020,0.413396
3,Jason Myers,00-0031492,2020,00-0031492_2020,0.317065
4,Steven Hauschka,00-0025944,2020,00-0025944_2020,0.073852
5,Mason Crosby,00-0025580,2020,00-0025580_2020,0.069046
6,Harrison Butker,00-0033303,2020,00-0033303_2020,0.066958
7,Matthew Wright,00-0035189,2020,00-0035189_2020,0.028265
8,Cody Parkey,00-0030850,2020,00-0030850_2020,0.014469
9,Tristan Vizcaino,00-0034909,2020,00-0034909_2020,-0.066689


In [18]:
# import numpy as np
# import pandas as pd
# import pymc as pm
# import xarray as xr
# from tqdm import tqdm

# def calculate_replacement_level_predictions(fg_attempts, trace, model, n_samples=1000):
#     """
#     Calculate replacement level predictions for all FG attempts in the dataset.
    
#     Args:
#         fg_attempts: DataFrame containing all FG attempts
#         trace: PyMC trace object from the fitted model
#         model: PyMC model object
#         n_samples: Number of posterior samples to use
        
#     Returns:
#         DataFrame with replacement level predictions and summary statistics
#     """
#     # Calculate replacement level features
#     replacement_age = fg_attempts.drop_duplicates(subset=['kicker_player_id', 'season']).kicker_age.median()
    
#     # Get kicker season indices for mapping
#     kicker_season_ids = fg_attempts["kicker_player_id"] + "_" + fg_attempts["season"].astype(str)
#     n_kicker_seasons = len(kicker_season_ids.unique())
    
#     # Get replacement level kicker parameters (10th percentile)
#     replacement_kicker_intercept = np.percentile(kicker_intercepts['kicker_intercept'].values, 10)
#     replacement_kicker_ytg_slope = np.percentile(kicker_ytg_slopes['kicker_intercept'].values, 10)
    
#     # Randomly select n_samples from the posterior (across all chains)
#     n_chains = trace.posterior.dims['chain']
#     n_draws = trace.posterior.dims['draw']
#     total_samples = n_chains * n_draws
    
#     # If we have fewer samples than requested, use all available
#     if total_samples < n_samples:
#         n_samples = total_samples
    
#     # Randomly select sample indices
#     sample_indices = np.random.choice(total_samples, size=n_samples, replace=False)
#     chain_indices = sample_indices // n_draws
#     draw_indices = sample_indices % n_draws
    
#     # Create replacement level data for posterior predictive sampling
#     with model:
#         # Create new input data with replacement level kicker features
#         # Kicker features (replacement level)
#         kicker_age_repl = np.full(len(fg_attempts), replacement_age)
#         scaler.fit(fg_attempts.drop_duplicates(['season','kicker_player_id'])['kicker_age'].values.reshape(-1, 1))
#         kicker_age_repl_scaled = scaler.transform(kicker_age_repl.reshape(-1, 1)).flatten()
#         scaler.fit((fg_attempts.drop_duplicates(['season','kicker_player_id'])['kicker_age'].values ** 10).reshape(-1, 1))
#         kicker_age_poly_repl_scaled = scaler.transform((kicker_age_repl.reshape(-1, 1) ** 10)).flatten()

#         # Situational features (keep original)
#         yards_to_goal = scaler.fit_transform(fg_attempts[['yardline_100']].values).flatten()
#         yards_to_goal_poly = scaler.fit_transform((fg_attempts[['yardline_100']].values ** 2)).flatten()
#         pressure_rating = scaler.fit_transform(fg_attempts[['pressure_rating']].values).flatten()
#         iced_kicker = scaler.fit_transform(fg_attempts[['iced_kicker']].values).flatten()
#         lighting_time_of_day = scaler.fit_transform(fg_attempts[['lighting_condition']].values).flatten()
#         is_home_team = scaler.fit_transform(fg_attempts['is_home'].values.reshape(-1,1)).flatten()

#         # Weather features (keep original)
#         is_outdoor = fg_attempts['is_indoor'].values == 0
#         temperature = np.zeros(len(fg_attempts))
#         temperature[is_outdoor] = scaler.fit_transform(fg_attempts.loc[is_outdoor, ['temperature']])[:, 0]
#         chance_of_rain = np.zeros(len(fg_attempts))
#         chance_of_rain[is_outdoor] = scaler.fit_transform(fg_attempts.loc[is_outdoor, ['chance_of_rain']])[:, 0]
#         snow_severity = np.zeros(len(fg_attempts))
#         snow_severity[is_outdoor] = scaler.fit_transform(fg_attempts.loc[is_outdoor, ['snow_severity']])[:, 0]
#         wind_gust = np.zeros(len(fg_attempts))
#         wind_gust[is_outdoor] = scaler.fit_transform(fg_attempts.loc[is_outdoor, ['wind_gust']])[:, 0]

#         # Stadium and season (keep original)
#         stadium_ids, stadium_idx = np.unique(fg_attempts['stadium_id'], return_inverse=True)
#         season = scaler.fit_transform(fg_attempts[['season']].values).flatten()

#         # Initialize array to store samples
#         p_repl_samples = np.zeros((n_samples, len(fg_attempts)))
        
#         # Sample from posterior for each selected sample
#         for i, (chain_idx, draw_idx) in tqdm(enumerate(zip(chain_indices, draw_indices)), total=n_samples, desc="Calculating Replacement Level Predictions"):
#             # Create replacement level logit for this sample
#             logit_p_repl = (
#                 trace.posterior['alpha'].isel(chain=chain_idx, draw=draw_idx).values
#                 + replacement_kicker_intercept  # replacement level intercept
#                 + replacement_kicker_ytg_slope * yards_to_goal  # replacement level slope
#                 + trace.posterior['beta_age'].isel(chain=chain_idx, draw=draw_idx).values * kicker_age_repl_scaled
#                 + trace.posterior['beta_age2'].isel(chain=chain_idx, draw=draw_idx).values * kicker_age_poly_repl_scaled
#                 + trace.posterior['beta_ytg2'].isel(chain=chain_idx, draw=draw_idx).values * yards_to_goal_poly
#                 + trace.posterior['beta_pressure_rating'].isel(chain=chain_idx, draw=draw_idx).values * pressure_rating
#                 + trace.posterior['beta_iced_kicker'].isel(chain=chain_idx, draw=draw_idx).values * iced_kicker
#                 + trace.posterior['beta_lighting_time_of_day'].isel(chain=chain_idx, draw=draw_idx).values * lighting_time_of_day
#                 + trace.posterior['beta_is_home_team'].isel(chain=chain_idx, draw=draw_idx).values * is_home_team
#                 + trace.posterior['beta_season'].isel(chain=chain_idx, draw=draw_idx).values * season
#                 + trace.posterior['stadium_intercepts'].isel(chain=chain_idx, draw=draw_idx).values[stadium_idx]
#                 + trace.posterior['beta_is_indoor'].isel(chain=chain_idx, draw=draw_idx).values * (1 - is_outdoor)
#             )
            
#             # Apply weather effects (conditional on outdoor)
#             outdoor_mask = is_outdoor.astype(int)
#             outdoor_stadium_ids = np.unique(fg_attempts.loc[is_outdoor, 'stadium_id'])
#             stadium_to_outdoor_idx = {sid: idx for idx, sid in enumerate(outdoor_stadium_ids)}
#             outdoor_stadium_idx = np.full(len(fg_attempts), -1, dtype=int)
            
#             for j in range(len(fg_attempts)):
#                 stadium_id = fg_attempts.iloc[j]['stadium_id']
#                 if stadium_id in stadium_to_outdoor_idx:
#                     outdoor_stadium_idx[j] = stadium_to_outdoor_idx[stadium_id]
            
#             # Add weather effects for outdoor games
#             weather_effect = (
#                 trace.posterior['beta_temperature_outdoor'].isel(chain=chain_idx, draw=draw_idx).values[outdoor_stadium_idx] * temperature +
#                 trace.posterior['beta_chance_of_rain_outdoor'].isel(chain=chain_idx, draw=draw_idx).values[outdoor_stadium_idx] * chance_of_rain +
#                 trace.posterior['beta_snow_severity_outdoor'].isel(chain=chain_idx, draw=draw_idx).values[outdoor_stadium_idx] * snow_severity +
#                 trace.posterior['beta_wind_gust_outdoor'].isel(chain=chain_idx, draw=draw_idx).values[outdoor_stadium_idx] * wind_gust
#             ) * outdoor_mask
            
#             logit_p_repl += weather_effect
            
#             # Calculate probabilities for this sample
#             p_repl_samples[i] = 1 / (1 + np.exp(-logit_p_repl))
        
#         # Calculate summary statistics across samples
#         repl_preds = fg_attempts.copy()
#         repl_preds['repl_fg_prob_mean'] = np.mean(p_repl_samples, axis=0)
#         repl_preds['repl_fg_prob_median'] = np.median(p_repl_samples, axis=0)
#         repl_preds['repl_fg_prob_std'] = np.std(p_repl_samples, axis=0)
#         repl_preds['repl_fg_prob_lower'] = np.percentile(p_repl_samples, 5, axis=0)
#         repl_preds['repl_fg_prob_upper'] = np.percentile(p_repl_samples, 95, axis=0)
        
#         return repl_preds

# # Calculate xFGPOR for all kicker seasons
# fg_attempts_not_blocked = (
#     fg_attempts.query('field_goal_result != "blocked"')
#     .copy()
#     .assign(
#         fg_made=lambda x: x['field_goal_result'].eq('made').astype(int)
#     )
# )
# repl_preds = calculate_replacement_level_predictions(fg_attempts_not_blocked, trace, model, n_samples=1000)

In [19]:
# def calculate_xfgpor(fg_attempts, trace, model, n_samples=1000):
#     """
#     Calculate expected FG percentage over replacement (xFGPOR) for each kicker season.
    
#     Args:
#         fg_attempts: DataFrame containing all FG attempts
#         trace: PyMC trace object from the fitted model
#         model: PyMC model object
#         n_samples: Number of posterior samples to use
        
#     Returns:
#         DataFrame with xFGPOR for each kicker season
#     """
    
#     # Calculate difference between actual and replacement level
#     fg_attempts['fg_prob_diff'] = (fg_attempts['fg_made'] - fg_attempts['repl_fg_prob_mean'])
#     fg_attempts['xFGPOR'] = 3 * fg_attempts['fg_prob_diff']
    
#     # Aggregate by kicker season
#     kicker_season_stats = (
#         fg_attempts
#         .groupby(['kicker_player_id', 'kicker_player_name', 'season'])
#         .agg(
#             attempts=('field_goal_result', 'count'),
#             fg_made=('fg_made', 'sum'),
#             fg_pct=('fg_made', 'mean'),
#             avg_repl_prob=('repl_fg_prob_mean', 'mean'),
#             xFGPOR=('xFGPOR', 'sum'),
#         )
#         .reset_index()
#     )
    
#     # Sort by xFGPOR
#     kicker_season_stats = kicker_season_stats.sort_values('xFGPOR', ascending=False)

#     return kicker_season_stats

# fg_attempts_replacement = (
#     pd.concat([fg_attempts_not_blocked, repl_preds.iloc[:,-5:]], axis=1)
# )
# kicker_season_xfgpor = calculate_xfgpor(fg_attempts_replacement, trace, model, n_samples=1000)