The following code uses the idea from Claus Herther's blog post on Bayesian modelling of field goals (https://calogica.com/pymc3/python/2020/01/10/nfl-field-goals-bayes.html) and applies it to tracking data of field goals. The code is only a brief outline of the model; more details are in the blog post.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pymc3 as pm
import arviz as az

pd.options.display.max_columns = 999

import matplotlib.pyplot as plt
import seaborn as sns
import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))



In [None]:
# Load the play-level table (one CSV for all seasons) into a DataFrame.
play_data = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2022/plays.csv')

In [None]:
# Keep only field-goal plays, and only the join keys plus the yard line and
# the play result.
is_field_goal = play_data['specialTeamsPlayType'] == 'Field Goal'
field_goal_data = play_data.loc[
    is_field_goal,
    ['gameId', 'playId', 'absoluteYardlineNumber', 'specialTeamsResult'],
]

In [None]:
# Load the PFF scouting table.
# NOTE(review): pff_data is not referenced by any later cell visible here —
# confirm whether this load is still needed.
pff_data = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2022/PFFScoutingData.csv')

In [None]:
# Read each season's tracking file, keep only the rows tagged with the
# 'field_goal_attempt' event, and stack the seasons into one DataFrame.
frames = []

for year in (2018, 2019, 2020):
    path = f'/kaggle/input/nfl-big-data-bowl-2022/tracking{year}.csv'
    data = pd.read_csv(path)
    data = data.loc[data['event'] == 'field_goal_attempt']
    frames.append(data)
tracking_data = pd.concat(frames)


In [None]:
# Drop the loop variable left over from loading the tracking files; the
# concatenated rows live on in tracking_data.
del data

In [None]:
# Attach the play-level field-goal columns to each tracking row.
# The join keys are spelled out so the merge cannot silently change meaning
# if the two tables ever share another column name; only tracking rows that
# belong to a field-goal play survive the inner join.
tracking_data = pd.merge(
    tracking_data,
    field_goal_data,
    on=['gameId', 'playId'],
    how='inner',
)

In [None]:
# Mirror plays whose playDirection is 'left' so that every kick is expressed
# in the same orientation (x reflected about 120, y reflected about 53.33).
moving_left = tracking_data['playDirection'] == 'left'
tracking_data.loc[moving_left, 'x'] = 120 - tracking_data.loc[moving_left, 'x']
tracking_data.loc[moving_left, 'y'] = 53.33 - tracking_data.loc[moving_left, 'y']

In [None]:
# Keep only the football's tracking rows. Take an explicit copy: new columns
# are assigned to this frame later, and assigning to a filtered slice of
# tracking_data would raise pandas' SettingWithCopyWarning.
field_goal_ball_df = tracking_data[tracking_data.team == 'football'].copy()

The angle subtended at the ball by the two goalposts is given by $\theta=\arccos\left(\frac{\vec{BA}\cdot\vec{BC}}{|\vec{BA}|\,|\vec{BC}|}\right)$, where $B$ is the ball, $A$ and $C$ are the two goalposts, and $\vec{BA}$ and $\vec{BC}$ are the vectors from the ball to each goalpost.

The NFL goalpost width is 18.5 feet (6 1/6 yards) and the width of the whole field is 160 feet (53 1/3 yards), so the field's center line is at $26\frac{2}{3}$ yards and the goalposts are at $26\frac{2}{3} \pm 3\frac{1}{12}$ yards, i.e. approximately 23.583 and 29.75 yards.

In [None]:
# Vectors from the ball (B) to each goalpost (A at y=23.583, C at y=29.75),
# both on the x=120 line. One row per ball position.
ball_xy = field_goal_ball_df[['x', 'y']].to_numpy(dtype=float)
ba = np.array([120, 23.583]) - ball_xy
bc = np.array([120, 29.75]) - ball_xy

# Row-wise dot product and norms give cos(theta) for every kick at once,
# replacing the per-row Python loop.
cos_theta = (ba * bc).sum(axis=1) / (np.linalg.norm(ba, axis=1) * np.linalg.norm(bc, axis=1))

# Floating-point rounding can push the ratio marginally outside [-1, 1],
# where arccos returns NaN; clip before taking the angle (in degrees).
field_goal_ball_df['angle'] = np.degrees(np.arccos(np.clip(cos_theta, -1.0, 1.0)))

Similarly, instead of using the yard-line marker for the field goal distance in the traditional way, here the field goal distance is measured from the ball to the center of the goalposts.

In [None]:
# Straight-line distance from the ball to the center of the goalposts.
# The center is the midpoint of the two post coordinates used for the angle
# (23.583 and 29.75, i.e. about 26.667); the previous constant 26.33 was
# roughly a third of a yard off center.
goal_center_y = (23.583 + 29.75) / 2
field_goal_ball_df['fg_dist'] = np.hypot(
    field_goal_ball_df['x'] - 120,
    field_goal_ball_df['y'] - goal_center_y,
)

In [None]:
# Discard rows whose computed distance exceeds 70. Copy the result because
# further columns are assigned to this frame afterwards, which on a filtered
# slice would raise pandas' SettingWithCopyWarning.
field_goal_ball_df = field_goal_ball_df[field_goal_ball_df.fg_dist <= 70].copy()

In [None]:
# Binary outcome: 1 when the play result is 'Kick Attempt Good', else 0.
kick_is_good = field_goal_ball_df['specialTeamsResult'] == 'Kick Attempt Good'
field_goal_ball_df['fg_make'] = kick_is_good.astype(int)

In [None]:
# Observed outcomes, and a matching array of ones used as the number of
# trials per observation.
y = field_goal_ball_df['fg_make']
n = np.ones(y.shape, dtype=y.dtype)

In [None]:
def plot_kick_data(df, ax=None):
    """Plot observed field-goal outcomes against kick distance.

    Draws a seaborn logistic regression plot of ``fg_make`` on ``fg_dist``
    and formats the y axis as percentages.

    Parameters
    ----------
    df : DataFrame with ``fg_dist`` and ``fg_make`` columns.
    ax : matplotlib Axes to draw on; a new 12x8 figure is created when None.

    Returns
    -------
    The Axes that was drawn on.
    """
    # Imported locally so this cell does not depend on an extra top-level import.
    from matplotlib.ticker import PercentFormatter

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(12, 8))
    sns.regplot(x="fg_dist", y="fg_make", data=df, label="observed", ax=ax, logistic=True)

    # Use a formatter rather than set_yticklabels on the current ticks:
    # fixed labels without fixed tick locations can end up on the wrong ticks
    # once the axis is rescaled, and matplotlib warns about it.
    ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0, decimals=1))
    ax.set_xlabel("Yards Kicked")
    ax.set_ylabel("Field Goal Success %")
    ax.set_title("Field Goal % by Yards Kicked")

    return ax

In [None]:
# Plot the observed make/miss data against distance; the returned Axes is discarded.
_ = plot_kick_data(field_goal_ball_df)

As stated in the blog post, the model is specified as follows:

$\large \alpha\sim Normal(0,1)$

$\large \beta\sim Normal(0,1)$

$\large z = \alpha + \beta \ast X_i$

$\large p_i = sigmoid(z)$

$\large y\sim Binomial(n, p_i)$

Three models are considered: field goal distance only; distance and angle; and distance, angle and the interaction between distance and angle.

In [None]:
# Model 1: logistic regression of make/miss on kick distance only.
X = np.array(field_goal_ball_df["fg_dist"])

with pm.Model() as model_logit_yards:

    # Normal(0, 1) priors on the intercept and the distance slope.
    # `sigma` is the documented keyword; `sd` is only a legacy alias.
    α = pm.Normal("α", mu=0, sigma=1)
    β = pm.Normal("β", mu=0, sigma=1)

    # Linear predictor on the logit scale, mapped to a probability per kick.
    # `p` is registered as a Deterministic so it is stored in the trace.
    z = α + β * X
    p = pm.Deterministic("p", pm.math.invlogit(z))

    # Likelihood of the observed outcomes y with n trials per observation.
    y_obs = pm.Binomial("y_obs", n=n, p=p, observed=y)

In [None]:
# Render the distance-only model's graph as the cell output.
pm.model_to_graphviz(model_logit_yards)

In [None]:
# Sample the distance-only model: 2000 draws, 2000 tuning steps, 2 chains,
# target_accept set to 0.95; the result is returned as InferenceData.
with model_logit_yards:
    trace_logit_yards = pm.sample(2000, tune=2000, chains=2, target_accept=0.95, return_inferencedata=True)

In [None]:
# Summary table for α and β only (the per-observation "p" is left out).
az.summary(trace_logit_yards,var_names=["α","β"])

In [None]:
# Trace plots for α and β of the distance-only model.
az.plot_trace(trace_logit_yards,var_names=["α","β"])

In [None]:


def plot_probs_yard(df, probas_posterior, model_name, ax=None):
    """Overlay a model's 95% HDI band for ``p`` on the observed kick data.

    Parameters
    ----------
    df : DataFrame with ``fg_dist`` and ``fg_make`` columns; its ``fg_dist``
        values are used as the x coordinates of the band.
    probas_posterior : posterior samples of the success probability, passed
        to ``az.plot_hdi`` as ``y``.
    model_name : text appended to the plot title.
    ax : matplotlib Axes to draw on; a new 12x8 figure is created when None.

    Returns
    -------
    The Axes that was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(12, 8))

    # Take x from the `df` argument rather than the notebook-level
    # field_goal_ball_df, so the band matches whatever frame the caller passes.
    az.plot_hdi(
        x=np.array(df["fg_dist"]),
        y=probas_posterior,
        hdi_prob=.95,
        ax=ax,
        fill_kwargs={'alpha': .2},
    )

    plot_kick_data(df, ax)
    ax.set_title(f"{ax.get_title()} - {model_name}")

    return ax



In [None]:
# Posterior band of "p" from the distance-only model over the observed data.
plot_probs_yard(field_goal_ball_df, trace_logit_yards.posterior["p"], "Logistic Model: Distance")

In [None]:
# Two-column design matrix: distance and angle.
X = field_goal_ball_df[["fg_dist", "angle"]].to_numpy()


In [None]:
# Model 2: logistic regression on the columns of X (one β per column).
with pm.Model() as model_logit_yards_angle:

    # Normal(0, 0.1) priors on the intercept and on each coefficient.
    # `sigma` is the documented keyword; `sd` is only a legacy alias.
    α = pm.Normal("α", mu=0, sigma=.1)
    β = pm.Normal("β", mu=0, sigma=.1, shape=X.shape[1])

    # Linear predictor on the logit scale.
    z = α + pm.math.dot(X, β)

    # Per-observation success probability, stored in the trace as "p".
    p = pm.Deterministic("p", pm.math.invlogit(z))

    # Likelihood of the observed outcomes y with n trials per observation.
    y_obs = pm.Binomial("y_obs", n=n, p=p, observed=y)


In [None]:
# Render the distance + angle model's graph as the cell output.
pm.model_to_graphviz(model_logit_yards_angle)

In [None]:
# Sample the distance + angle model: 2000 draws, 2000 tuning steps, 2 chains,
# target_accept set to 0.95; the result is returned as InferenceData.
with model_logit_yards_angle:
    trace_logit_yards_angle = pm.sample(2000, tune=2000, chains=2, target_accept=0.95, return_inferencedata=True)

In [None]:
# Summary table for α and β only (the per-observation "p" is left out).
az.summary(trace_logit_yards_angle,var_names=["α","β"])

In [None]:
# Trace plots for α and β of the distance + angle model.
az.plot_trace(trace_logit_yards_angle,var_names=["α","β"])

In [None]:
# Posterior band of "p" from the distance + angle model over the observed data.
plot_probs_yard(field_goal_ball_df, trace_logit_yards_angle.posterior["p"], "Logistic Model: Yards + Angle")

In [None]:
# Interaction feature: product of distance and angle.
field_goal_ball_df["fg_dist_angle"] = field_goal_ball_df["fg_dist"] * field_goal_ball_df["angle"]

# Three-column design matrix. Converted to a NumPy array, as for the earlier
# models, so the model receives a plain ndarray rather than a DataFrame.
X = np.array(field_goal_ball_df[["fg_dist", "angle", "fg_dist_angle"]])


In [None]:
# Model 3: logistic regression on the columns of X (one β per column).
with pm.Model() as model_logit_yards_angle_interactions:

    # Normal(0, 0.1) priors on the intercept and on each coefficient.
    # `sigma` is the documented keyword; `sd` is only a legacy alias.
    α = pm.Normal("α", mu=0, sigma=.1)
    β = pm.Normal("β", mu=0, sigma=.1, shape=X.shape[1])

    # Linear predictor on the logit scale.
    z = α + pm.math.dot(X, β)

    # Per-observation success probability, stored in the trace as "p".
    p = pm.Deterministic("p", pm.math.invlogit(z))

    # Likelihood of the observed outcomes y with n trials per observation.
    y_obs = pm.Binomial("y_obs", n=n, p=p, observed=y)

In [None]:
# Render the interaction model's graph as the cell output.
pm.model_to_graphviz(model_logit_yards_angle_interactions)

In [None]:
# Sample the interaction model: 2000 draws, 2000 tuning steps, 2 chains,
# target_accept set to 0.95; the result is returned as InferenceData.
with model_logit_yards_angle_interactions:
    trace_logit_yards_angle_interactions = pm.sample(2000, tune=2000, chains=2, target_accept=0.95, return_inferencedata=True)


In [None]:
# Summary table for α and β only (the per-observation "p" is left out).
az.summary(trace_logit_yards_angle_interactions,var_names=["α","β"])

In [None]:
# Trace plots for α and β of the interaction model.
az.plot_trace(trace_logit_yards_angle_interactions,var_names=["α","β"])

In [None]:
# Posterior band of "p" from the interaction model over the observed data.
plot_probs_yard(field_goal_ball_df, trace_logit_yards_angle_interactions.posterior["p"], "Logistic Model: Yards + Angle + Yards:Angle")

In [None]:
# Compare the three fitted models with ArviZ using the LOO criterion.
# The dict keys become the model labels in the comparison table and plot;
# the third label previously carried a stray closing parenthesis.
compare_dict = {
    "Logistic Model: Distance": trace_logit_yards,
    "Logistic Model: Distance + Angle": trace_logit_yards_angle,
    "Logistic Model: Distance + Angle + Distance*Angle": trace_logit_yards_angle_interactions,
}
df_compare = az.compare(compare_dict, ic="loo")
df_compare

In [None]:
# Draw the model-comparison table on a single 10x5 Axes.
_, ax = plt.subplots(figsize=(10, 5))
az.plot_compare(df_compare, ax=ax);

After using real tracking data for modelling, the three models are not significantly different from one another, so the simplest model, with field goal distance only, could be considered for further modelling.