# Prediction of Punt Location using Player and Ball Positions at Time of Snap
### &emsp;When a team chooses to punt, the goal of the punting team is usually to give the receiving team poor field position. The outcome of the play depends on many factors: where the punter kicks the ball, the efficacy of the gunners getting downfield, the efficacy of the returner, etc. An important part of this is how the kicking and receiving teams are aligned prior to the play (e.g., if there are multiple jammers on each gunner, if the receiving team is going for a block, if the returner is anticipating a fair catch). The goal of this analysis is to use player and ball positioning in order to estimate where the punter will kick the ball, as this could be beneficial for special teams strategy.<br>&emsp;This notebook uses provided tracking data of players and the football from punt plays in order to build two models that predict where the punter is most likely to kick the ball on a given play. The models accept inputs of the x and y coordinates of all 22 players and the ball at the time of snap (i.e., when "event" equals "ball_snap") and output the estimated x and y coordinates of where the ensuing punt will land. Two models were developed: one using traditional linear regression and the other a neural network for regression. The models perform about the same against the test data, both having around 12 yards of error on average. Both models have similar error/score/loss values against train and test data, which indicates good generalization.<br>&emsp;The notebook is organized into three parts:
## Part 1: Data Cleansing and Exploratory Visualization
#### Organization of data for model building and visualization
## Part 2: Model Implementation
#### The definition and fitting/training of both the linear regression model and neural network for regression
## Part 3: Analysis of Model Results
#### Testing of models
<br>

In [None]:
from types import SimpleNamespace

import matplotlib.animation as anm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display, Markdown, HTML
from keras.models import Sequential
from keras.layers import Dense
from matplotlib.lines import Line2D
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

# Display matplotlib animations in the notebook through the 'jshtml' animation writer
plt.rcParams['animation.html'] = 'jshtml'
# Raise the embedded-animation size cap (matplotlib documents this value as megabytes)
plt.rcParams['animation.embed_limit'] = 100

## Part 1: Data Cleansing and Exploratory Visualization

In [None]:
# Define constants
# Each SimpleNamespace groups related string constants so the rest of the notebook can
# refer to them as attributes (e.g. COLUMN.GAME_ID) instead of repeating raw literals.

# Dataframe column names: source columns, merge-suffixed columns (x_x, y_y, ...) and
# columns derived in this notebook. Every value here is also used as the keep-list when
# unused columns are dropped from the loaded dataframes.
COLUMN = SimpleNamespace(
    PLAY_TYPE='specialTeamsPlayType',
    SPECIAL_TEAMS_RESULT='specialTeamsResult',
    POSITION='position',
    PLAY_EVENT='event',
    GAME_ID='gameId',
    PLAY_ID='playId',
    HOME_TEAM='homeTeamAbbr',
    VISITOR_TEAM='visitorTeamAbbr',
    TEAM='team',
    POSSESSION_TEAM='possessionTeam',
    TIME='time',
    NFL_ID='nflId',
    RETURNER_ID='returnerId',
    PRIMARY_RETURNER_ID='primaryReturnerId',
    X='x',
    Y='y',
    X_X='x_x',
    Y_X='y_x',
    X_Y='x_y',
    Y_Y='y_y',
    COLOR='color',
    FRAME='frame',
    SORT_POSITION='sortPosition',
    PLAYER_TEAM_ABBR='playerTeamAbbr',
    IS_PUNT_TEAM='isPuntTeam',
    PUNT_TEAM_Y_POSITION='puntTeamYPosition',
    REMAINING_SORT_LOCATION='remainingSortLocation',
    SORT_VALUE='sortValue',
    FEATURES='features',
    LINEAR_REG_PREDICTION='linearRegressionPrediction',
    NEURAL_NET_PREDICTION='neuralNetworkPrediction',
    LINEAR_REG_ABS_ERR='linearRegressionAbsoluteError',
    NEURAL_NET_ABS_ERR='neuralNetworkAbsoluteError',
)

# Values of the player "position" column that this notebook cares about
POSITION = SimpleNamespace(PUNTER='P')

# Values of the special-teams play type column
PLAY_TYPE = SimpleNamespace(PUNT='Punt')

# Values of the tracking "event" column
PLAY_EVENT = SimpleNamespace(
    BALL_SNAP='ball_snap',
    PUNT='punt',
    PUNT_RECEIVED='punt_received',
    OUT_OF_BOUNDS='out_of_bounds',
    PUNT_LAND='punt_land',
    FAIR_CATCH='fair_catch',
    PUNT_MUFFED='punt_muffed',
)

# Values of the special-teams result column
SPECIAL_TEAMS_RESULT = SimpleNamespace(
    BLOCKED='Blocked Kick Attempt',
    DOWNED='Downed',
    RETURN='Return',
    TOUCHBACK='Touchback',
    FAIR_CATCH='Fair Catch',
    MUFFED='Muffed',
    OUT_OF_BOUNDS='Out of Bounds',
)

# Values of the tracking "team" column (the ball is tracked as its own "team")
TEAM = SimpleNamespace(
    HOME='home',
    AWAY='away',
    FOOTBALL='football',
)

# Matplotlib colour names used by the plots
COLOR = SimpleNamespace(
    GREEN='green',
    BLUE='blue',
    RED='red',
    PURPLE='purple',
    ORANGE='orange',
)

In [None]:
# Load data into dataframes
# Every CSV is read in full from the nfl-big-data-bowl-2022 input directory (the path is
# relative to the working directory). Tracking data comes as one file per season.
# NOTE(review): the tracking files are presumably large; columns not listed in COLUMN are
# only discarded after loading - consider `usecols` if memory becomes a problem.
games_df = pd.read_csv('../input/nfl-big-data-bowl-2022/games.csv')
plays_df = pd.read_csv('../input/nfl-big-data-bowl-2022/plays.csv')
pff_scouting_df = pd.read_csv('../input/nfl-big-data-bowl-2022/PFFScoutingData.csv')
tracking_2018_df = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2018.csv')
tracking_2019_df = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2019.csv')
tracking_2020_df = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2020.csv')

In [None]:
# Drop unused columns
# Every value in COLUMN acts as the keep-list; names a dataframe does not have are ignored.
columns = list(vars(COLUMN).values())


def _keep_known_columns(df):
    """Return *df* restricted to the columns whose names appear in ``columns``."""
    return df[df.columns.intersection(columns)]


games_df = _keep_known_columns(games_df)
plays_df = _keep_known_columns(plays_df)
pff_scouting_df = _keep_known_columns(pff_scouting_df)
tracking_2018_df = _keep_known_columns(tracking_2018_df)
tracking_2019_df = _keep_known_columns(tracking_2019_df)
tracking_2020_df = _keep_known_columns(tracking_2020_df)

In [None]:
# Initial consolidating and tidying
# Attach the game-level columns to every play (inner join on the game id)
game_plays_df = games_df.merge(plays_df, on=COLUMN.GAME_ID)

# Stack the three seasons of tracking data into a single frame with a fresh index
tracking_df = pd.concat(
    [tracking_2018_df, tracking_2019_df, tracking_2020_df],
    ignore_index=True,
)

# Normalise dtypes: timestamps become datetimes, player ids become nullable integers
tracking_df[COLUMN.TIME] = pd.to_datetime(tracking_df[COLUMN.TIME])
tracking_df[COLUMN.NFL_ID] = tracking_df[COLUMN.NFL_ID].astype('Int64')

In [None]:
# Add a column indicating the primary/first returner for each play.
# This will be used as a feature in the models later on.
def getPrimaryReturnerId(value):
    """Return the first returner id found in a raw returner-id cell.

    Args:
        value: A cell of the returner-id column. Either null, a single id, or several
            ids joined with ';' (the first one is treated as the primary returner).

    Returns:
        The first id as an ``int``, or ``np.nan`` when *value* is null.

    Raises:
        ValueError: If the first id cannot be interpreted as a number.
    """
    if pd.isnull(value):
        # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
        return np.nan
    first = str(value).split(';')[0].strip()
    try:
        return int(first)
    except ValueError:
        # An id that was parsed as a float stringifies as e.g. "44979.0"
        return int(float(first))

# Parse every play's returner-id cell, then store the result as nullable integers so that
# plays without a returner keep a missing value instead of forcing a float column
primary_returner_ids = game_plays_df[COLUMN.RETURNER_ID].apply(getPrimaryReturnerId)
game_plays_df[COLUMN.PRIMARY_RETURNER_ID] = primary_returner_ids.astype('Int64')

In [None]:
# Filter down to punts only
play_keys = [COLUMN.GAME_ID, COLUMN.PLAY_ID]

# Plays whose special-teams play type is a punt
punt_plays_df = game_plays_df[game_plays_df[COLUMN.PLAY_TYPE] == PLAY_TYPE.PUNT]

# Two inner joins on (game id, play id): a tracking row survives only if its play is
# present in the PFF scouting data and is one of the punt plays selected above
punt_tracking_df = (
    pff_scouting_df
    .merge(tracking_df, on=play_keys)
    .merge(punt_plays_df, on=play_keys)
)

In [None]:
# Plot player and ball positions over the course of each punt for one game

plot_punts_df = punt_tracking_df[punt_tracking_df[COLUMN.GAME_ID] == 2018090600].copy()

# Number the animation frames 1..N in chronological order. Only a single game is selected
# above, so dense-ranking the timestamp alone yields the same numbering as ranking
# (game id, time) tuples, without building a Python tuple for every tracking row.
plot_punts_df[COLUMN.FRAME] = plot_punts_df[COLUMN.TIME].rank(ascending=True, method='dense')

# Colour each marker by what it represents
conditions = [
    plot_punts_df[COLUMN.TEAM] == TEAM.HOME,
    plot_punts_df[COLUMN.TEAM] == TEAM.AWAY,
    plot_punts_df[COLUMN.TEAM] == TEAM.FOOTBALL,
]
values = [
    COLOR.GREEN,
    COLOR.BLUE,
    COLOR.RED,
]
# np.select's implicit default is the integer 0, which current NumPy refuses to combine
# with string choices (dtype-promotion TypeError). '0' is matplotlib's grayscale notation
# for black, so any row matching none of the conditions is drawn in black.
plot_punts_df[COLUMN.COLOR] = np.select(conditions, values, default='0')

fig, ax = plt.subplots(figsize=(7.5, 4.5))
ax.set(xlim=(-10, 110), ylim=(-10, 63))

# Draw the first frame; the animation callback then moves/recolours these markers
first_frame = plot_punts_df[plot_punts_df[COLUMN.FRAME] == 1]
scatter = ax.scatter(
    first_frame[COLUMN.X],
    first_frame[COLUMN.Y],
    c=first_frame[COLUMN.COLOR],
)
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label='Home Team', markerfacecolor=COLOR.GREEN),
    Line2D([0], [0], marker='o', color='w', label='Away Team', markerfacecolor=COLOR.BLUE),
    Line2D([0], [0], marker='o', color='w', label='Football', markerfacecolor=COLOR.RED),
]
ax.legend(handles=legend_elements, loc='lower right')
plt.title('Player and Ball Positions over Time')


def plot_punts_anim_fn(i):
    """Update the scatter plot to show animation step *i* (0-based; frames are 1-based)."""
    frame_rows = plot_punts_df[plot_punts_df[COLUMN.FRAME] == i + 1]
    scatter.set_offsets(np.c_[frame_rows[COLUMN.X], frame_rows[COLUMN.Y]])
    scatter.set_color(frame_rows[COLUMN.COLOR])


plot_punts_anim = anm.FuncAnimation(
    fig,
    plot_punts_anim_fn,
    interval=50,
    frames=int(plot_punts_df[COLUMN.FRAME].max()),
    repeat=True,
)
# Close the static figure so only the animation widget is shown as the cell output
plt.close()
plot_punts_anim

## Part 2: Model Implementation

In [None]:
'''
Predict punt landing position based on pre-snap inputs (e.g. ball and player positions)

Inputs:
  - Positions of all players and ball at time of snap (22 players + ball, x and y positions)
Outputs:
  - Ball landing position x, y
  
A, B, C, D, E, F

A * x0 + B * x1 + C * x2 + D * x3 ... = ball landing position

                      SORT
x0 = ball x           1
x1 = ball y           2
x2 = punter x
x3 = punter y
x4 = returner x
x5 = returner y
x6 = player3 x
x7 = player3 y
etc...

y0 = ball landing x
y1 = ball landing y
'''

# Special-teams results kept for modelling (SPECIAL_TEAMS_RESULT.BLOCKED is not included)
special_teams_results = [
    SPECIAL_TEAMS_RESULT.RETURN,
    SPECIAL_TEAMS_RESULT.TOUCHBACK,
    SPECIAL_TEAMS_RESULT.FAIR_CATCH,
    SPECIAL_TEAMS_RESULT.DOWNED,
    SPECIAL_TEAMS_RESULT.MUFFED,
    SPECIAL_TEAMS_RESULT.OUT_OF_BOUNDS,
]

# Tracking events treated as "the ball has come down" when picking the landing position
# (PLAY_EVENT.OUT_OF_BOUNDS is not included)
ball_land_events = [
    PLAY_EVENT.PUNT_RECEIVED,
    PLAY_EVENT.PUNT_LAND,
    PLAY_EVENT.FAIR_CATCH,
    PLAY_EVENT.PUNT_MUFFED,
]

# Keep only the tracking rows of plays that ended with one of the results above
has_modelled_result = punt_tracking_df[COLUMN.SPECIAL_TEAMS_RESULT].isin(special_teams_results)
punt_predict_df = punt_tracking_df[has_modelled_result]

In [None]:
# Label positions of football, punter and returner as features 1, 2 and 3
# NOTE(review): is_football, is_punter and is_returner are evaluated over every row of
# punt_predict_df but are not referenced again in the visible notebook; the conditions
# used for labelling are rebuilt from inputs_df below.
is_football = punt_predict_df[COLUMN.TEAM] == TEAM.FOOTBALL
is_punter = punt_predict_df[COLUMN.POSITION] == POSITION.PUNTER
is_returner = (~np.isnan(punt_predict_df[COLUMN.PRIMARY_RETURNER_ID])) \
    & (~np.isnan(punt_predict_df[COLUMN.NFL_ID])) \
    & (punt_predict_df[COLUMN.PRIMARY_RETURNER_ID] == punt_predict_df[COLUMN.NFL_ID])
# Keep only the tracking rows recorded at the snap; .copy() so the columns added below
# are written to an independent dataframe
inputs_df = punt_predict_df[punt_predict_df[COLUMN.PLAY_EVENT] == PLAY_EVENT.BALL_SNAP].copy()

# np.select takes the value of the first condition that matches; rows matching none of
# the conditions receive its default of 0 (i.e. "no fixed slot")
conditions = [
    (inputs_df[COLUMN.TEAM] == TEAM.FOOTBALL),
    (inputs_df[COLUMN.POSITION] == POSITION.PUNTER),
    # Missing ids are replaced with two different sentinels (-1 and -2) so that a missing
    # returner id can never compare equal to a missing player id.
    # NOTE(review): np.where(..., True, False) presumably forces a plain NumPy bool array
    # out of the nullable-integer comparison - confirm before simplifying it away.
    np.where(inputs_df[COLUMN.PRIMARY_RETURNER_ID].fillna(-1) == inputs_df[COLUMN.NFL_ID].fillna(-2), True, False),
]
values = [1, 2, 3]
inputs_df.loc[:, COLUMN.SORT_POSITION] = np.select(conditions, values)

In [None]:
# Use the remaining player positions sorted by team and y position for the rest of our features

# Team abbreviation of each row: home rows take the home abbreviation, every other row
# (away players and the football alike) takes the visitor abbreviation
is_home_row = inputs_df[COLUMN.TEAM] == TEAM.HOME
inputs_df.loc[:, COLUMN.PLAYER_TEAM_ABBR] = np.where(
    is_home_row,
    inputs_df[COLUMN.HOME_TEAM],
    inputs_df[COLUMN.VISITOR_TEAM],
)

# True when the row belongs to the team in possession, i.e. the punting team
on_punt_team = inputs_df[COLUMN.PLAYER_TEAM_ABBR] == inputs_df[COLUMN.POSSESSION_TEAM]
inputs_df.loc[:, COLUMN.IS_PUNT_TEAM] = on_punt_team.to_numpy()

# Rank rows within each play by the pair (is punt team, y); ties are broken by order of
# appearance. The ranks are shifted by 3 so they come after the fixed slots 1-3.
inputs_df.loc[:, COLUMN.PUNT_TEAM_Y_POSITION] = inputs_df[[COLUMN.IS_PUNT_TEAM, COLUMN.Y]].apply(tuple, axis=1)
sort_keys_by_play = inputs_df.groupby([COLUMN.GAME_ID, COLUMN.PLAY_ID])[COLUMN.PUNT_TEAM_Y_POSITION]
inputs_df.loc[:, COLUMN.REMAINING_SORT_LOCATION] = sort_keys_by_play.rank(method='first') + 3

# Rows already labelled 1-3 keep their fixed slot; everything else uses the shifted rank
has_fixed_slot = inputs_df[COLUMN.SORT_POSITION] > 0
inputs_df.loc[:, COLUMN.SORT_VALUE] = np.where(
    has_fixed_slot,
    inputs_df[COLUMN.SORT_POSITION],
    inputs_df[COLUMN.REMAINING_SORT_LOCATION],
)

In [None]:
# Assemble sorted tracking data into model input a.k.a features
play_keys = [COLUMN.GAME_ID, COLUMN.PLAY_ID]
inputs_df = inputs_df.sort_values(by=play_keys + [COLUMN.SORT_VALUE])

# Collapse each play into one row holding its x values, y values and sort slots as lists,
# all in sort-slot order. The slots are carried along only to validate the play below.
aggs = {
    COLUMN.X: lambda x: x.to_list(),
    COLUMN.Y: lambda y: y.to_list(),
    COLUMN.SORT_VALUE: lambda slots: slots.to_list(),
}
inputs_df = inputs_df.groupby(play_keys).agg(aggs).reset_index()

# Remove plays where we have missing tracking data for the ball and/or one or more players
has_all_entities = (
    (inputs_df[COLUMN.X].map(len) == 23)
    & (inputs_df[COLUMN.Y].map(len) == 23)
)

# Remove plays whose first three entries are not exactly football (1), punter (2) and
# returner (3). Without this check a play with no identified returner still has 23
# entries, and list position 2 would silently hold some other player.
has_key_slots = inputs_df[COLUMN.SORT_VALUE].map(lambda slots: slots[:3] == [1, 2, 3])

inputs_df = inputs_df[has_all_entities & has_key_slots].drop(columns=COLUMN.SORT_VALUE)

# Remove plays where returner is near line of scrimmage as this is not indicative of a typical punt play
# (list position 0 is the football, position 2 the returner; threshold is 10 in x).
# .copy() so that columns added afterwards are not written to a slice of another frame.
inputs_df = inputs_df[
    np.abs(inputs_df[COLUMN.X].str[0] - inputs_df[COLUMN.X].str[2]) > 10
].copy()

# Combine coordinates into x and y pairs
def merge_coordinates(df):
    """Interleave one play's x and y lists into ``[x0, y0, x1, y1, ...]``.

    Args:
        df: A single row of ``inputs_df`` whose x and y cells are lists with at least
            23 entries each.

    Returns:
        A flat list of 46 values: x then y for each of the 23 entries, in list order.
    """
    xs = df[COLUMN.X]
    ys = df[COLUMN.Y]
    return [coordinate for i in range(23) for coordinate in (xs[i], ys[i])]

inputs_df[COLUMN.FEATURES] = inputs_df.apply(merge_coordinates, axis=1)

In [None]:
# Filter to ball landing position for model output
is_landing_event = punt_predict_df[COLUMN.PLAY_EVENT].isin(ball_land_events)
is_football_row = punt_predict_df[COLUMN.TEAM] == TEAM.FOOTBALL

outputs_df = (
    punt_predict_df[is_landing_event & is_football_row]
    # Order each play's landing-type events chronologically
    .sort_values(by=[COLUMN.GAME_ID, COLUMN.PLAY_ID, COLUMN.TIME])
    # A play may log more than one landing-type event (e.g. the ball lands and is then
    # picked up). Keep only the earliest so each play contributes exactly one target row;
    # otherwise the merge with the inputs duplicates the play and copies of it can end up
    # on both sides of the train/test split.
    .drop_duplicates(subset=[COLUMN.GAME_ID, COLUMN.PLAY_ID], keep='first')
    .copy()
)

In [None]:
# Merge model inputs/outputs and use 80/20 train test split
# Both frames carry x and y columns, so the merge suffixes them: x_x / y_x are the
# per-play coordinate lists from the inputs, x_y / y_y the ball landing position.
model_data_df = inputs_df.merge(outputs_df, on=[COLUMN.GAME_ID, COLUMN.PLAY_ID])

# Feature matrix: one row per play, built from the interleaved coordinate lists
feature_rows = model_data_df[COLUMN.FEATURES].tolist()
X = np.array(feature_rows)

# Targets: landing x and landing y
y = model_data_df[[COLUMN.X_Y, COLUMN.Y_Y]].to_numpy()

# Fixed random_state keeps the split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [None]:
# Fit our linear regression model
# Only the training split is used for fitting; fit() returns the fitted estimator itself
linear_model = LinearRegression()
reg = linear_model.fit(X_train, y_train)

In [None]:
# Calculate linear regression model predictions
# Predictions are made for every play (train and test rows alike) and stored per row as
# a [landing x, landing y] list
linear_reg_predictions = reg.predict(X)
model_data_df[COLUMN.LINEAR_REG_PREDICTION] = linear_reg_predictions.tolist()

In [None]:
# Plot linear regression model predictions for a handful of plays
plays = 50
# Never request more frames than there are plays available to show
frame_count = min(plays, len(model_data_df))
fig, ax = plt.subplots(figsize=(7.5, 4.5))
ax.set(xlim=(-10, 110), ylim=(-10, 63))

# Marker colours, in the same order as the points returned by _linear_reg_points
colors = [
    COLOR.RED,
    COLOR.GREEN,
    COLOR.BLUE,
    COLOR.PURPLE,
    COLOR.ORANGE,
]


def _linear_reg_points(row):
    """Return ``(xs, ys)`` for one play.

    Points are, in order: football at snap, punter, returner, actual punt placement and
    the linear regression's predicted punt placement.
    """
    snap_x = row[COLUMN.X_X]
    snap_y = row[COLUMN.Y_X]
    predicted = row[COLUMN.LINEAR_REG_PREDICTION]
    xs = [snap_x[0], snap_x[1], snap_x[2], row[COLUMN.X_Y], predicted[0]]
    ys = [snap_y[0], snap_y[1], snap_y[2], row[COLUMN.Y_Y], predicted[1]]
    return xs, ys


# Plot the initial data points (the first play)
initial_x, initial_y = _linear_reg_points(model_data_df.iloc[0])
football_x, punter_x, returner_x, actual_x, predict_x = initial_x
football_y, punter_y, returner_y, actual_y, predict_y = initial_y
scatter = ax.scatter(x=initial_x, y=initial_y, c=colors)

# Add legend/title
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label='Football At Snap', markerfacecolor=COLOR.RED),
    Line2D([0], [0], marker='o', color='w', label='Punter', markerfacecolor=COLOR.GREEN),
    Line2D([0], [0], marker='o', color='w', label='Returner', markerfacecolor=COLOR.BLUE),
    Line2D([0], [0], marker='o', color='w', label='Actual Punt Placement', markerfacecolor=COLOR.PURPLE),
    Line2D([0], [0], marker='o', color='w', label='Predicted Punt Placement', markerfacecolor=COLOR.ORANGE),
]
ax.legend(handles=legend_elements, loc='lower right')
plt.title('Linear Regression Punt Placement Prediction (Play by Play)')


def linear_reg_anim_fn(i):
    """Move the five markers to play *i* (0-based row of ``model_data_df``)."""
    # Frame i shows row i, so the first play is part of the animation and the last
    # frame never indexes past the end of the dataframe.
    xs, ys = _linear_reg_points(model_data_df.iloc[i])
    scatter.set_offsets(np.c_[xs, ys])
    scatter.set_color(colors)


linear_reg_anim = anm.FuncAnimation(
    fig,
    linear_reg_anim_fn,
    interval=1000,
    frames=frame_count,
    repeat=True,
)
# Close the static figure so only the animation widget is shown as the cell output
plt.close()
linear_reg_anim

In [None]:
# Predict X
# Summary statistics of the linear regression's predicted landing x (element 0 of each
# per-play prediction list)
model_data_df[COLUMN.LINEAR_REG_PREDICTION].str[0].describe()

In [None]:
# Actual X
# Summary statistics of the actual landing x, for comparison with the predicted x
model_data_df[COLUMN.X_Y].describe()

In [None]:
# Predict Y
# Summary statistics of the linear regression's predicted landing y (element 1 of each
# per-play prediction list)
model_data_df[COLUMN.LINEAR_REG_PREDICTION].str[1].describe()

In [None]:
# Standard deviation of predicted y position is much smaller than actual
# (compare the "std" row of this output with that of the predicted-y summary)
# Actual Y
model_data_df[COLUMN.Y_Y].describe()

In [None]:
# Define the neural network regression model
def get_model(n_inputs, n_outputs):
    """Build and compile a regression network with a single hidden layer.

    Args:
        n_inputs: Number of input features per sample.
        n_outputs: Number of regression targets per sample.

    Returns:
        A compiled ``Sequential`` model: one ReLU hidden layer followed by a linear
        output layer, trained on mean absolute error with the Adam optimizer.
    """
    # 23 entries x 2 coordinates x 10 = 460 hidden units (fixed, independent of n_inputs)
    hidden_units = 23 * 2 * 10
    layers = [
        Dense(hidden_units, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'),
        Dense(n_outputs),
    ]
    network = Sequential()
    for layer in layers:
        network.add(layer)
    network.compile(loss='mae', optimizer='adam')
    return network

# Size the network from the feature matrix and target array
model = get_model(X.shape[1], y.shape[1])

In [None]:
# Train the neural network regression model
# Only the training split is used; verbose=0 turns off the per-epoch progress output
model.fit(X_train, y_train, epochs=1000, verbose=0)

In [None]:
# Calculate neural network model predictions
# Predictions are made for every play (train and test rows alike) and stored per row as
# a [landing x, landing y] list
neural_net_predictions = model.predict(X)
model_data_df[COLUMN.NEURAL_NET_PREDICTION] = neural_net_predictions.tolist()

In [None]:
# Plot neural network regression model predictions for a handful of plays
plays = 50
# Never request more frames than there are plays available to show
frame_count = min(plays, len(model_data_df))
fig, ax = plt.subplots(figsize=(7.5, 4.5))
ax.set(xlim=(-10, 110), ylim=(-10, 63))

# Marker colours, in the same order as the points returned by _neural_net_points
colors = [
    COLOR.RED,
    COLOR.GREEN,
    COLOR.BLUE,
    COLOR.PURPLE,
    COLOR.ORANGE,
]


def _neural_net_points(row):
    """Return ``(xs, ys)`` for one play.

    Points are, in order: football at snap, punter, returner, actual punt placement and
    the neural network's predicted punt placement.
    """
    snap_x = row[COLUMN.X_X]
    snap_y = row[COLUMN.Y_X]
    predicted = row[COLUMN.NEURAL_NET_PREDICTION]
    xs = [snap_x[0], snap_x[1], snap_x[2], row[COLUMN.X_Y], predicted[0]]
    ys = [snap_y[0], snap_y[1], snap_y[2], row[COLUMN.Y_Y], predicted[1]]
    return xs, ys


# Plot the initial data points (the first play)
initial_x, initial_y = _neural_net_points(model_data_df.iloc[0])
football_x, punter_x, returner_x, actual_x, predict_x = initial_x
football_y, punter_y, returner_y, actual_y, predict_y = initial_y
scatter = ax.scatter(x=initial_x, y=initial_y, c=colors)

# Add legend/title
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label='Football At Snap', markerfacecolor=COLOR.RED),
    Line2D([0], [0], marker='o', color='w', label='Punter', markerfacecolor=COLOR.GREEN),
    Line2D([0], [0], marker='o', color='w', label='Returner', markerfacecolor=COLOR.BLUE),
    Line2D([0], [0], marker='o', color='w', label='Actual Punt Placement', markerfacecolor=COLOR.PURPLE),
    Line2D([0], [0], marker='o', color='w', label='Predicted Punt Placement', markerfacecolor=COLOR.ORANGE),
]
ax.legend(handles=legend_elements, loc='lower right')
plt.title('Neural Network Punt Placement Prediction (Play by Play)')


def neural_net_anim_fn(i):
    """Move the five markers to play *i* (0-based row of ``model_data_df``)."""
    # Frame i shows row i, so the first play is part of the animation and the last
    # frame never indexes past the end of the dataframe.
    xs, ys = _neural_net_points(model_data_df.iloc[i])
    scatter.set_offsets(np.c_[xs, ys])
    scatter.set_color(colors)


neural_net_anim = anm.FuncAnimation(
    fig,
    neural_net_anim_fn,
    interval=1000,
    frames=frame_count,
    repeat=True,
)
# Close the static figure so only the animation widget is shown as the cell output
plt.close()
neural_net_anim

## Part 3: Analysis of Model Results

In [None]:
# Calculate each model's per-play error against all data (train and test rows alike).
# Use Pythagorean Theorem to calculate the "direct" yardage between estimate and actual;
# the mean of these per-play distances is taken when the results are reported.
landing_x = model_data_df[COLUMN.X_Y]
landing_y = model_data_df[COLUMN.Y_Y]

# Linear regression: element 0 of each prediction is x, element 1 is y
linear_reg_predicted = model_data_df[COLUMN.LINEAR_REG_PREDICTION]
linear_reg_dx = landing_x - linear_reg_predicted.str[0]
linear_reg_dy = landing_y - linear_reg_predicted.str[1]
model_data_df[COLUMN.LINEAR_REG_ABS_ERR] = (linear_reg_dx ** 2 + linear_reg_dy ** 2) ** 0.5

# Neural network: same layout as above
neural_net_predicted = model_data_df[COLUMN.NEURAL_NET_PREDICTION]
neural_net_dx = landing_x - neural_net_predicted.str[0]
neural_net_dy = landing_y - neural_net_predicted.str[1]
model_data_df[COLUMN.NEURAL_NET_ABS_ERR] = (neural_net_dx ** 2 + neural_net_dy ** 2) ** 0.5

In [None]:
# Calculate each model's per-play error against the test data only.
# Use Pythagorean Theorem to calculate the "direct" yardage between estimate and actual
# (column 0 of the target/prediction arrays is x, column 1 is y).

# Linear regression
y_test_predict = reg.predict(X_test)
linear_reg_dx = y_test[:, 0] - y_test_predict[:, 0]
linear_reg_dy = y_test[:, 1] - y_test_predict[:, 1]
linearRegressionAbsoluteErrorTest = (linear_reg_dx ** 2 + linear_reg_dy ** 2) ** 0.5

# Neural network
y_nn_test_predict = model.predict(X_test)
neural_net_dx = y_test[:, 0] - y_nn_test_predict[:, 0]
neural_net_dy = y_test[:, 1] - y_nn_test_predict[:, 1]
neuralNetworkAbsoluteErrorTest = (neural_net_dx ** 2 + neural_net_dy ** 2) ** 0.5

In [None]:
# Compare absolute error w/ all data vs test data to see if linear regression model is overfitting
# ("all data" includes the training rows, so a much larger test error would hint at overfitting)
all_data_mean_error = model_data_df[COLUMN.LINEAR_REG_ABS_ERR].mean()
test_data_mean_error = linearRegressionAbsoluteErrorTest.mean(axis=0)

# Score the linear regression model on train and test data to see if model is overfitting
train_data_score = reg.score(X_train, y_train)
test_data_score = reg.score(X_test, y_test)

# Render the four figures as bold-labelled markdown lines, errors first, then scores
linear_reg_report = [
    Markdown(f'**Linear Regression Model Absolute Error w/ All Data:** {all_data_mean_error}'),
    Markdown(f'**Linear Regression Model Absolute Error w/ Test Data:** {test_data_mean_error}'),
    Markdown(f'**Linear Regression Model Score w/ Train Data:** {train_data_score}'),
    Markdown(f'**Linear Regression Model Score w/ Test Data:** {test_data_score}'),
]
for report_line in linear_reg_report:
    display(report_line)

In [None]:
# Compare absolute error w/ all data vs test data to see if neural net regression is overfitting
# ("all data" includes the training rows, so a much larger test error would hint at overfitting)
all_data_mean_error = model_data_df[COLUMN.NEURAL_NET_ABS_ERR].mean()
test_data_mean_error = neuralNetworkAbsoluteErrorTest.mean(axis=0)
neural_net_report = [
    Markdown(f'**Neural Network Absolute Error w/ All Data:** {all_data_mean_error}'),
    Markdown(f'**Neural Network Absolute Error w/ Test Data:** {test_data_mean_error}'),
]
for report_line in neural_net_report:
    display(report_line)

# Evaluate the neural net regression on train and test data to see if model is overfitting
# The returned loss values are discarded; evaluate() prints its own progress/loss output,
# and assigning to `_` keeps the value from being echoed as the cell result.
display(Markdown('**Neural Network Evaluated on Train Data:**'))
_ = model.evaluate(X_train, y_train)
display(Markdown('**Neural Network Evaluated on Test Data:**'))
_ = model.evaluate(X_test, y_test)