In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

## Loading Data

In [5]:
weeks = []
for i in range(1, 19):
    df = pd.read_csv(f"train/input_2023_w{i:02d}.csv")
    weeks.append(df)

prethrow = pd.concat(weeks, ignore_index=True)

In [7]:
weeks = []
for i in range(1, 19):
    df = pd.read_csv(f"train/output_2023_w{i:02d}.csv")
    weeks.append(df)

postthrow = pd.concat(weeks, ignore_index=True)

## Getting Relevant Features

In [9]:
relevant_prethrow = prethrow[['game_id','play_id','nfl_id','frame_id','x','y','s','a','o','dir','ball_land_x','ball_land_y','player_position','player_role']]

Calculating distance to ball

In [10]:
relevant_prethrow['distance_to_land'] = np.sqrt((relevant_prethrow['ball_land_x'] - relevant_prethrow['x'])**2 + (relevant_prethrow['ball_land_y'] - relevant_prethrow['y'])**2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_prethrow['distance_to_land'] = np.sqrt((relevant_prethrow['ball_land_x'] - relevant_prethrow['x'])**2 + (relevant_prethrow['ball_land_y'] - relevant_prethrow['y'])**2)


Engineering Other Features

In [11]:
dx = relevant_prethrow['x'] - relevant_prethrow['ball_land_x']
dy = relevant_prethrow['y'] - relevant_prethrow['ball_land_y']

angles = np.degrees(np.arctan2(dy, dx))
angles = (angles + 90) % 360

relevant_prethrow['angle_to_land'] = angles
def angle_diff(a, b):
    diff = (a - b + 180) % 360 - 180
    return abs(diff)
relevant_prethrow['movement_angle_difference'] = relevant_prethrow.apply(
    lambda row: angle_diff(row['angle_to_land'], row['dir']), axis=1
)

relevant_prethrow['orientation_angle_difference'] = relevant_prethrow.apply(
    lambda row: angle_diff(row['angle_to_land'], row['o']), axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_prethrow['angle_to_land'] = angles
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_prethrow['movement_angle_difference'] = relevant_prethrow.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_prethrow['orientation_angle_difference'] = relevant_prethrow.apply(


Create rolling summary stats

In [69]:
prethrow = relevant_prethrow.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id'])

group_cols = ['game_id', 'play_id', 'nfl_id']

In [67]:
def summarize_last10(group):
    roll = group[['x', 'y', 's', 'a', 'dir']].rolling(window=10, min_periods=1)
    
    # Compute stats
    summary = pd.DataFrame({
        'x_mean': roll['x'].mean(),
        'x_std': roll['x'].std(),
        'x_first': group['x'].shift(9).fillna(group['x'].iloc[0]),   # first of last 10 frames
        'x_last': group['x'],
        'x_min': roll['x'].min(),
        'x_max': roll['x'].max(),

        'y_mean': roll['y'].mean(),
        'y_std': roll['y'].std(),
        'y_first': group['y'].shift(9).fillna(group['y'].iloc[0]),
        'y_last': group['y'],
        'y_min': roll['y'].min(),
        'y_max': roll['y'].max(),

        'speed_mean': roll['s'].mean(),
        'speed_std': roll['s'].std(),

        'accel_mean': roll['a'].mean(),
        'accel_std': roll['a'].std(),

        'dir_mean': roll['dir'].mean(),
        'dir_std': roll['dir'].std(),
        'dir_first': group['dir'].shift(9).fillna(group['dir'].iloc[0]),
        'dir_last': group['dir'],
    })

    # Add identifying columns back
    summary[group_cols + ['frame_id']] = group[group_cols + ['frame_id']].values

    # Return only the last frame of this group
    return summary.iloc[[-1]]

In [70]:
prethrow = prethrow.groupby(group_cols, group_keys=False).apply(summarize_last10).reset_index(drop=True)

Merging Back

In [71]:
lastframe = relevant_prethrow.loc[
        lambda df: df.groupby(['game_id', 'play_id', 'nfl_id'])['frame_id'].idxmax()
    ]

In [72]:
lastframe = pd.merge(
    lastframe,
    prethrow,
    how = "left",
    on = ['game_id','play_id','nfl_id']
)

In [73]:
lastframe = lastframe.drop(columns=['frame_id_x','frame_id_y'])

## Getting Post Throw Frames

In [74]:
firstframe = postthrow.loc[
        lambda df: df.groupby(['game_id', 'play_id', 'nfl_id'])['frame_id'].idxmin()
    ]

In [75]:
data = pd.merge(
    lastframe,
    postthrow.rename(columns={'x': 'target_x', 'y': 'target_y'}),
    how = "inner",
    on = ['game_id','play_id','nfl_id']
)

## Modeling

In [76]:
data.columns

Index(['game_id', 'play_id', 'nfl_id', 'x', 'y', 's', 'a', 'o', 'dir',
       'ball_land_x', 'ball_land_y', 'player_position', 'player_role',
       'distance_to_land', 'angle_to_land', 'movement_angle_difference',
       'orientation_angle_difference', 'x_mean', 'x_std', 'x_first', 'x_last',
       'x_min', 'x_max', 'y_mean', 'y_std', 'y_first', 'y_last', 'y_min',
       'y_max', 'speed_mean', 'speed_std', 'accel_mean', 'accel_std',
       'dir_mean', 'dir_std', 'dir_first', 'dir_last', 'frame_id', 'target_x',
       'target_y'],
      dtype='object')

In [77]:
data = pd.get_dummies(data, columns=['player_role'], prefix='role')

In [78]:
subset, _ = train_test_split(data, train_size=0.1, random_state=42)

In [79]:
X = subset.drop(columns=['game_id','play_id','nfl_id','target_x','target_y', 'x_last','y_last','dir_last', 'player_position'])
y = subset[['target_x','target_y']]

In [80]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [81]:
model = RandomForestRegressor(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

In [82]:
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

In [83]:
r2_train_x = r2_score(y_train['target_x'], y_train_pred[:, 0])
r2_train_y = r2_score(y_train['target_y'], y_train_pred[:, 1])

r2_test_x = r2_score(y_test['target_x'], y_test_pred[:, 0])
r2_test_y = r2_score(y_test['target_y'], y_test_pred[:, 1])

print(f"Training R²: x = {r2_train_x:.3f}, y = {r2_train_y:.3f}")
print(f"Testing  R²: x = {r2_test_x:.3f}, y = {r2_test_y:.3f}")

Training R²: x = 0.999, y = 0.998
Testing  R²: x = 0.996, y = 0.985


In [84]:
mae_train_x = mean_absolute_error(y_train['target_x'], y_train_pred[:, 0])
mae_train_y = mean_absolute_error(y_train['target_y'], y_train_pred[:, 1])
mae_test_x = mean_absolute_error(y_test['target_x'], y_test_pred[:, 0])
mae_test_y = mean_absolute_error(y_test['target_y'], y_test_pred[:, 1])

print(f"Training MAE: x = {mae_train_x:.3f}, y = {mae_train_y:.3f}")
print(f"Testing  MAE: x = {mae_test_x:.3f}, y = {mae_test_y:.3f}")

Training MAE: x = 0.371, y = 0.396
Testing  MAE: x = 1.000, y = 1.082


In [85]:
rmse_x = np.sqrt(np.mean((y_test['target_x'] - y_test_pred[:, 0])**2))
rmse_y = np.sqrt(np.mean((y_test['target_y'] - y_test_pred[:, 1])**2))
print("X: ", rmse_x, "\n Y: ", rmse_y)

X:  1.5530257628136492 
 Y:  1.6385495519573139


## Predicting Week 17

In [86]:
week17_prethrow = pd.read_csv("train/input_2023_w17.csv")
week17_postthrow = pd.read_csv("train/output_2023_w17.csv")

In [87]:
relevant_prethrow_17 = week17_prethrow[['game_id','play_id','nfl_id','frame_id','x','y','s','a','o','dir','ball_land_x','ball_land_y','player_position','player_role']]

Calculating distance to ball

In [88]:
relevant_prethrow_17['distance_to_land'] = np.sqrt((relevant_prethrow_17['ball_land_x'] - relevant_prethrow_17['x'])**2 + (relevant_prethrow_17['ball_land_y'] - relevant_prethrow_17['y'])**2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_prethrow_17['distance_to_land'] = np.sqrt((relevant_prethrow_17['ball_land_x'] - relevant_prethrow_17['x'])**2 + (relevant_prethrow_17['ball_land_y'] - relevant_prethrow_17['y'])**2)


Engineering Other Features

In [89]:
dx = relevant_prethrow_17['x'] - relevant_prethrow_17['ball_land_x']
dy = relevant_prethrow_17['y'] - relevant_prethrow_17['ball_land_y']

angles = np.degrees(np.arctan2(dy, dx))
angles = (angles + 90) % 360

relevant_prethrow_17['angle_to_land'] = angles
def angle_diff(a, b):
    diff = (a - b + 180) % 360 - 180
    return abs(diff)
relevant_prethrow_17['movement_angle_difference'] = relevant_prethrow_17.apply(
    lambda row: angle_diff(row['angle_to_land'], row['dir']), axis=1
)

relevant_prethrow_17['orientation_angle_difference'] = relevant_prethrow_17.apply(
    lambda row: angle_diff(row['angle_to_land'], row['o']), axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_prethrow_17['angle_to_land'] = angles
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_prethrow_17['movement_angle_difference'] = relevant_prethrow_17.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_prethrow_17['orientation_angle_difference'] = relevant_prethrow_17.ap

Create rolling summary stats

In [90]:
prethrow_17 = relevant_prethrow_17.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id'])

group_cols = ['game_id', 'play_id', 'nfl_id']

In [91]:
prethrow_17 = relevant_prethrow_17.groupby(group_cols, group_keys=False).apply(summarize_last10).reset_index(drop=True)

Merging Back

In [92]:
lastframe_17 = relevant_prethrow_17.loc[
        lambda df: df.groupby(['game_id', 'play_id', 'nfl_id'])['frame_id'].idxmax()
    ]

In [93]:
lastframe_17 = pd.merge(
    lastframe_17,
    prethrow_17,
    how = "left",
    on = ['game_id','play_id','nfl_id']
)

In [94]:
lastframe_17 = lastframe_17.drop(columns=['frame_id_x','frame_id_y'])

## Getting Post Throw Frames

In [95]:
firstframe_17 = week17_postthrow.loc[
        lambda df: df.groupby(['game_id', 'play_id', 'nfl_id'])['frame_id'].idxmin()
    ]

In [96]:
data_17 = pd.merge(
    lastframe_17,
    week17_postthrow.rename(columns={'x': 'target_x', 'y': 'target_y'}),
    how = "inner",
    on = ['game_id','play_id','nfl_id']
)

In [97]:
data_17 = pd.get_dummies(data_17, columns=['player_role'], prefix='role')

In [98]:
X_17 = data_17.drop(columns=['game_id','play_id','nfl_id','target_x','target_y', 'x_last','y_last','dir_last', 'player_position'])
y_17 = data_17[['target_x','target_y']]

In [99]:
y_pred_17 = model.predict(X_17)

In [100]:
r2_test_x = r2_score(y_17['target_x'], y_pred_17[:, 0])
r2_test_y = r2_score(y_17['target_y'], y_pred_17[:, 1])

print(f"Testing  R²: x = {r2_test_x:.3f}, y = {r2_test_y:.3f}")

Testing  R²: x = 0.996, y = 0.987


In [101]:
mae_test_x = mean_absolute_error(y_17['target_x'], y_pred_17[:, 0])
mae_test_y = mean_absolute_error(y_17['target_y'], y_pred_17[:, 1])

print(f"Testing  MAE: x = {mae_test_x:.3f}, y = {mae_test_y:.3f}")

Testing  MAE: x = 0.957, y = 0.981


In [102]:
rmse_x = np.sqrt(np.mean((y_17['target_x'] - y_pred_17[:, 0])**2))
rmse_y = np.sqrt(np.mean((y_17['target_y'] - y_pred_17[:, 1])**2))
print("X: ", rmse_x, "\n Y: ", rmse_y)

X:  1.5718698366580106 
 Y:  1.4720230777883117
