# Residual Reach Model Experiments

Baseline notebook for iterating on the defender reach residual model (calibration, architecture search, and diagnostics).

## Setup
Configure paths once so rerunning across machines is easy.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

from analytics.residual_model import (
    FEATURE_NAMES,
    TARGET_NAMES,
    collect_residual_samples,
)

# Path configuration, resolved relative to the kernel's working directory.
# `Path('..').resolve()` is the parent of the working directory and
# `.parents[0]` climbs one level further, so the root ends up two levels
# above wherever the notebook is launched from.
# NOTE(review): confirm the two-level hop matches the repo layout.
# The data root currently shares the same directory as the repo root.
REPO_ROOT = DATA_ROOT = Path('..').resolve().parents[0]
print('Repo root:', REPO_ROOT)

## Sample Residual Dataset
Pull a manageable subset for quick iteration. Use `max_games`/`max_samples_per_play` knobs to keep RAM low.

In [None]:
# Sampling limits are deliberately small for quick iteration; `max_games` and
# `max_samples_per_play` are the knobs to adjust when trading coverage for RAM.
# The seed is fixed, presumably so the drawn subset is repeatable — confirm in
# `collect_residual_samples`.
sampling_kwargs = dict(
    root_dir=str(DATA_ROOT),
    max_games=1,
    max_samples_per_play=64,
    seed=2025,
)
samples = collect_residual_samples(**sampling_kwargs)

# Quick sanity check: overall size, then a preview of the first rows.
print(samples.shape)
samples.head()

## Train/Validation Split & Scaling

In [None]:
# Pull the feature and target columns out of the sample frame as float32 arrays.
X, y = (
    samples[cols].to_numpy(dtype=np.float32)
    for cols in (FEATURE_NAMES, TARGET_NAMES)
)

# 80/20 random row-level split with a fixed seed.
# NOTE(review): if rows drawn from the same play can land on both sides of the
# split, validation metrics may be optimistic — confirm whether a grouped split
# is needed.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardisation statistics are estimated on the training rows only and then
# applied unchanged to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_s, X_val_s = (scaler.transform(part) for part in (X_train, X_val))

## Baseline MLP Fit

In [None]:
# Baseline network: two hidden layers of 64 ReLU units, trained for at most
# 200 iterations with a fixed seed. `fit` hands back the estimator itself, so
# construction and training are chained.
mlp = MLPRegressor(
    hidden_layer_sizes=(64, 64),
    activation='relu',
    max_iter=200,
    random_state=42,
).fit(X_train_s, y_train)

# Mean absolute error (default settings) on the scaled training and
# validation partitions, evaluated in that order.
train_mae, val_mae = (
    mean_absolute_error(targets, mlp.predict(inputs))
    for targets, inputs in ((y_train, X_train_s), (y_val, X_val_s))
)
print({'train_mae': train_mae, 'val_mae': val_mae})

## Error Diagnostics
Plot distributions and inspect failure cases to guide architecture tweaks. Add more plots as experiments progress.

In [None]:
# Validation residuals: observed target minus model prediction, one column
# per target. (`plt` and `sns` are imported in the notebook's top import cell.)
resids = y_val - mlp.predict(X_val_s)

# One histogram (with KDE overlay) per residual column.
# NOTE(review): the titles assume column 0 is the longitudinal target and
# column 1 the lateral one; the x-axis label shows the actual TARGET_NAMES
# entry so a mismatch is visible — confirm the ordering.
panel_titles = ('Longitudinal Residuals', 'Lateral Residuals')
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
for col, (panel, title) in enumerate(zip(ax, panel_titles)):
    sns.histplot(resids[:, col], ax=panel, kde=True, bins=30)
    panel.set(
        title=title,
        xlabel=f'{TARGET_NAMES[col]} residual (observed - predicted)',
        ylabel='Count',
    )

# Keep the axis labels of neighbouring panels from overlapping.
fig.tight_layout()
plt.show()