This notebook shows how to:
1. Load tracking data from the NFL Big Data Bowl dataset  
2. Train the sequence + graph attention model (`SeqInterModel`)  
3. Monitor losses and RMSE  
4. Save the trained model for later inference

In [None]:
import os
import torch
import pandas as pd
import matplotlib.pyplot as plt

from src.train import train_seq_model

In [None]:
# Paths
# The data directory defaults to "/data/train" but can be overridden by setting
# the NFL_DATA_DIR environment variable, so the notebook also runs on machines
# where the data lives elsewhere without editing this cell.
DATA_DIR = os.environ.get("NFL_DATA_DIR", "/data/train")

# Use a subset for quick demo (one week)
f_in = os.path.join(DATA_DIR, "week01_input.csv")
f_out = os.path.join(DATA_DIR, "week01_output.csv")

# Fail early, naming every missing file at once, instead of surfacing the
# problem one file at a time from inside pd.read_csv.
missing_files = [path for path in (f_in, f_out) if not os.path.isfile(path)]
if missing_files:
    raise FileNotFoundError(
        "Missing data file(s): " + ", ".join(missing_files)
        + ". Set NFL_DATA_DIR or edit DATA_DIR to point at the data directory."
    )

df_in = pd.read_csv(f_in)
df_out = pd.read_csv(f_out)

# Quick sanity check of what was loaded: row counts plus the first few input rows.
print(f"Loaded {df_in.shape[0]:,} input rows and {df_out.shape[0]:,} output rows.")
print(df_in.head(3))

In [None]:
# Training parameters (small subset for demonstration)
# All keyword arguments are forwarded to src.train.train_seq_model; see that
# function for the exact meaning of each hyperparameter. The call returns the
# trained model and the best validation RMSE, which is reported below.
model, best_rmse = train_seq_model(
    df_in, df_out,
    n_train=300,    # number of plays for training
    n_val=60,       # number of plays for validation
    L=11,
    k_neighbors=12,
    hid=256,
    attn_dim=192,
    epochs=20,      # fewer epochs for quick demo
    lr=5.9e-4,
    w_smooth=0.09,
    v_max=10.14,
    seed=42
)

# The status prefix was mojibake ("âœ…" = UTF-8 bytes of the check mark decoded
# as cp1252); restored to the intended character.
print(f"✅ Best validation RMSE: {best_rmse:.3f}")


In [None]:
# Save only the model's state_dict (not the whole pickled object).
# NOTE(review): reloading presumably requires re-creating the model with the
# same hyperparameters used for training before calling load_state_dict — confirm
# against src.train.
model_path = "../models/demo_model.pth"

# Derive the directory from model_path so the two can never drift apart.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

torch.save(model.state_dict(), model_path)
# The status prefix was mojibake ("ðŸ’¾" = UTF-8 bytes of the floppy-disk emoji
# decoded as cp1252); restored to the intended character.
print(f"💾 Model saved to {model_path}")

In [None]:
# Illustrative plot only: the curves below are synthetic placeholders, not
# histories recorded during training. Swap in real per-epoch losses if the
# training loop exposes them.
epochs = list(range(1, 21))


def _synthetic_curve(offset):
    """Return a 1/sqrt(epoch) decay shifted up by `offset`, one value per epoch."""
    return [1.0/(e**0.5) + offset for e in epochs]


train_loss = _synthetic_curve(0.05)
val_loss = _synthetic_curve(0.1)

# Explicit figure/axes interface instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(7,5))
for series, series_label, series_marker in (
    (train_loss, "Train Loss", "o"),
    (val_loss, "Validation Loss", "s"),
):
    ax.plot(epochs, series, label=series_label, marker=series_marker)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Progress")
ax.grid(True, ls="--", alpha=0.5)
ax.legend()
plt.show()