# Neural Network Baseline with Modular Pipeline

This notebook demonstrates the modular neural network training approach.

## Features
- Imports reusable pipeline modules
- Data preparation stays in notebook (feature engineering experimentation)
- Model training via imported functions
- Model saving with automatic versioning
- Submission server integration


In [24]:
"""
Import pipeline modules for reusable functionality.
Data preparation and feature engineering remain in the notebook for experimentation.
"""

import torch
import numpy as np
import pandas as pd
import polars as pl
from pathlib import Path
from tqdm.auto import tqdm

from pipeline.config import Config, set_seed
from pipeline.models import SeqModel, TemporalHuber, prepare_targets
from pipeline.training import train_model
from pipeline.save_model import save_model_ensemble, load_model_ensemble
from pipeline.submission_server import create_submission_server
from pipeline.utils import timer, print_timing_summary
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold

# Set random seed for reproducibility
set_seed(42)


In [2]:
# Initialize configuration
config = Config()

print("Configuration:")
print(f"  Data directory: {config.DATA_DIR}")
print(f"  Window size: {config.WINDOW_SIZE}")
print(f"  Max future horizon: {config.MAX_FUTURE_HORIZON}")
print(f"  Number of folds: {config.N_FOLDS}")
print(f"  Device: {config.DEVICE}")


Configuration:
  Data directory: /Users/matth/projects/nfl-big-data-bowl-2026-prediction/data/raw/train_data
  Window size: 10
  Max future horizon: 94
  Number of folds: 5
  Device: cpu


## Data Loading


In [3]:
# Load training data
# TODO: Implement data loading and sequence preparation here
# This is where feature engineering happens - keep it in notebook for experimentation

# Example structure:
train_input_files = [config.DATA_DIR / f"input/input_2023_w{w:02d}.csv" for w in range(1, 19)]
train_output_files = [config.DATA_DIR / f"output/output_2023_w{w:02d}.csv" for w in range(1, 19)]
train_input = pd.concat([pd.read_csv(f) for f in train_input_files if f.exists()])
train_output = pd.concat([pd.read_csv(f) for f in train_output_files if f.exists()])

print("Data preparation would go here")


Data preparation would go here


## Data Preparation

In [4]:
def height_to_feet(height_str):
    try:
        ft, inches = map(int, str(height_str).split('-'))
        return ft + inches/12
    except:
        return 6.0

In [5]:
def add_advanced_features(df):
    """Original nflnn.py features"""
    print("Adding advanced features...")
    df = df.copy()
    df = df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id'])
    gcols = ['game_id', 'play_id', 'nfl_id']
    
    # Distance rate features (3)
    if 'distance_to_ball' in df.columns:
        df['distance_to_ball_change'] = df.groupby(gcols)['distance_to_ball'].diff().fillna(0)
        df['distance_to_ball_accel'] = df.groupby(gcols)['distance_to_ball_change'].diff().fillna(0)
        df['time_to_intercept'] = (df['distance_to_ball'] / 
                                    (np.abs(df['distance_to_ball_change']) + 0.1)).clip(0, 10)
    
    # Target alignment features (3)
    if 'ball_direction_x' in df.columns:
        df['velocity_alignment'] = (
            df['velocity_x'] * df['ball_direction_x'] +
            df['velocity_y'] * df['ball_direction_y']
        )
        df['velocity_perpendicular'] = (
            df['velocity_x'] * (-df['ball_direction_y']) +
            df['velocity_y'] * df['ball_direction_x']
        )
        if 'acceleration_x' in df.columns:
            df['accel_alignment'] = (
                df['acceleration_x'] * df['ball_direction_x'] +
                df['acceleration_y'] * df['ball_direction_y']
            )
    
    # Multi-window rolling (24)
    for window in [3, 5, 10]:
        for col in ['velocity_x', 'velocity_y', 's', 'a']:
            if col in df.columns:
                df[f'{col}_roll{window}'] = df.groupby(gcols)[col].transform(
                    lambda x: x.rolling(window, min_periods=1).mean()
                )
                df[f'{col}_std{window}'] = df.groupby(gcols)[col].transform(
                    lambda x: x.rolling(window, min_periods=1).std()
                ).fillna(0)
    
    # Extended lag features (8)
    for lag in [4, 5]:
        for col in ['x', 'y', 'velocity_x', 'velocity_y']:
            if col in df.columns:
                df[f'{col}_lag{lag}'] = df.groupby(gcols)[col].shift(lag).fillna(0)
    
    # Velocity change features (4)
    if 'velocity_x' in df.columns:
        df['velocity_x_change'] = df.groupby(gcols)['velocity_x'].diff().fillna(0)
        df['velocity_y_change'] = df.groupby(gcols)['velocity_y'].diff().fillna(0)
        df['speed_change'] = df.groupby(gcols)['s'].diff().fillna(0)
        df['direction_change'] = df.groupby(gcols)['dir'].diff().fillna(0)
        df['direction_change'] = df['direction_change'].apply(
            lambda x: x if abs(x) < 180 else x - 360 * np.sign(x)
        )
    
    # Field position features (4)
    df['dist_from_left'] = df['y']
    df['dist_from_right'] = 53.3 - df['y']
    df['dist_from_sideline'] = np.minimum(df['dist_from_left'], df['dist_from_right'])
    df['dist_from_endzone'] = np.minimum(df['x'], 120 - df['x'])
    
    # Role-specific features (3)
    if 'is_receiver' in df.columns and 'velocity_alignment' in df.columns:
        df['receiver_optimality'] = df['is_receiver'] * df['velocity_alignment']
        df['receiver_deviation'] = df['is_receiver'] * np.abs(df.get('velocity_perpendicular', 0))
    if 'is_coverage' in df.columns and 'closing_speed' in df.columns:
        df['defender_closing_speed'] = df['is_coverage'] * df['closing_speed']
    
    # Time features (2)
    df['frames_elapsed'] = df.groupby(gcols).cumcount()
    df['normalized_time'] = df.groupby(gcols)['frames_elapsed'].transform(
        lambda x: x / (x.max() + 1)
    )
    
    print(f"Total features after enhancement: {len(df.columns)}")
    
    return df

In [6]:
def add_safe_catboost_features(df):
    """🎯 ONLY SAFE, GENERALIZABLE ADDITIONS"""
    print("Adding safe CatBoost features (physics-based only)...")
    
    # Player BMI
    height_parts = df['player_height'].str.split('-', expand=True)
    df['height_inches'] = height_parts[0].astype(float) * 12 + height_parts[1].astype(float)
    df['bmi'] = (df['player_weight'] / (df['height_inches']**2)) * 703
    
    # Orientation alignment
    df['orientation_diff'] = np.abs(df['o'] - df['dir'])
    df['orientation_diff'] = np.minimum(df['orientation_diff'], 360 - df['orientation_diff'])
    
    # Non-linear physics
    df['speed_squared'] = df['s'] ** 2
    df['dist_squared'] = df['distance_to_ball'] ** 2
    
    # Angle to target
    df['angle_diff'] = np.abs(df['o'] - np.degrees(df['angle_to_ball']))
    df['angle_diff'] = np.minimum(df['angle_diff'], 360 - df['angle_diff'])
    
    # Better closing speed calculation
    df['velocity_toward_ball'] = (
        df['velocity_x'] * np.cos(df['angle_to_ball']) + 
        df['velocity_y'] * np.sin(df['angle_to_ball'])
    )
    
    print("Added 6 safe physics features")
    return df

In [7]:
def get_opponent_proximity_simple(input_df):
    """Only basic opponent proximity - no complex features"""
    features = []
    
    for (gid, pid), group in tqdm(input_df.groupby(['game_id', 'play_id']), 
                                   desc="🏈 Opponent proximity", leave=False):
        last = group.sort_values('frame_id').groupby('nfl_id').last()
        
        if len(last) < 2:
            continue
            
        positions = last[['x', 'y']].values
        sides = last['player_side'].values
        speeds = last['s'].values
        directions = last['dir'].values
        
        for i, (nid, side) in enumerate(zip(last.index, sides)):
            opp_mask = sides != side
            
            feat = {
                'game_id': gid, 'play_id': pid, 'nfl_id': nid,
                'nearest_opp_dist': 50.0,
                'num_nearby_opp_3': 0,
                'num_nearby_opp_5': 0,
                'closing_speed_opp': 0.0,
            }
            
            if not opp_mask.any():
                features.append(feat)
                continue
            
            opp_positions = positions[opp_mask]
            distances = np.sqrt(((positions[i] - opp_positions)**2).sum(axis=1))
            
            if len(distances) == 0:
                features.append(feat)
                continue
                
            nearest_idx = distances.argmin()
            feat['nearest_opp_dist'] = distances[nearest_idx]
            feat['num_nearby_opp_3'] = (distances < 3.0).sum()
            feat['num_nearby_opp_5'] = (distances < 5.0).sum()
            
            # Simple closing speed
            my_vx = speeds[i] * np.sin(np.deg2rad(directions[i]))
            my_vy = speeds[i] * np.cos(np.deg2rad(directions[i]))
            opp_speeds = speeds[opp_mask]
            opp_dirs = directions[opp_mask]
            opp_vx = opp_speeds[nearest_idx] * np.sin(np.deg2rad(opp_dirs[nearest_idx]))
            opp_vy = opp_speeds[nearest_idx] * np.cos(np.deg2rad(opp_dirs[nearest_idx]))
            
            rel_vx = my_vx - opp_vx
            rel_vy = my_vy - opp_vy
            to_me = positions[i] - opp_positions[nearest_idx]
            to_me_norm = to_me / (np.linalg.norm(to_me) + 0.1)
            feat['closing_speed_opp'] = -(rel_vx * to_me_norm[0] + rel_vy * to_me_norm[1])
            
            features.append(feat)
    
    return pd.DataFrame(features)

In [8]:
def add_basic_features(input_df):
    input_df = input_df.copy()
    input_df['player_height_feet'] = input_df['player_height'].apply(height_to_feet)
    
    dir_rad = np.deg2rad(input_df['dir'].fillna(0))
    delta_t = 0.1
    input_df['velocity_x'] = (input_df['s'] + 0.5 * input_df['a'] * delta_t) * np.sin(dir_rad)
    input_df['velocity_y'] = (input_df['s'] + 0.5 * input_df['a'] * delta_t) * np.cos(dir_rad)
    input_df['acceleration_x'] = input_df['a'] * np.sin(dir_rad)
    input_df['acceleration_y'] = input_df['a'] * np.cos(dir_rad)
    
    # Roles
    input_df['is_offense'] = (input_df['player_side'] == 'Offense').astype(int)
    input_df['is_defense'] = (input_df['player_side'] == 'Defense').astype(int)
    input_df['is_receiver'] = (input_df['player_role'] == 'Targeted Receiver').astype(int)
    input_df['is_coverage'] = (input_df['player_role'] == 'Defensive Coverage').astype(int)
    input_df['is_passer'] = (input_df['player_role'] == 'Passer').astype(int)
    
    # Physics
    mass_kg = input_df['player_weight'].fillna(200.0) / 2.20462
    input_df['momentum_x'] = input_df['velocity_x'] * mass_kg
    input_df['momentum_y'] = input_df['velocity_y'] * mass_kg
    input_df['kinetic_energy'] = 0.5 * mass_kg * (input_df['s'] ** 2)
    
    # Ball features
    if 'ball_land_x' in input_df.columns:
        ball_dx = input_df['ball_land_x'] - input_df['x']
        ball_dy = input_df['ball_land_y'] - input_df['y']
        input_df['distance_to_ball'] = np.sqrt(ball_dx**2 + ball_dy**2)
        input_df['angle_to_ball'] = np.arctan2(ball_dy, ball_dx)
        input_df['ball_direction_x'] = ball_dx / (input_df['distance_to_ball'] + 1e-6)
        input_df['ball_direction_y'] = ball_dy / (input_df['distance_to_ball'] + 1e-6)
        input_df['closing_speed'] = (
            input_df['velocity_x'] * input_df['ball_direction_x'] +
            input_df['velocity_y'] * input_df['ball_direction_y']
        )
    
    # Sort for temporal
    input_df = input_df.sort_values(['game_id', 'play_id', 'nfl_id', 'frame_id'])
    gcols = ['game_id', 'play_id', 'nfl_id']
    
    # Original lag features (1-3)
    for lag in [1, 2, 3]:
        input_df[f'x_lag{lag}'] = input_df.groupby(gcols)['x'].shift(lag)
        input_df[f'y_lag{lag}'] = input_df.groupby(gcols)['y'].shift(lag)
        input_df[f'velocity_x_lag{lag}'] = input_df.groupby(gcols)['velocity_x'].shift(lag)
        input_df[f'velocity_y_lag{lag}'] = input_df.groupby(gcols)['velocity_y'].shift(lag)
    
    # EMA features
    input_df['velocity_x_ema'] = input_df.groupby(gcols)['velocity_x'].transform(
        lambda x: x.ewm(alpha=0.3, adjust=False).mean()
    )
    input_df['velocity_y_ema'] = input_df.groupby(gcols)['velocity_y'].transform(
        lambda x: x.ewm(alpha=0.3, adjust=False).mean()
    )
    input_df['speed_ema'] = input_df.groupby(gcols)['s'].transform(
        lambda x: x.ewm(alpha=0.3, adjust=False).mean()
    )

    return input_df

In [18]:
def get_feature_cols(input_df):
    feature_cols = [
        # Core (9)
        'x', 'y', 's', 'a', 'o', 'dir', 'frame_id', 'ball_land_x', 'ball_land_y',
        
        # Player (4 - added height_inches, bmi)
        'player_height_feet', 'player_weight', 'height_inches', 'bmi',
        
        # Motion (7 - added speed_squared)
        'velocity_x', 'velocity_y', 'acceleration_x', 'acceleration_y',
        'momentum_x', 'momentum_y', 'kinetic_energy', 'speed_squared',
        
        # Roles (5)
        'is_offense', 'is_defense', 'is_receiver', 'is_coverage', 'is_passer',
        
        # Ball (7 - added dist_squared, velocity_toward_ball)
        'distance_to_ball', 'dist_squared', 'angle_to_ball', 
        'ball_direction_x', 'ball_direction_y', 'closing_speed', 'velocity_toward_ball',
        
        # Angles (2 - NEW)
        'orientation_diff', 'angle_diff',
        
        # Opponent (4 - NEW)
        'nearest_opp_dist', 'num_nearby_opp_3', 'num_nearby_opp_5', 'closing_speed_opp',
        
        # Original temporal (15)
        'x_lag1', 'y_lag1', 'velocity_x_lag1', 'velocity_y_lag1',
        'x_lag2', 'y_lag2', 'velocity_x_lag2', 'velocity_y_lag2',
        'x_lag3', 'y_lag3', 'velocity_x_lag3', 'velocity_y_lag3',
        'velocity_x_ema', 'velocity_y_ema', 'speed_ema',
        
        # Distance rate (3)
        'distance_to_ball_change', 'distance_to_ball_accel', 'time_to_intercept',
        
        # Target alignment (3)
        'velocity_alignment', 'velocity_perpendicular', 'accel_alignment',
        
        # Multi-window rolling (24)
        'velocity_x_roll3', 'velocity_x_std3', 'velocity_y_roll3', 'velocity_y_std3',
        's_roll3', 's_std3', 'a_roll3', 'a_std3',
        'velocity_x_roll5', 'velocity_x_std5', 'velocity_y_roll5', 'velocity_y_std5',
        's_roll5', 's_std5', 'a_roll5', 'a_std5',
        'velocity_x_roll10', 'velocity_x_std10', 'velocity_y_roll10', 'velocity_y_std10',
        's_roll10', 's_std10', 'a_roll10', 'a_std10',
        
        # Extended lags (8)
        'x_lag4', 'y_lag4', 'velocity_x_lag4', 'velocity_y_lag4',
        'x_lag5', 'y_lag5', 'velocity_x_lag5', 'velocity_y_lag5',
        
        # Velocity changes (4)
        'velocity_x_change', 'velocity_y_change', 'speed_change', 'direction_change',
        
        # Field position (2)
        'dist_from_sideline', 'dist_from_endzone',
        
        # Role-specific (3)
        'receiver_optimality', 'receiver_deviation', 'defender_closing_speed',
        
        # Time (2)
        'frames_elapsed', 'normalized_time',
    ]
    # Check for missing columns
    missing_cols = [col for col in feature_cols if col not in input_df.columns]
    existing_cols = [col for col in feature_cols if col in input_df.columns]

    if missing_cols:
        print(f"⚠️  Missing columns ({len(missing_cols)}):")
        for col in missing_cols:
            print(f"    - {col}")
        print(f"\n✓ Using {len(existing_cols)} existing columns out of {len(feature_cols)} total")
    else:
        print(f"✓ All {len(feature_cols)} feature columns found in input_df")

    feature_cols = existing_cols
    return feature_cols

In [10]:
def create_sequences(input_df, feature_cols, output_df=None, test_template=None, is_training=True, window_size=8):
    # CREATE SEQUENCES
    input_df.set_index(['game_id', 'play_id', 'nfl_id'], inplace=True)
    grouped = input_df.groupby(level=['game_id', 'play_id', 'nfl_id'])
    
    target_rows = output_df if is_training else test_template
    target_groups = target_rows[['game_id', 'play_id', 'nfl_id']].drop_duplicates()
    
    sequences, targets_dx, targets_dy, targets_frame_ids, sequence_ids = [], [], [], [], []
    
    for _, row in tqdm(target_groups.iterrows(), total=len(target_groups), desc="Creating sequences"):
        key = (row['game_id'], row['play_id'], row['nfl_id'])
        
        try:
            group_df = grouped.get_group(key)
        except KeyError:
            continue
        
        input_window = group_df.tail(window_size)
        
        if len(input_window) < window_size:
            if is_training:
                continue
            pad_len = window_size - len(input_window)
            pad_df = pd.DataFrame(np.nan, index=range(pad_len), columns=input_window.columns)
            input_window = pd.concat([pad_df, input_window], ignore_index=True)
        
        input_window = input_window.fillna(group_df.mean(numeric_only=True))
        seq = input_window[feature_cols].values
        
        if np.isnan(seq).any():
            if is_training:
                continue
            seq = np.nan_to_num(seq, nan=0.0)
        
        sequences.append(seq)
        
        if is_training:
            out_grp = output_df[
                (output_df['game_id']==row['game_id']) &
                (output_df['play_id']==row['play_id']) &
                (output_df['nfl_id']==row['nfl_id'])
            ].sort_values('frame_id')
            
            last_x = input_window.iloc[-1]['x']
            last_y = input_window.iloc[-1]['y']
            
            dx = out_grp['x'].values - last_x
            dy = out_grp['y'].values - last_y
            
            targets_dx.append(dx)
            targets_dy.append(dy)
            targets_frame_ids.append(out_grp['frame_id'].values)
        
        sequence_ids.append({
            'game_id': key[0],
            'play_id': key[1],
            'nfl_id': key[2],
            'frame_id': input_window.iloc[-1]['frame_id']
        })
    
    print(f"Created {len(sequences)} sequences with {len(feature_cols)} features each")
    
    if is_training:
        return sequences, targets_dx, targets_dy, targets_frame_ids, sequence_ids
    return sequences, sequence_ids


In [12]:
def feature_engineering(input_df, timings):
    """
    Prepare sequences with enhanced features (original + safe additions)
    """
    print(f"\n{'='*80}")
    print(f"PREPARING SEQUENCES WITH ENHANCED FEATURES")
    
    input_df = input_df.copy()
    
    # Basic features
    print("Step 1/4: Adding basic features...")
    with timer("add_basic_features", timings):
        input_df = add_basic_features(input_df)
    
    # Advanced features (original)
    print("Step 2/4: Adding advanced features...")
    with timer("add_advanced_features", timings):
        input_df = add_advanced_features(input_df)
    
    # Safe CatBoost additions
    print("Step 3/4: Adding safe CatBoost features...")
    with timer("add_safe_catboost_features", timings):
        input_df = add_safe_catboost_features(input_df)
    
    # Opponent proximity (simple)
    print("Step 3.5/4: Adding opponent proximity...")
    with timer("get_opponent_proximity_simple", timings):
        opp_features = get_opponent_proximity_simple(input_df)
        input_df = input_df.merge(opp_features, on=['game_id', 'play_id', 'nfl_id'], how='left')
    
    # Feature list
    print("Step 4/4: Creating sequences...")
    with timer("get_feature_cols", timings):
        feature_cols = get_feature_cols(input_df)

    return input_df, feature_cols, timings
    
 

In [19]:
feature_engineering_timings = {}
feature_engineering_train_input = train_input.copy()

print("\n[2/4] Preparing with enhanced features...")
with timer("feature_engineering", feature_engineering_timings):
    feature_engineering_train_input, feature_cols, feature_engineering_timings = feature_engineering(feature_engineering_train_input, feature_engineering_timings)

print_timing_summary(feature_engineering_timings, title="Feature Engineering Summary")


[2/4] Preparing with enhanced features...

PREPARING SEQUENCES WITH ENHANCED FEATURES
Step 1/4: Adding basic features...
  [32.64s] add_basic_features
Step 2/4: Adding advanced features...
Adding advanced features...
Total features after enhancement: 107
  [270.18s] add_advanced_features
Step 3/4: Adding safe CatBoost features...
Adding safe CatBoost features (physics-based only)...
Added 6 safe physics features
  [3.25s] add_safe_catboost_features
Step 3.5/4: Adding opponent proximity...


🏈 Opponent proximity:   0%|          | 0/14108 [00:00<?, ?it/s]

  [47.72s] get_opponent_proximity_simple
Step 4/4: Creating sequences...
✓ All 103 feature columns found in input_df
  [0.00s] get_feature_cols
  [354.40s] feature_engineering

Feature Engineering Summary:
  feature_engineering      : 354.40s ( 50.0%)
  add_advanced_features    : 270.18s ( 38.1%)
  get_opponent_proximity_simple:  47.72s (  6.7%)
  add_basic_features       :  32.64s (  4.6%)
  add_safe_catboost_features:   3.25s (  0.5%)
  get_feature_cols         :   0.00s (  0.0%)
Total                    : 708.19s


In [21]:
sequence_timings = {}
sequence_train_input = feature_engineering_train_input.copy()
sequence_train_output = train_output.copy()

print("\n[2.5/4] Creating sequences...")
with timer("create_sequences", sequence_timings):
    sequences, targets_dx, targets_dy, targets_frame_ids, sequence_ids = create_sequences(
        sequence_train_input, feature_cols, sequence_train_output, test_template=None, is_training=True, window_size=config.WINDOW_SIZE
    )
print_timing_summary(sequence_timings, title="Sequence Creation Summary")

sequences = np.array(sequences, dtype=object)
targets_dx = np.array(targets_dx, dtype=object)
targets_dy = np.array(targets_dy, dtype=object)


[2.5/4] Creating sequences...


Creating sequences:   0%|          | 0/46045 [00:00<?, ?it/s]

Created 46021 sequences with 103 features each
  [317.93s] create_sequences

Sequence Creation Summary:
  create_sequences         : 317.93s (100.0%)
Total                    : 317.93s


## K-Fold Training

Train models with K-fold cross-validation. The training function handles single model training with early stopping.


In [29]:
# Reload pipeline modules
import importlib
import pipeline.training
importlib.reload(pipeline.training)
importlib.reload(pipeline.save_model)
from pipeline.training import train_model
from pipeline.save_model import save_model_ensemble
print("✓ Reloaded pipeline modules")

✓ Reloaded pipeline modules


In [27]:
groups = np.array([d['game_id'] for d in sequence_ids])
gkf = GroupKFold(n_splits=config.N_FOLDS)

models_x, models_y, scalers = [], [], []
train_timings = {}

for fold, (tr, va) in enumerate(gkf.split(sequences, groups=groups), 1):
    print(f"\nFold {fold}/{config.N_FOLDS}")
    
    X_tr, X_va = sequences[tr], sequences[va]
    
    # Scale features
    with timer(f"Fold {fold} - Scaling", train_timings):
        scaler = StandardScaler()
        scaler.fit(np.vstack([s for s in X_tr]))
        X_tr_sc = np.stack([scaler.transform(s) for s in X_tr])
        X_va_sc = np.stack([scaler.transform(s) for s in X_va])
    
    # Train X model
    with timer(f"Fold {fold} - Train X model", train_timings):
        mx, loss_x = train_model(
            X_tr_sc, targets_dx[tr], X_va_sc, targets_dx[va],
            X_tr[0].shape[-1], config.MAX_FUTURE_HORIZON, config
        )
    
    # Train Y model
    with timer(f"Fold {fold} - Train Y model", train_timings):
        my, loss_y = train_model(
            X_tr_sc, targets_dy[tr], X_va_sc, targets_dy[va],
            X_tr[0].shape[-1], config.MAX_FUTURE_HORIZON, config
        )
    
    # Save for ensemble
    models_x.append(mx)
    models_y.append(my)
    scalers.append(scaler)
    
    print(f"Fold {fold} - X loss: {loss_x:.5f}, Y loss: {loss_y:.5f}")
    
# Print training summary
print("\n" + "="*60)
print("Training Complete!")
print_timing_summary(train_timings, title="Training Timing Summary")
print(f"\nTrained {len(models_x)} X models and {len(models_y)} Y models")



Fold 1/5
  [2.91s] Fold 1 - Scaling
  Epoch 10: train=0.0640, val=0.0625
  Epoch 20: train=0.0496, val=0.0492
  Epoch 30: train=0.0432, val=0.0449
  Epoch 40: train=0.0390, val=0.0453
  Epoch 50: train=0.0366, val=0.0449
  Epoch 60: train=0.0358, val=0.0448
  Epoch 70: train=0.0350, val=0.0447
  Epoch 80: train=0.0349, val=0.0446
  Epoch 90: train=0.0345, val=0.0446
  Epoch 100: train=0.0344, val=0.0446
  Early stop at epoch 101
  [339.49s] Fold 1 - Train X model
  Epoch 10: train=0.0543, val=0.0466
  Epoch 20: train=0.0441, val=0.0471
  Epoch 30: train=0.0368, val=0.0437
  Epoch 40: train=0.0337, val=0.0435
  Epoch 50: train=0.0318, val=0.0427
  Epoch 60: train=0.0309, val=0.0427
  Epoch 70: train=0.0309, val=0.0426
  Early stop at epoch 77
  [244.20s] Fold 1 - Train Y model
Fold 1 - X loss: 0.04453, Y loss: 0.04255

Fold 2/5
  [2.52s] Fold 2 - Scaling
  Epoch 10: train=0.0625, val=0.0566
  Epoch 20: train=0.0536, val=0.0584
  Epoch 30: train=0.0425, val=0.0522
  Epoch 40: train=0.03

## Model Saving

Save the trained ensemble when satisfied with results. Automatic versioning handles model organization.


In [30]:
# Save trained models
# TODO: Uncomment when models are trained
from datetime import datetime
metadata = {
    'feature_names': feature_cols,  # Make sure feature_cols is in scope
    'num_features': len(feature_cols),
    'training_date': datetime.now().isoformat(),
    'num_folds': len(models_x),
    'num_sequences': len(sequences),
    # Note: validation_losses would need to be manually entered or left out
}

save_model_ensemble(
    models_x, models_y, scalers, config, metadata,
    model_id='nn_baseline'
)

print("Model saving would go here")



✓ Model saved to: /Users/matth/projects/nfl-big-data-bowl-2026-prediction/models/nn_baseline.0
Model saving would go here


## Submission Server

Create a submission server for Kaggle competition API.


In [None]:
# Define prediction function for submission API
def predict_fn(test: pl.DataFrame, test_input: pl.DataFrame) -> pl.DataFrame:
    """
    Prediction function for Kaggle competition API.
    
    This function receives test data in batches and returns predictions.
    It should:
    1. Load saved models
    2. Transform test data (feature engineering, scaling)
    3. Generate predictions
    4. Return Polars DataFrame with x, y columns
    """
    # TODO: Implement prediction logic
    # predictions = pl.DataFrame({'x': [0.0] * len(test), 'y': [0.0] * len(test)})
    return pl.DataFrame({'x': [0.0] * len(test), 'y': [0.0] * len(test)})

# Create submission server
# server = create_submission_server(predict_fn)

# For local testing:
# server = create_submission_server(predict_fn, gateway_path=('/kaggle/input/nfl-big-data-bowl-2026-prediction/',))

print("Submission server would be set up here")
