# Model 6: Neural Network - Training and Hyperparameter Tuning

This notebook trains neural network models for hockey goal prediction using sklearn's MLPRegressor.

## Table of Contents

1. Setup and Imports
2. Load or Generate Data
3. Data Preprocessing
4. Baseline Neural Network
5. Random Search Hyperparameter Tuning
6. Grid Search (Fine-tuning)
7. Learning Curve Analysis
8. Final Model Evaluation
9. Save Best Model

## Neural Network Considerations

- **Feature Scaling**: Critical for neural networks
- **Architecture**: Number/size of hidden layers
- **Regularization**: L2 penalty (alpha), early stopping
- **Activation**: ReLU typically works well

## 1. Setup and Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, cross_val_score, KFold, learning_curve
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import os
import sys
import json
import yaml
import pickle
import pathlib
from datetime import datetime
import warnings

# Suppress library warnings so the cell output stays readable.
warnings.filterwarnings('ignore')


def _find_python_dir(start):
    """Return the project's python/ directory as seen from `start`.

    Probes, in order: a python/ child of `start`, the grandparent of a
    neural_network/ folder, the parent of a training/ folder, and `start`
    itself. The last three only qualify when they contain a data/ folder.
    Raises RuntimeError when no probe matches.
    """
    if (start / 'python').is_dir():
        return start / 'python'
    if start.name == 'neural_network' and (start.parent.parent / 'data').is_dir():
        return start.parent.parent
    if start.name == 'training' and (start.parent / 'data').is_dir():
        return start.parent
    if (start / 'data').is_dir():
        return start
    raise RuntimeError(f'Cannot locate python/ directory from {start}')


# Reliably set cwd to the python/ folder and make it importable.
_cwd = pathlib.Path(os.path.abspath('')).resolve()
_python_dir = _find_python_dir(_cwd)

os.chdir(_python_dir)
sys.path.insert(0, str(_python_dir))

# Configure plotting (rcParams first, then the seaborn style).
plt.rcParams.update({'figure.figsize': (12, 6), 'font.size': 11})
sns.set_style('whitegrid')

# Seed NumPy's global RNG for reproducibility.
np.random.seed(42)

for _message in (f"CWD: {os.getcwd()}", "Scikit-learn MLPRegressor ready", "Setup complete."):
    print(_message)

In [None]:
# Load the optional hyperparameter configuration. The path is relative to the
# working directory selected by the setup cell.
config_path = '../config/hyperparams/model6_neural_network.yaml'

if os.path.exists(config_path):
    with open(config_path, 'r') as f:
        # safe_load returns None for an empty document; fall back to an empty
        # mapping so the .get() calls below cannot fail on a blank file.
        config = yaml.safe_load(f) or {}
    print(f"Loaded config: {config.get('model_name', 'Neural Network')}")
    print(f"Description: {config.get('description', 'MLP for goal prediction')}")
else:
    # A missing file is not an error: `config` is set to None in that case.
    print(f"Config not found at {config_path}, using defaults")
    config = None

## 2. Load or Generate Data

In [None]:
# Load the engineered feature table if it exists (path is relative to the
# working directory chosen in the setup cell); otherwise build a synthetic
# stand-in so the rest of the notebook can still run end to end.
data_path = 'data/hockey_features.csv'

if os.path.exists(data_path):
    data = pd.read_csv(data_path)
    print(f"Loaded {len(data)} games from {data_path}")
else:
    print("Generating synthetic hockey data for demonstration...")
    
    # Re-seed here so the synthetic table is the same every time this cell runs.
    # Every column below draws from the global NumPy RNG in sequence, so the
    # order of the dictionary entries determines the generated values.
    np.random.seed(42)
    n_games = 2000
    
    data = pd.DataFrame({
        # Team strength metrics
        'home_win_pct': np.random.uniform(0.3, 0.7, n_games),
        'away_win_pct': np.random.uniform(0.3, 0.7, n_games),
        'home_points_pct': np.random.uniform(0.4, 0.8, n_games),
        'away_points_pct': np.random.uniform(0.4, 0.8, n_games),
        
        # Offensive metrics
        'home_goals_avg': np.random.uniform(2.5, 3.8, n_games),
        'away_goals_avg': np.random.uniform(2.5, 3.8, n_games),
        'home_shots_avg': np.random.uniform(28, 35, n_games),
        'away_shots_avg': np.random.uniform(28, 35, n_games),
        
        # Defensive metrics
        'home_goals_against_avg': np.random.uniform(2.2, 3.5, n_games),
        'away_goals_against_avg': np.random.uniform(2.2, 3.5, n_games),
        'home_save_pct': np.random.uniform(0.88, 0.93, n_games),
        'away_save_pct': np.random.uniform(0.88, 0.93, n_games),
        
        # Special teams
        'home_pp_pct': np.random.uniform(0.15, 0.28, n_games),
        'away_pp_pct': np.random.uniform(0.15, 0.28, n_games),
        'home_pk_pct': np.random.uniform(0.75, 0.88, n_games),
        'away_pk_pct': np.random.uniform(0.75, 0.88, n_games),
        
        # Context (randint's upper bound is exclusive: rest days are 1-4)
        'home_rest_days': np.random.randint(1, 5, n_games),
        'away_rest_days': np.random.randint(1, 5, n_games),
        'home_b2b': np.random.binomial(1, 0.15, n_games),
        'away_b2b': np.random.binomial(1, 0.15, n_games),
        
        # Recent form (last 5 games; wins are integers 0-5)
        'home_goals_last5': np.random.uniform(2.0, 4.0, n_games),
        'away_goals_last5': np.random.uniform(2.0, 4.0, n_games),
        'home_wins_last5': np.random.randint(0, 6, n_games),
        'away_wins_last5': np.random.randint(0, 6, n_games),
    })
    
    # Targets are a noisy linear blend of a few of the features above,
    # rounded and clipped to integer goal counts in [0, 9].
    home_advantage = 0.35
    
    # Home goals additionally receive the constant home_advantage and a
    # rest-day differential term that the away formula does not have.
    data['home_goals'] = np.round(
        data['home_goals_avg'] * 0.3 +
        data['home_goals_last5'] * 0.2 +
        (4 - data['away_goals_against_avg']) * 0.3 +
        data['home_pp_pct'] * 3 +
        home_advantage +
        (data['home_rest_days'] - data['away_rest_days']) * 0.1 +
        np.random.normal(0, 0.8, n_games)
    ).clip(0, 9).astype(int)
    
    data['away_goals'] = np.round(
        data['away_goals_avg'] * 0.3 +
        data['away_goals_last5'] * 0.2 +
        (4 - data['home_goals_against_avg']) * 0.3 +
        data['away_pp_pct'] * 3 +
        np.random.normal(0, 0.8, n_games)
    ).clip(0, 9).astype(int)
    
    print(f"Generated {n_games} synthetic games")

# Quick sanity check on whichever table was produced; both target columns
# must be present for these lines (and the rest of the notebook) to work.
print(f"\nDataset shape: {data.shape}")
print(f"Home goals mean: {data['home_goals'].mean():.2f}")
print(f"Away goals mean: {data['away_goals'].mean():.2f}")

In [None]:
# Columns that are never model inputs: the two targets plus identifier and
# metadata columns (names absent from `data` are simply not matched).
target_cols = ['home_goals', 'away_goals']
exclude_cols = target_cols + ['home_team', 'away_team', 'date', 'game_id', 'season']

# Keep every remaining column, in its original DataFrame order.
feature_cols = []
for _column in data.columns:
    if _column not in exclude_cols:
        feature_cols.append(_column)
print(f"Features ({len(feature_cols)}): {feature_cols[:10]}...")

# Feature matrix and one target series per side.
X = data[feature_cols]
y_home, y_away = data['home_goals'], data['away_goals']

## 3. Data Preprocessing

In [None]:
# Train/validation/test split (60/20/20): hold out 20% for testing, then take
# 25% of the remaining 80% (20% of the total) for validation.
_split_seed = 42

(X_trainval, X_test,
 y_home_trainval, y_home_test,
 y_away_trainval, y_away_test) = train_test_split(
    X, y_home, y_away, test_size=0.2, random_state=_split_seed
)

(X_train, X_val,
 y_home_train, y_home_val,
 y_away_train, y_away_val) = train_test_split(
    X_trainval, y_home_trainval, y_away_trainval, test_size=0.25, random_state=_split_seed
)

for _split_name, _split_X in (("Training", X_train), ("Validation", X_val), ("Test", X_test)):
    print(f"{_split_name} set: {len(_split_X)} games")

In [None]:
# Feature scaling (CRITICAL for neural networks). The scaler is fitted on the
# training split only and then applied unchanged to validation and test.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(_split) for _split in (X_train, X_val, X_test)
)

print("Features scaled using StandardScaler")
_train_mean, _train_std = X_train_scaled.mean(), X_train_scaled.std()
print(f"Train mean: {_train_mean:.4f}, std: {_train_std:.4f}")

## 4. Baseline Neural Network

In [None]:
# Baseline settings shared by the home and away models.
default_params = dict(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    alpha=0.001,  # L2 regularization
    learning_rate='adaptive',
    learning_rate_init=0.001,
    max_iter=500,
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=20,
    random_state=42,
)

print("Default Neural Network Parameters:")
for _name in default_params:
    print(f"  {_name}: {default_params[_name]}")

In [None]:
# Fit one baseline regressor per target with the shared default settings.
# n_iter_ is the number of iterations the solver actually ran.
_baseline_fitted = {}
for _side, _targets in (('home', y_home_train), ('away', y_away_train)):
    print(f"Training baseline {_side} goals model...")
    _baseline_fitted[_side] = MLPRegressor(**default_params).fit(X_train_scaled, _targets)
    print(f"  Converged in {_baseline_fitted[_side].n_iter_} iterations")

baseline_home = _baseline_fitted['home']
baseline_away = _baseline_fitted['away']

print("\nBaseline models trained!")

In [None]:
# Evaluation function
def evaluate_models(home_model, away_model, X, y_home, y_away):
    """Score a home/away model pair on one dataset.

    Both models predict from the same feature matrix ``X``. The returned dict
    holds RMSE, MAE and R² per side (``home_*`` / ``away_*``) followed by the
    same three metrics computed over the home and away samples pooled
    together (``combined_*``).
    """
    predicted = {'home': home_model.predict(X), 'away': away_model.predict(X)}
    observed = {'home': y_home, 'away': y_away}

    def _rmse(actual, estimate):
        return np.sqrt(mean_squared_error(actual, estimate))

    scorers = (('rmse', _rmse), ('mae', mean_absolute_error), ('r2', r2_score))

    # Per-side metrics, grouped by metric so the key order is
    # home_rmse, away_rmse, home_mae, away_mae, home_r2, away_r2.
    metrics = {}
    for metric_name, scorer in scorers:
        for side in ('home', 'away'):
            metrics[f'{side}_{metric_name}'] = scorer(observed[side], predicted[side])

    # Pooled metrics: stack home samples first, then away samples.
    pooled_pred = np.concatenate([predicted['home'], predicted['away']])
    pooled_actual = np.concatenate([y_home, y_away])
    for metric_name, scorer in scorers:
        metrics[f'combined_{metric_name}'] = scorer(pooled_actual, pooled_pred)

    return metrics

# Score the baseline pair on the validation split and print one section per
# target plus the pooled figures.
baseline_metrics = evaluate_models(baseline_home, baseline_away, X_val_scaled, y_home_val, y_away_val)

print("\n Baseline Validation Performance")
print("=" * 45)
for _heading, _prefix in (("Home Goals", "home"), ("Away Goals", "away"), ("Combined", "combined")):
    _rmse, _mae, _r2 = (baseline_metrics[f"{_prefix}_{_stat}"] for _stat in ("rmse", "mae", "r2"))
    print(f"\n{_heading}:")
    print(f"  RMSE: {_rmse:.4f}")
    print(f"  MAE:  {_mae:.4f}")
    print(f"  R²:   {_r2:.4f}")

## 5. Random Search Hyperparameter Tuning

In [None]:
# Candidate values for the random search; each trial picks one entry per key.
# Note that batch_size mixes integers with the string 'auto'.
param_distributions = dict(
    hidden_layer_sizes=[
        (50,),
        (100,),
        (50, 25),
        (100, 50),
        (128, 64),
        (100, 50, 25),
        (128, 64, 32),
        (200, 100),
    ],
    activation=['relu', 'tanh'],
    alpha=[0.0001, 0.001, 0.01, 0.1],
    learning_rate_init=[0.0001, 0.001, 0.01],
    batch_size=[32, 64, 128, 'auto'],
)

print(f"Parameters to tune: {len(param_distributions)}")
for _name in param_distributions:
    print(f"  {_name}: {param_distributions[_name]}")

In [None]:
def random_search_nn(X_train, y_train, X_val, y_val, param_dist, n_iter=30):
    """
    Perform random search for neural network hyperparameters.

    Each iteration draws one candidate per entry of ``param_dist`` (using
    NumPy's global RNG), trains an ``MLPRegressor`` with the adam solver and
    early stopping, and scores it on the training and validation data.

    Parameters
    ----------
    X_train, y_train : features and targets passed to ``fit``.
    X_val, y_val : held-out features and targets used for ``val_rmse``.
    param_dist : dict mapping a parameter name to a sequence of candidates.
    n_iter : int, number of configurations to sample (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per configuration that trained successfully, with columns
        ``iteration``, ``train_rmse``, ``val_rmse``, ``n_iter``,
        ``overfit_ratio`` and every model parameter rendered via ``str``.
        Configurations that raise are reported on stdout and left out.
    """
    results = []

    for i in range(n_iter):
        # Sample by index rather than np.random.choice(options): choice first
        # converts the list to an array, so a mixed list such as
        # [32, 64, 128, 'auto'] would come back as strings ('32', ...) and be
        # rejected by MLPRegressor. Indexing keeps each value's native type.
        params = {
            name: options[np.random.randint(len(options))]
            for name, options in param_dist.items()
        }
        params['solver'] = 'adam'
        params['max_iter'] = 300
        params['early_stopping'] = True
        params['validation_fraction'] = 0.1
        params['n_iter_no_change'] = 15
        params['random_state'] = 42

        try:
            # Train model
            model = MLPRegressor(**params)
            model.fit(X_train, y_train)

            # Evaluate
            train_pred = model.predict(X_train)
            val_pred = model.predict(X_val)

            train_rmse = np.sqrt(mean_squared_error(y_train, train_pred))
            val_rmse = np.sqrt(mean_squared_error(y_val, val_pred))

            result = {
                'iteration': i + 1,
                'train_rmse': train_rmse,
                'val_rmse': val_rmse,
                'n_iter': model.n_iter_,
                # Guard the division; a zero validation error reports ratio 0.
                'overfit_ratio': train_rmse / val_rmse if val_rmse > 0 else 0,
                # Stringified so tuples and mixed types fit in one DataFrame.
                **{k: str(v) for k, v in params.items()}
            }
            results.append(result)

            if (i + 1) % 5 == 0:
                print(f"  Iteration {i + 1}/{n_iter}: Val RMSE = {val_rmse:.4f} (arch: {params['hidden_layer_sizes']})")

        except Exception as e:
            # Best effort: one bad configuration must not abort the search.
            print(f"  Iteration {i + 1} failed: {e}")

    if not results:
        # Make the empty result visible here instead of as a KeyError later.
        print("  No configuration trained successfully; returning an empty result table")

    return pd.DataFrame(results)

# Run the same random search once per target, scoring on the validation split.
_n_random_trials = 30

print("Starting Random Search for Home Goals...")
home_random_results = random_search_nn(
    X_train=X_train_scaled, y_train=y_home_train,
    X_val=X_val_scaled, y_val=y_home_val,
    param_dist=param_distributions, n_iter=_n_random_trials,
)

print("\nStarting Random Search for Away Goals...")
away_random_results = random_search_nn(
    X_train=X_train_scaled, y_train=y_away_train,
    X_val=X_val_scaled, y_val=y_away_val,
    param_dist=param_distributions, n_iter=_n_random_trials,
)

In [None]:
# Pick the row with the lowest validation RMSE from each search.
best_home_idx = home_random_results['val_rmse'].idxmin()
best_away_idx = away_random_results['val_rmse'].idxmin()

best_home_config = home_random_results.loc[best_home_idx]
best_away_config = away_random_results.loc[best_away_idx]

for _side, _winner in (("Home", best_home_config), ("Away", best_away_config)):
    print(f"\nBest {_side} Goals Configuration:")
    print(f"  Val RMSE: {_winner['val_rmse']:.4f}")
    # Only the searched parameters are listed, not the fixed training settings.
    for param in param_distributions:
        print(f"  {param}: {_winner[param]}")

## 6. Grid Search (Fine-tuning)

In [None]:
# Fine-tune around best architecture
from itertools import product

# Small exhaustive grid: 3 architectures x 3 alphas x 3 learning rates.
fine_tune_params = dict(
    hidden_layer_sizes=[(100, 50), (128, 64), (100, 50, 25)],
    alpha=[0.0005, 0.001, 0.005],
    learning_rate_init=[0.0005, 0.001, 0.002],
)

def grid_search_nn(X_train, y_train, X_val, y_val, param_grid):
    """Exhaustively evaluate every combination in ``param_grid``.

    Each combination is merged with fixed training settings (adam, relu,
    early stopping, seed 42), fitted on the training data and scored by RMSE
    on the validation data. Returns a DataFrame with ``val_rmse``, ``n_iter``
    and the tuned parameters rendered via ``str``; combinations that raise
    are reported on stdout and left out.
    """
    tuned_names = list(param_grid.keys())
    candidates = list(product(*(param_grid[name] for name in tuned_names)))
    total = len(candidates)

    print(f"Testing {total} combinations...")

    # Applied on top of every combination; these win over a tuned key of the
    # same name.
    fixed_settings = {
        'solver': 'adam',
        'activation': 'relu',
        'max_iter': 500,
        'early_stopping': True,
        'validation_fraction': 0.1,
        'n_iter_no_change': 20,
        'random_state': 42,
    }

    rows = []
    for position, values in enumerate(candidates, start=1):
        params = dict(zip(tuned_names, values))
        params.update(fixed_settings)

        try:
            model = MLPRegressor(**params)
            model.fit(X_train, y_train)

            val_rmse = np.sqrt(mean_squared_error(y_val, model.predict(X_val)))

            row = {'val_rmse': val_rmse, 'n_iter': model.n_iter_}
            for name in tuned_names:
                row[name] = str(params[name])
            rows.append(row)

            if position % 10 == 0:
                print(f"  Progress: {position}/{total}")

        except Exception as e:
            # Best effort: report the failing combination and keep going.
            print(f"  Combo {position} failed: {e}")

    return pd.DataFrame(rows)

# Run the fine-tuning grid once per target, scoring on the validation split.
print("Grid Search for Home Goals:")
home_grid_results = grid_search_nn(
    X_train=X_train_scaled, y_train=y_home_train,
    X_val=X_val_scaled, y_val=y_home_val,
    param_grid=fine_tune_params,
)

print("\nGrid Search for Away Goals:")
away_grid_results = grid_search_nn(
    X_train=X_train_scaled, y_train=y_away_train,
    X_val=X_val_scaled, y_val=y_away_val,
    param_grid=fine_tune_params,
)

In [None]:
# Lowest validation RMSE row from each grid search.
best_home_grid = home_grid_results.loc[home_grid_results['val_rmse'].idxmin()]
best_away_grid = away_grid_results.loc[away_grid_results['val_rmse'].idxmin()]

print("Best Grid Search Results:")
for _side, _winner in (("Home", best_home_grid), ("Away", best_away_grid)):
    print(f"\n{_side} Goals - Val RMSE: {_winner['val_rmse']:.4f}")
    print(f"  Architecture: {_winner['hidden_layer_sizes']}")
    print(f"  Alpha: {_winner['alpha']}")
    print(f"  Learning rate: {_winner['learning_rate_init']}")

## 7. Learning Curve Analysis

In [None]:
# Train final model with best parameters
# Parse best parameters (stored as strings)
import ast

# The tuned values come from the home-goals grid-search winner; the results
# table stores them as strings, so they are converted back to Python objects.
# NOTE(review): best_away_grid is not consulted here, so every model built
# from best_params uses the home-tuned values. Confirm this is intended.
_tuned_settings = {
    'hidden_layer_sizes': ast.literal_eval(best_home_grid['hidden_layer_sizes']),
    'alpha': float(best_home_grid['alpha']),
    'learning_rate_init': float(best_home_grid['learning_rate_init']),
}

# Fixed training settings: a larger iteration budget and more early-stopping
# patience than the search runs used.
_fixed_settings = {
    'solver': 'adam',
    'activation': 'relu',
    'max_iter': 1000,
    'early_stopping': True,
    'validation_fraction': 0.1,
    'n_iter_no_change': 30,
    'random_state': 42,
}

best_params = {**_tuned_settings, **_fixed_settings}

print("Best Parameters:")
for _name, _value in best_params.items():
    print(f"  {_name}: {_value}")

In [None]:
# Learning curve analysis on the pooled train + validation data. The arrays
# were already scaled with the scaler fitted on the training split.
X_full_scaled = np.vstack([X_train_scaled, X_val_scaled])
y_home_full = pd.concat([y_home_train, y_home_val])
y_away_full = pd.concat([y_away_train, y_away_val])

train_sizes, train_scores, val_scores = learning_curve(
    MLPRegressor(**best_params),
    X_full_scaled,
    y_home_full,
    train_sizes=np.linspace(0.2, 1.0, 5),
    cv=3,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
)

# Scores are negated RMSE, so flip the sign of the fold means for plotting.
_curves = (
    ('Training RMSE', -train_scores.mean(axis=1), train_scores.std(axis=1)),
    ('Validation RMSE', -val_scores.mean(axis=1), val_scores.std(axis=1)),
)

fig, ax = plt.subplots(figsize=(10, 6))
for _label, _mean_rmse, _spread in _curves:
    ax.plot(train_sizes, _mean_rmse, 'o-', label=_label)
for _label, _mean_rmse, _spread in _curves:
    # Shaded band: one standard deviation across the CV folds.
    ax.fill_between(train_sizes, _mean_rmse - _spread, _mean_rmse + _spread, alpha=0.2)
ax.set_xlabel('Training Set Size')
ax.set_ylabel('RMSE')
ax.set_title('Learning Curve - Neural Network (Home Goals)')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# 5-fold cross-validation with one shuffled, seeded splitter for both targets.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)


def _cv_neg_rmse(targets):
    """Cross-validate a fresh best_params model against `targets`."""
    return cross_val_score(
        MLPRegressor(**best_params),
        X_full_scaled,
        targets,
        cv=kfold,
        scoring='neg_root_mean_squared_error',
    )


home_cv_scores = _cv_neg_rmse(y_home_full)
away_cv_scores = _cv_neg_rmse(y_away_full)

# Scores are negated RMSE; negate the mean to report a positive error.
print("5-Fold Cross-Validation Results:")
print(f"\nHome Goals RMSE: {-home_cv_scores.mean():.4f} (+/- {home_cv_scores.std():.4f})")
print(f"Away Goals RMSE: {-away_cv_scores.mean():.4f} (+/- {away_cv_scores.std():.4f})")

## 8. Final Model Evaluation

In [None]:
# Refit on the pooled training + validation data; both targets share
# best_params. fit() returns the estimator, so it can be chained.
print("Training final models...")

final_home = MLPRegressor(**best_params).fit(X_full_scaled, y_home_full)
print(f"  Home model converged in {final_home.n_iter_} iterations")

final_away = MLPRegressor(**best_params).fit(X_full_scaled, y_away_full)
print(f"  Away model converged in {final_away.n_iter_} iterations")

In [None]:
# Score the final models on the held-out test split and print one section per
# target plus the pooled figures.
test_metrics = evaluate_models(final_home, final_away, X_test_scaled, y_home_test, y_away_test)

print("\n Final Test Set Performance")
print("=" * 45)
for _heading, _prefix in (("Home Goals", "home"), ("Away Goals", "away"), ("Combined", "combined")):
    _rmse, _mae, _r2 = (test_metrics[f"{_prefix}_{_stat}"] for _stat in ("rmse", "mae", "r2"))
    print(f"\n{_heading}:")
    print(f"  RMSE: {_rmse:.4f}")
    print(f"  MAE:  {_mae:.4f}")
    print(f"  R²:   {_r2:.4f}")

In [None]:
# Compare baseline vs tuned models on the same test split.
print("\n Improvement Over Baseline")
print("=" * 45)

baseline_test = evaluate_models(baseline_home, baseline_away, X_test_scaled, y_home_test, y_away_test)


def _rmse_gain_pct(prefix):
    """Relative RMSE reduction versus the baseline, in percent (positive = better)."""
    before = baseline_test[f'{prefix}_rmse']
    after = test_metrics[f'{prefix}_rmse']
    return (before - after) / before * 100


home_improvement = _rmse_gain_pct('home')
away_improvement = _rmse_gain_pct('away')
combined_improvement = _rmse_gain_pct('combined')

for _label, _prefix, _gain in (
    ("Home Goals RMSE:", "home", home_improvement),
    ("Away Goals RMSE:", "away", away_improvement),
    ("Combined RMSE:  ", "combined", combined_improvement),
):
    _before = baseline_test[f'{_prefix}_rmse']
    _after = test_metrics[f'{_prefix}_rmse']
    print(f"{_label} {_before:.4f} -> {_after:.4f} ({_gain:+.1f}%)")

In [None]:
# Predicted vs actual goals on the test split, one panel per target.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

home_pred = final_home.predict(X_test_scaled)
away_pred = final_away.predict(X_test_scaled)

_panels = (
    (axes[0], 'Home', y_home_test, home_pred, test_metrics["home_rmse"]),
    (axes[1], 'Away', y_away_test, away_pred, test_metrics["away_rmse"]),
)
for _ax, _side, _actual, _predicted, _rmse in _panels:
    _ax.scatter(_actual, _predicted, alpha=0.5, edgecolor='k', linewidth=0.5)
    # Dashed diagonal marks where predicted equals actual.
    _ax.plot([0, 8], [0, 8], 'r--', lw=2)
    _ax.set_xlabel(f'Actual {_side} Goals')
    _ax.set_ylabel(f'Predicted {_side} Goals')
    _ax.set_title(f'{_side} Goals: RMSE = {_rmse:.4f}')

plt.tight_layout()
plt.show()

## 9. Save Best Model

In [None]:
# Persist both regressors, the fitted scaler (required for inference) and a
# JSON summary of the run.
output_dir = 'output/models/neural_network'
os.makedirs(output_dir, exist_ok=True)

_pickled_artifacts = (
    ('neural_network_home.pkl', final_home),
    ('neural_network_away.pkl', final_away),
    ('neural_network_scaler.pkl', scaler),
)
for _filename, _artifact in _pickled_artifacts:
    with open(f'{output_dir}/{_filename}', 'wb') as f:
        pickle.dump(_artifact, f)

# Run metadata; parameters are stringified so tuples survive JSON encoding.
model_info = dict(
    model_type='MLPRegressor',
    best_params={k: str(v) for k, v in best_params.items()},
    test_metrics=test_metrics,
    cv_home_rmse=float(-home_cv_scores.mean()),
    cv_home_std=float(home_cv_scores.std()),
    cv_away_rmse=float(-away_cv_scores.mean()),
    cv_away_std=float(away_cv_scores.std()),
    feature_cols=feature_cols,
    home_n_iter=int(final_home.n_iter_),
    away_n_iter=int(final_away.n_iter_),
    trained_at=datetime.now().isoformat(),
)

with open(f'{output_dir}/neural_network_info.json', 'w') as f:
    # default=str is the fallback for values json cannot encode natively.
    json.dump(model_info, f, indent=2, default=str)

print(f"Models saved to {output_dir}/")
for _filename in (
    'neural_network_home.pkl',
    'neural_network_away.pkl',
    'neural_network_scaler.pkl',
    'neural_network_info.json',
):
    print(f"  - {_filename}")

In [None]:
# Plot the loss_curve_ recorded by each final model, one panel per target.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

for _ax, _fitted, _side in zip(axes, (final_home, final_away), ('Home', 'Away')):
    _ax.plot(_fitted.loss_curve_)
    _ax.set_xlabel('Iteration')
    _ax.set_ylabel('Loss')
    _ax.set_title(f'Training Loss - {_side} Goals Model')
    _ax.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Summary: collect the report lines first, then print them in order.
_rule = "=" * 60
_summary_lines = [
    "\n" + _rule,
    " NEURAL NETWORK TRAINING COMPLETE",
    _rule,
    f"\nArchitecture: {best_params['hidden_layer_sizes']}",
    f"Activation: {best_params['activation']}",
    f"Alpha (L2): {best_params['alpha']}",
    f"Learning Rate: {best_params['learning_rate_init']}",
    f"\nFinal Test Performance:",
    f"  Combined RMSE: {test_metrics['combined_rmse']:.4f}",
    f"  Combined MAE:  {test_metrics['combined_mae']:.4f}",
    f"  Combined R²:   {test_metrics['combined_r2']:.4f}",
    f"\nCross-Validation (5-fold):",
    f"  Home RMSE: {-home_cv_scores.mean():.4f} (+/- {home_cv_scores.std():.4f})",
    f"  Away RMSE: {-away_cv_scores.mean():.4f} (+/- {away_cv_scores.std():.4f})",
]
for _line in _summary_lines:
    print(_line)