# Model 5: Ensemble - Training and Hyperparameter Tuning

This notebook trains an ensemble model combining multiple base models for hockey goal prediction.

## Table of Contents

1. Setup and Imports
2. Load or Generate Data
3. Train Base Models
4. Simple Average Ensemble
5. Weighted Average Ensemble
6. Stacking Ensemble
7. Sklearn Stacking Regressor
8. Final Ensemble Comparison
9. Save Best Ensemble

## Ensemble Approaches

- **Simple Averaging**: Average predictions from all base models
- **Weighted Averaging**: Weight models by inverse validation error
- **Stacking**: Train a meta-model on base model predictions

## 1. Setup and Imports

In [None]:
import json
import os
import pickle
import sys
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yaml
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import Ridge, LinearRegression, ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# NOTE: this suppresses every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Add parent directory for imports. The membership check keeps the cell
# idempotent: re-running it no longer prepends a duplicate sys.path entry.
parent_dir = os.path.dirname(os.getcwd())
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

# XGBoost is optional; XGB_AVAILABLE tells later cells whether to add it
# to the set of base models.
try:
    import xgboost as xgb
    XGB_AVAILABLE = True
    print("XGBoost available")
except ImportError:
    XGB_AVAILABLE = False
    print("XGBoost not available, using GradientBoosting instead")

# Configure plotting
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 11
sns.set_style('whitegrid')

print("\nSetup complete.")

In [None]:
# Load hyperparameter configuration.
# `config` ends up as a dict when a usable YAML mapping was found, otherwise None.
config_path = '../../config/hyperparams/model5_ensemble.yaml'
config = None

if os.path.exists(config_path):
    with open(config_path, 'r', encoding='utf-8') as f:
        loaded_config = yaml.safe_load(f)

    # safe_load returns None for an empty document and may return a list or
    # scalar for other content; only a mapping is usable as a config here.
    if isinstance(loaded_config, dict):
        config = loaded_config
        # .get() so that a config missing these optional descriptive keys
        # does not abort the notebook.
        print(f"Loaded config: {config.get('model_name', '<unnamed>')}")
        print(f"Description: {config.get('description', '<none>')}")
    else:
        print(f"Config at {config_path} is empty or not a mapping, using defaults")
else:
    print(f"Config not found at {config_path}, using defaults")

## 2. Load or Generate Data

In [None]:
# Try to load real data, otherwise generate synthetic
# The path is relative, so it is resolved against the kernel's working directory.
data_path = '../data/hockey_features.csv'

if os.path.exists(data_path):
    data = pd.read_csv(data_path)
    print(f"Loaded {len(data)} games from {data_path}")
else:
    print("Generating synthetic hockey data for demonstration...")
    
    # Seed the global NumPy RNG so the synthetic data is reproducible.
    # The draws below consume that RNG in order, so reordering them
    # changes the generated dataset.
    np.random.seed(42)
    n_games = 2000
    
    # One row per game; every feature is drawn independently of the others.
    data = pd.DataFrame({
        # Team strength metrics
        'home_win_pct': np.random.uniform(0.3, 0.7, n_games),
        'away_win_pct': np.random.uniform(0.3, 0.7, n_games),
        'home_points_pct': np.random.uniform(0.4, 0.8, n_games),
        'away_points_pct': np.random.uniform(0.4, 0.8, n_games),
        
        # Offensive metrics
        'home_goals_avg': np.random.uniform(2.5, 3.8, n_games),
        'away_goals_avg': np.random.uniform(2.5, 3.8, n_games),
        'home_shots_avg': np.random.uniform(28, 35, n_games),
        'away_shots_avg': np.random.uniform(28, 35, n_games),
        
        # Defensive metrics
        'home_goals_against_avg': np.random.uniform(2.2, 3.5, n_games),
        'away_goals_against_avg': np.random.uniform(2.2, 3.5, n_games),
        'home_save_pct': np.random.uniform(0.88, 0.93, n_games),
        'away_save_pct': np.random.uniform(0.88, 0.93, n_games),
        
        # Special teams
        'home_pp_pct': np.random.uniform(0.15, 0.28, n_games),
        'away_pp_pct': np.random.uniform(0.15, 0.28, n_games),
        'home_pk_pct': np.random.uniform(0.75, 0.88, n_games),
        'away_pk_pct': np.random.uniform(0.75, 0.88, n_games),
        
        # Context (randint's upper bound is exclusive: rest days are 1..4)
        'home_rest_days': np.random.randint(1, 5, n_games),
        'away_rest_days': np.random.randint(1, 5, n_games),
        'home_b2b': np.random.binomial(1, 0.15, n_games),
        'away_b2b': np.random.binomial(1, 0.15, n_games),
        
        # Recent form (last 5 games); wins are integers 0..5
        'home_goals_last5': np.random.uniform(2.0, 4.0, n_games),
        'away_goals_last5': np.random.uniform(2.0, 4.0, n_games),
        'home_wins_last5': np.random.randint(0, 6, n_games),
        'away_wins_last5': np.random.randint(0, 6, n_games),
    })
    
    # Generate realistic goal totals
    home_advantage = 0.35
    
    # Home goals: linear mix of the home team's scoring features and the
    # opponent's goals-against average, plus the home-advantage constant, a
    # rest-day differential term and Gaussian noise; rounded, clipped to
    # [0, 9] and cast to int.
    data['home_goals'] = np.round(
        data['home_goals_avg'] * 0.3 +
        data['home_goals_last5'] * 0.2 +
        (4 - data['away_goals_against_avg']) * 0.3 +
        data['home_pp_pct'] * 3 +
        home_advantage +
        (data['home_rest_days'] - data['away_rest_days']) * 0.1 +
        np.random.normal(0, 0.8, n_games)
    ).clip(0, 9).astype(int)
    
    # Away goals: same structure, but without the home-advantage and
    # rest-day terms.
    data['away_goals'] = np.round(
        data['away_goals_avg'] * 0.3 +
        data['away_goals_last5'] * 0.2 +
        (4 - data['home_goals_against_avg']) * 0.3 +
        data['away_pp_pct'] * 3 +
        np.random.normal(0, 0.8, n_games)
    ).clip(0, 9).astype(int)
    
    print(f"Generated {n_games} synthetic games")

# Both branches must provide 'home_goals' and 'away_goals' columns; a loaded
# CSV without them raises KeyError here.
print(f"\nDataset shape: {data.shape}")
print(f"Home goals mean: {data['home_goals'].mean():.2f}")
print(f"Away goals mean: {data['away_goals'].mean():.2f}")

In [None]:
# Split the columns into the two regression targets, identifier/metadata
# columns that must not be fed to the models, and the model inputs.
target_cols = ['home_goals', 'away_goals']
exclude_cols = target_cols + ['home_team', 'away_team', 'date', 'game_id', 'season']

# Everything that is not excluded is a feature; the DataFrame's column order is kept.
feature_cols = list(filter(lambda col: col not in exclude_cols, data.columns))
print(f"Features ({len(feature_cols)}): {feature_cols[:10]}...")

X = data.loc[:, feature_cols]
y_home, y_away = data['home_goals'], data['away_goals']

In [None]:
# Two-stage split giving 60/20/20 train/validation/test:
# first hold out 20% for testing, then take 25% of the remaining 80%
# (0.25 * 0.8 = 0.2 of the total) for validation.
(X_trainval, X_test,
 y_home_trainval, y_home_test,
 y_away_trainval, y_away_test) = train_test_split(
    X, y_home, y_away, test_size=0.2, random_state=42
)

(X_train, X_val,
 y_home_train, y_home_val,
 y_away_train, y_away_val) = train_test_split(
    X_trainval, y_home_trainval, y_away_trainval, test_size=0.25, random_state=42
)

# Scale features for linear models. The scaler is fitted on the training
# split only and then applied unchanged to all three splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

print(f"Training set: {len(X_train)} games")
print(f"Validation set: {len(X_val)} games")
print(f"Test set: {len(X_test)} games")

## 3. Train Base Models

In [None]:
def evaluate_model(model, X, y):
    """Score one fitted regressor on (X, y).

    Args:
        model: Object exposing ``predict(X)``.
        X: Feature matrix passed straight to ``model.predict``.
        y: True target values.

    Returns:
        dict with keys ``'rmse'``, ``'mae'`` and ``'r2'``.
    """
    predictions = model.predict(X)

    scores = {}
    scores['rmse'] = np.sqrt(mean_squared_error(y, predictions))
    scores['mae'] = mean_absolute_error(y, predictions)
    scores['r2'] = r2_score(y, predictions)
    return scores

def evaluate_ensemble(home_model, away_model, X, y_home, y_away):
    """Evaluate the home and away models and return per-target and pooled metrics.

    Args:
        home_model: Fitted regressor for home goals (must expose ``predict``).
        away_model: Fitted regressor for away goals (must expose ``predict``).
        X: Feature matrix passed to both models.
        y_home: True home goals; any array-like (Series, ndarray, list).
        y_away: True away goals; any array-like (Series, ndarray, list).

    Returns:
        dict with ``home_*`` and ``away_*`` entries for rmse/mae/r2, plus
        ``combined_rmse``, ``combined_mae`` and ``combined_r2`` computed over
        the concatenation of both targets.
    """
    home_pred = home_model.predict(X)
    away_pred = away_model.predict(X)

    # np.asarray instead of `.values` so plain arrays and lists work as well
    # as pandas Series.
    home_actual = np.asarray(y_home).ravel()
    away_actual = np.asarray(y_away).ravel()

    metrics = {
        'home_rmse': np.sqrt(mean_squared_error(home_actual, home_pred)),
        'away_rmse': np.sqrt(mean_squared_error(away_actual, away_pred)),
        'home_mae': mean_absolute_error(home_actual, home_pred),
        'away_mae': mean_absolute_error(away_actual, away_pred),
        'home_r2': r2_score(home_actual, home_pred),
        'away_r2': r2_score(away_actual, away_pred),
    }

    # Combined metrics: pool home and away predictions into one vector.
    all_pred = np.concatenate([home_pred, away_pred])
    all_actual = np.concatenate([home_actual, away_actual])
    metrics['combined_rmse'] = np.sqrt(mean_squared_error(all_actual, all_pred))
    metrics['combined_mae'] = mean_absolute_error(all_actual, all_pred)
    metrics['combined_r2'] = r2_score(all_actual, all_pred)

    return metrics

In [None]:
# Registry of unfitted base learners, keyed by the short name that later
# cells use to look up models, predictions and weights.
base_models = {}
base_models['ridge'] = Ridge(alpha=1.0)
base_models['elastic_net'] = ElasticNet(alpha=0.1, l1_ratio=0.5)
base_models['random_forest'] = RandomForestRegressor(
    n_estimators=200,
    max_depth=10,
    random_state=42,
    n_jobs=-1,
)
base_models['gradient_boost'] = GradientBoostingRegressor(
    n_estimators=100,
    max_depth=5,
    random_state=42,
)

# XGBoost joins the registry only when the optional import succeeded.
if XGB_AVAILABLE:
    xgb_params = dict(n_estimators=200, max_depth=6, learning_rate=0.1, random_state=42, n_jobs=-1)
    base_models['xgboost'] = xgb.XGBRegressor(**xgb_params)

print(f"Base models: {list(base_models.keys())}")

In [None]:
# Train and evaluate base models for HOME goals.
# Results are collected in three dicts keyed by model name:
#   home_base_models      -> fitted estimator
#   home_base_predictions -> predictions on the validation split
#   home_base_metrics     -> {'rmse': ..., 'mae': ...} on the validation split
home_base_models = {}
home_base_predictions = {}
home_base_metrics = {}

print("Training Base Models for Home Goals:")
print("=" * 50)

for name, model in base_models.items():
    # clone() returns an unfitted copy with the same hyperparameters, so the
    # templates in base_models stay unfitted and can be reused for away goals.
    m = clone(model)
    
    # Train (use scaled for linear models)
    if name in ['ridge', 'elastic_net']:
        m.fit(X_train_scaled, y_home_train)
        val_pred = m.predict(X_val_scaled)
    else:
        m.fit(X_train, y_home_train)
        val_pred = m.predict(X_val)
    
    # Store
    home_base_models[name] = m
    home_base_predictions[name] = val_pred
    
    # Metrics
    rmse = np.sqrt(mean_squared_error(y_home_val, val_pred))
    mae = mean_absolute_error(y_home_val, val_pred)
    home_base_metrics[name] = {'rmse': rmse, 'mae': mae}
    
    print(f"  {name:15} - RMSE: {rmse:.4f}, MAE: {mae:.4f}")

print()

In [None]:
# Train and evaluate base models for AWAY goals.
# Results are collected in three dicts keyed by model name:
#   away_base_models      -> fitted estimator
#   away_base_predictions -> predictions on the validation split
#   away_base_metrics     -> {'rmse': ..., 'mae': ...} on the validation split
away_base_models = {}
away_base_predictions = {}
away_base_metrics = {}

print("Training Base Models for Away Goals:")
print("=" * 50)

for name, model in base_models.items():
    # clone() returns an unfitted copy with the same hyperparameters, so the
    # templates in base_models are never fitted themselves.
    m = clone(model)
    
    # Train (use scaled for linear models)
    if name in ['ridge', 'elastic_net']:
        m.fit(X_train_scaled, y_away_train)
        val_pred = m.predict(X_val_scaled)
    else:
        m.fit(X_train, y_away_train)
        val_pred = m.predict(X_val)
    
    # Store
    away_base_models[name] = m
    away_base_predictions[name] = val_pred
    
    # Metrics
    rmse = np.sqrt(mean_squared_error(y_away_val, val_pred))
    mae = mean_absolute_error(y_away_val, val_pred)
    away_base_metrics[name] = {'rmse': rmse, 'mae': mae}
    
    print(f"  {name:15} - RMSE: {rmse:.4f}, MAE: {mae:.4f}")

print()

## 4. Simple Average Ensemble

In [None]:
# Unweighted ensemble: stack the per-model validation predictions row-wise
# and average across models (axis 0) to get one prediction per game.
home_avg_pred = np.asarray(list(home_base_predictions.values())).mean(axis=0)
away_avg_pred = np.asarray(list(away_base_predictions.values())).mean(axis=0)

avg_home_mse = mean_squared_error(y_home_val, home_avg_pred)
avg_away_mse = mean_squared_error(y_away_val, away_avg_pred)
avg_home_rmse = np.sqrt(avg_home_mse)
avg_away_rmse = np.sqrt(avg_away_mse)

print("Simple Average Ensemble:")
print(f"  Home RMSE: {avg_home_rmse:.4f}")
print(f"  Away RMSE: {avg_away_rmse:.4f}")

## 5. Weighted Average Ensemble

In [None]:
# Calculate weights based on inverse RMSE (better models get more weight)
def get_inverse_weights(metrics_dict):
    """Turn per-model RMSE values into ensemble weights that sum to 1.

    Each weight is proportional to ``1 / rmse``. If one or more models have
    an RMSE of exactly zero, the weight is split equally among those models
    and all others get zero, instead of producing NaN weights from inf/inf.

    Args:
        metrics_dict: Mapping ``model name -> {'rmse': float, ...}``.

    Returns:
        dict mapping each model name to its weight, in the input order.
        An empty input yields an empty dict.
    """
    names = list(metrics_dict.keys())
    if not names:
        return {}

    rmses = np.array([metrics_dict[name]['rmse'] for name in names], dtype=float)

    perfect = rmses == 0
    if perfect.any():
        # 1 / 0 would give inf, and inf / inf gives NaN for every weight.
        weights = perfect / perfect.sum()
    else:
        inv_rmse = 1.0 / rmses
        weights = inv_rmse / inv_rmse.sum()  # Normalize to sum to 1

    return dict(zip(names, weights))

# Derive one weight per base model for each target from its validation RMSE.
home_weights = get_inverse_weights(home_base_metrics)
away_weights = get_inverse_weights(away_base_metrics)

print("Model Weights (based on inverse RMSE):")
for section_title, section_weights in (("\nHome Goals:", home_weights), ("\nAway Goals:", away_weights)):
    print(section_title)
    for name, w in section_weights.items():
        print(f"  {name:15}: {w:.4f}")

In [None]:
# Weighted ensemble on the validation split: accumulate each model's
# predictions scaled by its weight, in the order the models were trained.
home_weighted_pred = np.zeros(len(y_home_val))
for model_name in home_base_predictions:
    home_weighted_pred += home_weights[model_name] * home_base_predictions[model_name]

away_weighted_pred = np.zeros(len(y_away_val))
for model_name in away_base_predictions:
    away_weighted_pred += away_weights[model_name] * away_base_predictions[model_name]

weighted_home_mse = mean_squared_error(y_home_val, home_weighted_pred)
weighted_away_mse = mean_squared_error(y_away_val, away_weighted_pred)
weighted_home_rmse = np.sqrt(weighted_home_mse)
weighted_away_rmse = np.sqrt(weighted_away_mse)

print("Weighted Average Ensemble:")
print(f"  Home RMSE: {weighted_home_rmse:.4f}")
print(f"  Away RMSE: {weighted_away_rmse:.4f}")

## 6. Stacking Ensemble

In [None]:
# Create meta-features from base model predictions
def create_meta_features(base_predictions):
    """Build the meta-learner's design matrix from per-model predictions.

    Args:
        base_predictions: Mapping ``model name -> prediction array``.

    Returns:
        Array with one column per entry of ``base_predictions``, in the
        mapping's iteration order.
    """
    prediction_columns = [pred for pred in base_predictions.values()]
    return np.column_stack(prediction_columns)

# Proper stacking trains the meta-learner on out-of-fold predictions for the
# training set. For simplicity that is NOT done here: the meta-features are
# the base models' predictions on the validation split, and the meta-learner
# cell splits that set again for fitting and evaluation.
# In production, use proper out-of-fold predictions.
print("Creating meta-features from validation-set predictions...")

# Splitter for an out-of-fold implementation; currently unused in this notebook.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

home_meta_X = create_meta_features(home_base_predictions)
away_meta_X = create_meta_features(away_base_predictions)

print(f"Meta-features shape: {home_meta_X.shape}")

In [None]:
# Train meta-learner (Ridge regression)

# Split validation set for meta-learner: 70% to fit it, 30% held out.
# A dedicated seeded generator makes the split reproducible even when real
# data is loaded and the global NumPy seed was never set. Slicing one
# permutation gives disjoint index sets without an O(n^2) membership scan.
meta_rng = np.random.default_rng(42)
meta_perm = meta_rng.permutation(len(home_meta_X))
n_meta_train = int(len(home_meta_X) * 0.7)
meta_train_idx = meta_perm[:n_meta_train]
meta_val_idx = np.sort(meta_perm[n_meta_train:])

# The same row indices are used for both targets so home and away
# meta-models see the same games.
home_meta_train = home_meta_X[meta_train_idx]
home_meta_val = home_meta_X[meta_val_idx]
y_home_meta_train = y_home_val.iloc[meta_train_idx]
y_home_meta_val = y_home_val.iloc[meta_val_idx]

away_meta_train = away_meta_X[meta_train_idx]
away_meta_val = away_meta_X[meta_val_idx]
y_away_meta_train = y_away_val.iloc[meta_train_idx]
y_away_meta_val = y_away_val.iloc[meta_val_idx]

# Train meta-models
home_meta_model = Ridge(alpha=1.0)
home_meta_model.fit(home_meta_train, y_home_meta_train)

away_meta_model = Ridge(alpha=1.0)
away_meta_model.fit(away_meta_train, y_away_meta_train)

# Evaluate
home_stack_pred = home_meta_model.predict(home_meta_val)
away_stack_pred = away_meta_model.predict(away_meta_val)

stack_home_rmse = np.sqrt(mean_squared_error(y_home_meta_val, home_stack_pred))
stack_away_rmse = np.sqrt(mean_squared_error(y_away_meta_val, away_stack_pred))

print("\nStacking Ensemble (on held-out validation):")
print(f"  Home RMSE: {stack_home_rmse:.4f}")
print(f"  Away RMSE: {stack_away_rmse:.4f}")

# Meta-model coefficients show relative importance of each base model.
# Column order of the meta-features follows the order in which the base
# models were trained, which is also the key order of home_base_models.
print("\nMeta-model Coefficients (Home):")
for name, coef in zip(home_base_models.keys(), home_meta_model.coef_):
    print(f"  {name:15}: {coef:.4f}")

## 7. Sklearn Stacking Regressor

In [None]:
# Define estimators for StackingRegressor.
# Ridge is wrapped in a pipeline with StandardScaler so that it is fitted on
# standardized features, consistent with how the ridge / elastic-net base
# models are trained on scaled features earlier in the notebook. The tree
# ensembles receive the raw features.
estimators = [
    ('ridge', make_pipeline(StandardScaler(), Ridge(alpha=1.0))),
    ('rf', RandomForestRegressor(n_estimators=100, max_depth=8, random_state=42, n_jobs=-1)),
    ('gb', GradientBoostingRegressor(n_estimators=100, max_depth=5, random_state=42)),
]


def build_stacking_regressor():
    """Return an unfitted StackingRegressor over `estimators` with a Ridge meta-learner."""
    return StackingRegressor(
        estimators=estimators,
        final_estimator=Ridge(alpha=1.0),
        cv=5,
        n_jobs=-1,
    )


# One stacking ensemble per target
home_stacking = build_stacking_regressor()
away_stacking = build_stacking_regressor()

print("Training Sklearn Stacking Ensemble...")
# StackingRegressor does its own internal cross-validation (cv=5), so the
# train and validation splits are merged; the test split stays untouched.
X_full_train = pd.concat([X_train, X_val])
y_home_full = pd.concat([y_home_train, y_home_val])
y_away_full = pd.concat([y_away_train, y_away_val])

home_stacking.fit(X_full_train, y_home_full)
away_stacking.fit(X_full_train, y_away_full)

print("Stacking models trained!")

## 8. Final Ensemble Comparison

In [None]:
# Create final ensemble class
class WeightedEnsemble:
    """Weighted ensemble of base models.

    Predictions are the weighted sum of each base model's predictions.
    Models whose name is listed in ``linear_models`` are fed scaled
    features (when a scaler is given); all others get the raw features.

    Args:
        base_models: Mapping ``name -> fitted model`` exposing ``predict``.
        weights: Mapping ``name -> weight``; must contain every model name.
        scaler: Optional fitted transformer exposing ``transform``.
        linear_models: Names of models that need scaled input. ``None``
            selects the default ``['ridge', 'elastic_net']``; an explicit
            empty list means no model receives scaled input.
    """
    
    def __init__(self, base_models, weights, scaler=None, linear_models=None):
        self.base_models = base_models
        self.weights = weights
        self.scaler = scaler
        # `is None` rather than `or`: an explicitly passed empty list must be
        # respected instead of being silently replaced by the default.
        if linear_models is None:
            linear_models = ['ridge', 'elastic_net']
        self.linear_models = linear_models
    
    def predict(self, X):
        """Return the weighted sum of base-model predictions for ``X``.

        Raises:
            KeyError: If a base model has no entry in ``weights``.
        """
        # Scale only when a scaler exists and at least one model needs it.
        needs_scaling = self.scaler is not None and any(
            name in self.linear_models for name in self.base_models
        )
        X_scaled = self.scaler.transform(X) if needs_scaling else X
        
        pred = np.zeros(len(X))
        for name, model in self.base_models.items():
            features = X_scaled if name in self.linear_models else X
            pred += model.predict(features) * self.weights[name]
        return pred

# Instantiate one weighted ensemble per target; both share the scaler that
# was fitted on the training split.
final_home_ensemble = WeightedEnsemble(
    base_models=home_base_models,
    weights=home_weights,
    scaler=scaler,
)
final_away_ensemble = WeightedEnsemble(
    base_models=away_base_models,
    weights=away_weights,
    scaler=scaler,
)

In [None]:
# Evaluate all approaches on test set
results = []


def predict_on_test(name, model):
    """Predict on the test split, giving linear models the scaled features they were fitted on."""
    if name in ['ridge', 'elastic_net']:
        return model.predict(X_test_scaled)
    return model.predict(X_test)


def record_result(method, home_pred, away_pred):
    """Append home, away and pooled test RMSE for one method to `results`."""
    # Pooled RMSE over both targets, the same definition evaluate_ensemble
    # uses for 'combined_rmse' (not the mean of the two RMSE values).
    pooled_actual = np.concatenate([y_home_test.values, y_away_test.values])
    pooled_pred = np.concatenate([home_pred, away_pred])
    results.append({
        'method': method,
        'home_rmse': np.sqrt(mean_squared_error(y_home_test, home_pred)),
        'away_rmse': np.sqrt(mean_squared_error(y_away_test, away_pred)),
        'combined_rmse': np.sqrt(mean_squared_error(pooled_actual, pooled_pred)),
    })


# Best individual model: the base model with the lowest validation RMSE on
# HOME goals; the same model type is then used for away goals as well.
best_name = min(home_base_metrics, key=lambda x: home_base_metrics[x]['rmse'])
best_home_pred = predict_on_test(best_name, home_base_models[best_name])
best_away_pred = predict_on_test(best_name, away_base_models[best_name])
record_result(f'Best Single ({best_name})', best_home_pred, best_away_pred)

# Simple Average
all_home_preds = [predict_on_test(name, model) for name, model in home_base_models.items()]
all_away_preds = [predict_on_test(name, model) for name, model in away_base_models.items()]

avg_home = np.mean(all_home_preds, axis=0)
avg_away = np.mean(all_away_preds, axis=0)
record_result('Simple Average', avg_home, avg_away)

# Weighted Average (the ensemble applies the scaler internally)
weighted_home = final_home_ensemble.predict(X_test)
weighted_away = final_away_ensemble.predict(X_test)
record_result('Weighted Average', weighted_home, weighted_away)

# Stacking. NOTE: these models were fitted on train + validation, whereas the
# base models above were fitted on the training split only, so this
# comparison is not strictly like-for-like.
stack_home = home_stacking.predict(X_test)
stack_away = away_stacking.predict(X_test)
record_result('Stacking', stack_home, stack_away)

# Show results, best (lowest pooled RMSE) first
results_df = pd.DataFrame(results)
results_df = results_df.sort_values('combined_rmse')

print("\n Ensemble Comparison on Test Set")
print("=" * 60)
print(results_df.to_string(index=False))

In [None]:
# Grouped bar chart: one pair of bars (home / away test RMSE) per method,
# in the sorted order of results_df.
fig, ax = plt.subplots(figsize=(10, 6))

x = np.arange(len(results_df))
width = 0.35

# Offset each series by half a bar width so the pair is centred on its tick.
bars1 = ax.bar(x - width/2, results_df['home_rmse'], width, label='Home RMSE')
bars2 = ax.bar(x + width/2, results_df['away_rmse'], width, label='Away RMSE')

ax.set(ylabel='RMSE', title='Ensemble Methods Comparison')
ax.set_xticks(x)
ax.set_xticklabels(results_df['method'], rotation=15, ha='right')
ax.legend()

plt.tight_layout()
plt.show()

## 9. Save Best Ensemble

In [None]:
# results_df is sorted by combined RMSE, so its first row is the best method.
best_method = results_df['method'].iloc[0]
print(f"Best ensemble method: {best_method}")

# Make sure the output directory exists before anything is written to it.
output_dir = '../../output/models'
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Save stacking models (most flexible): one pickle per target.
stacking_artifacts = (
    (f'{output_dir}/ensemble_stacking_home.pkl', home_stacking),
    (f'{output_dir}/ensemble_stacking_away.pkl', away_stacking),
)
for artifact_path, artifact in stacking_artifacts:
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)

# Save weighted ensemble components. The parts are stored as a plain dict
# rather than as WeightedEnsemble instances.
ensemble_data = {
    'home_models': home_base_models,
    'away_models': away_base_models,
    'home_weights': home_weights,
    'away_weights': away_weights,
    'scaler': scaler,
    'linear_models': ['ridge', 'elastic_net'],
}

with open(f'{output_dir}/ensemble_weighted.pkl', 'wb') as f:
    pickle.dump(ensemble_data, f)

# Save model info. Values are converted to built-in floats so that they
# serialize to JSON.
best_results = results_df.iloc[0]
test_metrics = {
    metric: float(best_results[metric])
    for metric in ('home_rmse', 'away_rmse', 'combined_rmse')
}
model_info = {
    'model_type': 'Ensemble',
    'best_method': best_method,
    'base_models': list(base_models.keys()),
    'home_weights': dict((k, float(v)) for k, v in home_weights.items()),
    'away_weights': dict((k, float(v)) for k, v in away_weights.items()),
    'test_metrics': test_metrics,
    'all_results': results_df.to_dict('records'),
    'feature_cols': feature_cols,
    'trained_at': datetime.now().isoformat(),
}

with open(f'{output_dir}/ensemble_info.json', 'w') as f:
    json.dump(model_info, f, indent=2, default=str)

print(f"\nModels saved to {output_dir}/")
print(f"  - ensemble_stacking_home.pkl")
print(f"  - ensemble_stacking_away.pkl")
print(f"  - ensemble_weighted.pkl")
print(f"  - ensemble_info.json")

In [None]:
# Final recap: the winning method and its test-set metrics, taken from the
# first row of the sorted results table.
banner = "=" * 60
print("\n" + banner)
print(" ENSEMBLE TRAINING COMPLETE")
print(banner)

print(f"\nBest Method: {best_method}")
print(f"\nBase Models Used: {list(base_models.keys())}")

print(f"\nFinal Test Performance ({best_method}):")
print(f"  Home RMSE: {best_results['home_rmse']:.4f}")
print(f"  Away RMSE: {best_results['away_rmse']:.4f}")
print(f"  Combined RMSE: {best_results['combined_rmse']:.4f}")