# Baseline Models - Validation Suite

9 tests covering the baseline model types (EnsembleBaseline is imported but not exercised here): sanity, determinism, fit-required,
team distinction, home advantage, metrics, unknown teams, save/load, predict_winner.

**Run all cells with Shift+Enter. All tests should print PASS.**

## Setup

In [None]:
import sys, os, tempfile
# NOTE: '__file__' is a quoted string literal here, not the module attribute,
# so abspath() resolves it against the current working directory. The net
# effect is a chdir to the PARENT of the current working directory, which
# means re-running this cell moves up one more level each time.
# NOTE(review): presumably the literal is a workaround for notebooks not
# defining __file__, and the parent directory is the project root - confirm.
os.chdir(os.path.join(os.path.dirname(os.path.abspath('__file__')), '..'))
# Put the (new) working directory first on the import path, ahead of the
# `utils.baseline_model` import that follows.
sys.path.insert(0, '.')

import pandas as pd
import numpy as np

from utils.baseline_model import (
    GlobalMeanBaseline, TeamMeanBaseline, HomeAwayBaseline,
    MovingAverageBaseline, WeightedHistoryBaseline, PoissonBaseline,
    DixonColesBaseline, BayesianTeamBaseline, EnsembleBaseline,
    BaselineModel
)

# Synthetic fixture: 200 random games between four teams, seeded so that every
# run of the notebook sees the same data.
np.random.seed(42)
teams = ['Alpha', 'Bravo', 'Charlie', 'Delta']


def _draw_game(i):
    """Draw one synthetic game row.

    The draw order (team pair, then home goals, then away goals) is fixed so
    the seeded random stream yields the same games on every run.
    """
    home, away = np.random.choice(teams, 2, replace=False)
    home_goals = np.random.poisson(3)
    away_goals = np.random.poisson(2)
    return {'game_id': f'g{i}', 'home_team': home, 'away_team': away,
            'home_goals': home_goals, 'away_goals': away_goals}


rows = [_draw_game(i) for i in range(200)]
test_data = pd.DataFrame(rows)

# First 160 games are used for fitting, the remaining 40 for evaluation.
train_data, val_data = test_data.iloc[:160].copy(), test_data.iloc[160:].copy()

# Registry of (label, unfitted model instance) pairs shared by the test cells.
# Written as an insertion-ordered dict literal and flattened to a list of
# 2-tuples, so iteration order and element shape stay (label, model).
ALL_MODELS = list({
    'GlobalMean': GlobalMeanBaseline(),
    'TeamMean': TeamMeanBaseline(),
    'HomeAway': HomeAwayBaseline(),
    'MovingAvg': MovingAverageBaseline(window=5),
    'WeightedHist': WeightedHistoryBaseline(decay=0.95),
    'Poisson': PoissonBaseline(),
    'DixonColes': DixonColesBaseline(decay=1.0),
    'Bayesian': BayesianTeamBaseline(prior_weight=10),
}.items())

# Running tallies updated by check() and reported in the Summary cell.
passed = 0
failed = 0


def check(name, condition):
    """Record and print the outcome of a single named check.

    Prints '  PASS: <name>' when ``condition`` is truthy and
    '  FAIL: <name>' otherwise, then increments the matching module-level
    counter (``passed`` or ``failed``).
    """
    global passed, failed
    ok = bool(condition)
    print(f'  PASS: {name}' if ok else f'  FAIL: {name}')
    # Exactly one of the two counters advances by one.
    passed += ok
    failed += not ok


print('Setup OK')

## Test 1: Sanity Check (predictions are finite numbers)

In [None]:
print('Test 1: Sanity Check')
# Fits every registered model on the training split, then verifies that both
# goal predictions for one known matchup are finite and non-negative.
# Later cells reuse these fitted instances.
for name, model in ALL_MODELS:
    model.fit(train_data)
    home_pred = model.predict_home_goals('Alpha', 'Bravo')
    away_pred = model.predict_away_goals('Alpha', 'Bravo')
    home_ok = np.isfinite(home_pred) and home_pred >= 0
    check(f'{name} home finite', home_ok)
    away_ok = np.isfinite(away_pred) and away_pred >= 0
    check(f'{name} away finite', away_ok)

## Test 2: Deterministic (same input = same output)

In [None]:
print('Test 2: Deterministic')
# Ask each model for the same prediction twice; the two answers must agree to
# within 1e-10.
for name, model in ALL_MODELS:
    repeats = [model.predict_home_goals('Alpha', 'Bravo') for _ in range(2)]
    check(f'{name} deterministic', abs(repeats[0] - repeats[1]) < 1e-10)

## Test 3: Fit Required (predict before fit should fail or use defaults)

In [None]:
print('Test 3: Fit Required')
# An unfitted model may either raise or return a finite fallback value; both
# are accepted. Only the prediction call is guarded, so an error while
# validating the returned value is not mistaken for "raises before fit".
fresh = TeamMeanBaseline()
try:
    val = fresh.predict_home_goals('Alpha', 'Bravo')
except Exception:
    check('TeamMean raises before fit', True)
else:
    try:
        fallback_ok = bool(np.isfinite(val))
    except (TypeError, ValueError):
        # Non-numeric (e.g. None) or non-scalar fallback: not a usable prediction.
        fallback_ok = False
    check('TeamMean returns fallback before fit', fallback_ok)

## Test 4: Team Distinction (different teams get different predictions)

In [None]:
print('Test 4: Team Distinction')
# Each team-aware model must produce finite predictions for two different
# matchups. Whether the two predictions differ is recorded per model and
# asserted once across all of them: at least some team-aware models should
# differ.
differs = {}
for name, model in ALL_MODELS:
    if name == 'GlobalMean':
        continue  # team-agnostic by design; checked separately below
    h_ab = model.predict_home_goals('Alpha', 'Bravo')
    h_cd = model.predict_home_goals('Charlie', 'Delta')
    check(f'{name} predicts both matchups', np.isfinite(h_ab) and np.isfinite(h_cd))
    differs[name] = abs(h_ab - h_cd) > 1e-10
check('At least one team-aware model differentiates teams', any(differs.values()))

# GlobalMean should NOT differentiate. Look it up by label rather than by
# position so reordering ALL_MODELS cannot silently test the wrong model.
gm = dict(ALL_MODELS)['GlobalMean']
check('GlobalMean same for all teams',
      abs(gm.predict_home_goals('Alpha', 'Bravo') - gm.predict_home_goals('Charlie', 'Delta')) < 1e-10)

## Test 5: Home Advantage (home predictions > away for same matchup)

In [None]:
print('Test 5: Home Advantage')
# Fit a fresh HomeAwayBaseline and compare, averaged over every team except
# the first, the prediction with that team at home against the first team
# versus the prediction with that team away at the first team.
ha = HomeAwayBaseline()
ha.fit(train_data)
reference, opponents = teams[0], teams[1:]
home_preds = [ha.predict_home_goals(t, reference) for t in opponents]
away_preds = [ha.predict_away_goals(reference, t) for t in opponents]
check('HomeAway captures home advantage', np.mean(home_preds) > np.mean(away_preds))

## Test 6: Evaluation Metrics (evaluate returns expected keys)

In [None]:
print('Test 6: Evaluation Metrics')
# evaluate() must report all four metrics, with a positive combined RMSE and a
# win accuracy inside [0, 1].
expected_keys = {'home_rmse', 'away_rmse', 'combined_rmse', 'win_accuracy'}
for name, model in ALL_MODELS:
    metrics = model.evaluate(val_data)
    check(f'{name} has all metric keys', expected_keys.issubset(metrics.keys()))
    # A missing metric is read as NaN: every comparison with NaN is False, so
    # the value checks record a FAIL instead of aborting the cell with KeyError.
    rmse = metrics.get('combined_rmse', np.nan)
    accuracy = metrics.get('win_accuracy', np.nan)
    check(f'{name} RMSE > 0', rmse > 0)
    check(f'{name} accuracy in [0,1]', 0 <= accuracy <= 1)

## Test 7: Unknown Teams (graceful fallback)

In [None]:
print('Test 7: Unknown Teams')
# Team names absent from the training data must yield a finite prediction.
# Any exception is recorded as a failed check (with its message) rather than
# stopping the run, hence the deliberately broad handler.
for name, model in ALL_MODELS:
    try:
        check(f'{name} handles unknown teams',
              np.isfinite(model.predict_home_goals('UNKNOWN_X', 'UNKNOWN_Y')))
    except Exception as e:
        check(f'{name} handles unknown teams (raised: {e})', False)

## Test 8: Save/Load Roundtrip

In [None]:
print('Test 8: Save/Load Roundtrip')
# Each model is saved to a temporary file, reloaded through
# BaselineModel.load_model, and must reproduce the same home-goal prediction.
for name, model in ALL_MODELS:
    # Reserve a unique path; the handle is closed on leaving the `with` so the
    # model can write to the path itself.
    with tempfile.NamedTemporaryFile(suffix='.pkl', delete=False) as f:
        path = f.name
    try:
        model.save_model(path)
        loaded = BaselineModel.load_model(path)
        h_orig = model.predict_home_goals('Alpha', 'Bravo')
        h_load = loaded.predict_home_goals('Alpha', 'Bravo')
        check(f'{name} save/load match', abs(h_orig - h_load) < 1e-10)
    finally:
        # Always remove the temp file, even when save/load/predict raises.
        os.unlink(path)

## Test 9: predict_winner

In [None]:
print('Test 9: predict_winner')
# predict_winner must name one of the two teams in the matchup and report a
# confidence between 0.5 and 1.0 inclusive.
matchup = ('Alpha', 'Bravo')
for name, model in ALL_MODELS:
    winner, conf = model.predict_winner(*matchup)
    check(f'{name} winner is valid team', winner in matchup)
    check(f'{name} confidence in [0.5,1]', 0.5 <= conf <= 1.0)

## Summary

In [None]:
# Final report: totals accumulated by check(), framed by 40-character rules.
print(f'\n{"="*40}')
print(f'RESULTS: {passed} passed, {failed} failed')
print('ALL TESTS PASSED' if failed == 0 else f'WARNING: {failed} test(s) failed!')
print(f'{"="*40}')