# Baseline Elo Validation

Tests for BaselineEloModel: synthetic data, sanity checks, fit/predict/evaluate.

Run all cells. All tests should print PASS.

## Setup

In [None]:
import os
import pathlib
import sys

# Pick the directory to work from, based on where the notebook was launched:
#   * a directory named `validation`       -> use its parent
#   * a directory containing `python/`     -> use that subdirectory
#   * anything else                        -> stay where we are
_cwd = pathlib.Path('.').resolve()
_nested = _cwd / 'python'
if _cwd.name == 'validation':
    _python_dir = _cwd.parent
else:
    _python_dir = _nested if _nested.is_dir() else _cwd

# Make the chosen directory both the working directory and the first import
# location (presumably so the `utils` package imported below resolves).
os.chdir(_python_dir)
sys.path.insert(0, str(_python_dir))

import pandas as pd
import numpy as np
from utils.baseline_elo import BaselineEloModel

# Running tallies shared by every test cell; updated only through check().
passed = 0
failed = 0


def check(name, condition):
    """Record and print the outcome of one named check.

    Prints a ``PASS``/``FAIL`` line for ``name`` and bumps the matching
    module-level counter. ``condition`` is evaluated for truthiness.
    """
    global passed, failed
    if not condition:
        print(f'  FAIL: {name}')
        failed += 1
        return
    print(f'  PASS: {name}')
    passed += 1


print('BaselineEloModel loaded')

## Test 1: Dominant team (synthetic)

Team A always beats Team B. Expected: A's rating rises and B's rating falls.

In [None]:
# Team_A wins every one of the 20 games: the same ten winning scorelines are
# used once with Team_A at home and once with Team_A away.
_a_goals = [4, 3, 5, 2, 4, 3, 5, 4, 3, 6]
_b_goals = [1, 2, 0, 1, 2, 1, 0, 2, 1, 0]
_legs = len(_a_goals)

synthetic = pd.DataFrame({
    'home_team': ['Team_A'] * _legs + ['Team_B'] * _legs,
    'away_team': ['Team_B'] * _legs + ['Team_A'] * _legs,
    'home_goals': _a_goals + _b_goals,
    'away_goals': _b_goals + _a_goals,
})

model = BaselineEloModel(dict(k_factor=32, initial_rating=1200))
model.fit(synthetic)

# Both ratings started at 1200; the winner should have climbed by at least
# 100 points and the loser dropped by at least 100.
rA = model.elo.ratings['Team_A']
rB = model.elo.ratings['Team_B']
check('Dominant team A > 1300', rA > 1300)
check('Losing team B < 1100', rB < 1100)
check('A > B', rA > rB)

## Test 2: Equal teams (50/50)

The two teams alternate wins, so both ratings should stay near the initial value.

In [None]:
# Genuinely balanced fixture: results alternate strictly (C wins 3-1, then D
# wins 3-1, ...). C hosts the first ten games and D hosts the last ten, so each
# team finishes with ten wins (five at home, five away) and the same goal
# totals. Neither team should drift far from the initial rating.
equal = pd.DataFrame({
    'home_team': ['C'] * 10 + ['D'] * 10,
    'away_team': ['D'] * 10 + ['C'] * 10,
    # Games 1-10 (C at home): C 3-1, C 1-3, ...  -> winners C, D, C, D, ...
    # Games 11-20 (D at home): D 1-3, D 3-1, ... -> winners C, D, C, D, ...
    'home_goals': [3, 1] * 5 + [1, 3] * 5,
    'away_goals': [1, 3] * 5 + [3, 1] * 5,
})

m2 = BaselineEloModel({'k_factor': 20, 'initial_rating': 1200})
m2.fit(equal)

rC = m2.elo.ratings['C']
rD = m2.elo.ratings['D']
# Both ratings must stay within 100 points of the 1200 start and within
# 80 points of each other.
check('Equal teams near 1200', 1100 < rC < 1300 and 1100 < rD < 1300)
check('Equal teams close', abs(rC - rD) < 80)

## Test 3: Sanity (finite predictions, fit required)

In [None]:
# Fit on the one-sided `synthetic` fixture, then sanity-check the shape and
# range of what the prediction methods return.
m3 = BaselineEloModel(dict(k_factor=5, initial_rating=1200))
m3.fit(synthetic)

_home, _away = 'Team_A', 'Team_B'

# Goal prediction: two finite, non-negative numbers with a plausible total.
h, a = m3.predict_goals({'home_team': _home, 'away_team': _away})
check('predict_goals finite', np.isfinite(h) and np.isfinite(a))
check('predict_goals non-negative', h >= 0 and a >= 0)
check('predict_goals sum reasonable', 1 < h + a < 10)

# Winner prediction: a team name plus a confidence inside [0, 1].
winner, conf = m3.predict_winner({'home_team': _home, 'away_team': _away})
check('predict_winner returns tuple', isinstance(winner, str) and isinstance(conf, (int, float)))
check('confidence in [0,1]', 0 <= conf <= 1)

## Test 4: evaluate() and get_rankings

In [None]:
m4 = BaselineEloModel(dict(k_factor=32, initial_rating=1200))
m4.fit(synthetic)

# evaluate(): must return a dict carrying both metrics, each with a sane value.
metrics = m4.evaluate(synthetic)
check('evaluate returns dict', isinstance(metrics, dict))
for _label, _key in (
    ('evaluate has combined_rmse', 'combined_rmse'),
    ('evaluate has win_accuracy', 'win_accuracy'),
):
    check(_label, _key in metrics)
check('RMSE finite', np.isfinite(metrics['combined_rmse']))
check('win_accuracy in [0,1]', 0 <= metrics['win_accuracy'] <= 1)


def _is_rank_entry(entry):
    """Return True when `entry` looks like a (team name, numeric rating) pair."""
    return len(entry) == 2 and isinstance(entry[0], str) and isinstance(entry[1], (int, float))


def _rating_of(entry):
    """Sort key: the rating component of a ranking entry."""
    return entry[1]


# get_rankings(): a list of (team, rating) pairs ordered by rating, highest first.
ranks = m4.get_rankings()
check('get_rankings returns list', isinstance(ranks, list))
check('get_rankings (team, rating) tuples', all(_is_rank_entry(entry) for entry in ranks))
check('get_rankings sorted', ranks == sorted(ranks, key=_rating_of, reverse=True))

## Test 5: Unknown team (uses base rating)

Note: the code cell for this test appears after the Test 6 cell below.

## Test 6: Edge cases (ties, empty, single game, formula constants)

In [None]:
# --- Single game: one decisive result should already rank the winner higher.
m6a = BaselineEloModel(dict(k_factor=32, initial_rating=1200))
_one_game = pd.DataFrame([{'home_team': 'A', 'away_team': 'B', 'home_goals': 4, 'away_goals': 1}])
m6a.fit(_one_game)
rA = m6a.elo.ratings['A']
rB = m6a.elo.ratings['B']
check('Single game: winner higher', rA > rB)

# --- Empty DataFrame: fitting on zero rows must not crash, and the prediction
# for two never-seen teams is expected to land within one goal of 3-3.
m6b = BaselineEloModel(dict(k_factor=5, initial_rating=1200))
_no_games = pd.DataFrame(columns=['home_team', 'away_team', 'home_goals', 'away_goals'])
m6b.fit(_no_games)
h, a = m6b.predict_goals(dict(home_team='X', away_team='Y'))
check('Empty fit: predict ~3-3', abs(h - 3) < 1 and abs(a - 3) < 1)

# --- Formula constants: non-default goal settings plus a team that loses 0-5
# five times. Predicted goals must still be non-negative.
# NOTE(review): presumably the model clips predictions at zero - confirm in BaselineEloModel.
m6c = BaselineEloModel(dict(k_factor=32, league_avg_goals=2, goal_diff_half_range=10))
df_weak = pd.DataFrame({
    'home_team': ['Weak'] * 5,
    'away_team': ['Strong'] * 5,
    'home_goals': [0] * 5,
    'away_goals': [5] * 5,
})
m6c.fit(df_weak)
h, a = m6c.predict_goals(dict(home_team='Weak', away_team='Strong'))
check('Goals non-negative (extreme)', h >= 0 and a >= 0)

# --- elo_scale=0 fallback: even after one-sided training data, the prediction
# is expected to be an even split, within half a goal of 3-3.
m6d = BaselineEloModel(dict(k_factor=32, elo_scale=0))
m6d.fit(synthetic)
h, a = m6d.predict_goals(dict(home_team='Team_A', away_team='Team_B'))
check('elo_scale=0 returns 50/50', abs(h - 3) < 0.5 and abs(a - 3) < 0.5)

In [None]:
# Test 5: predictions that involve teams absent from the training data.
m5 = BaselineEloModel(dict(k_factor=32, initial_rating=1200))
m5.fit(synthetic)

# One unknown team against a known one: the winner must be one of the two
# participants and the confidence must stay inside [0, 1].
_stranger, _known = 'Unknown_Team', 'Team_A'
winner, conf = m5.predict_winner({'home_team': _stranger, 'away_team': _known})
check('Unknown team handled', winner in (_stranger, _known))
check('Confidence valid', 0 <= conf <= 1)

# Two unknown teams: expected to land within one goal of 3-3.
h, a = m5.predict_goals({'home_team': 'Unknown_Team', 'away_team': 'Unknown_Team2'})
check('Two unknowns predict ~3-3', abs(h - 3) < 1 and abs(a - 3) < 1)

## Summary

In [None]:
# Final tally across every check() call made by the cells above.
print(f'\nTotal: {passed} passed, {failed} failed')
_verdict = (
    '[OK] All Baseline Elo validation tests passed'
    if failed == 0
    else '[FAIL] Some tests failed'
)
print(_verdict)