# ELO Rating System - Implementation

Custom ELO system with:
- Rest time advantages
- Travel distance fatigue
- Injury penalties
- Division-aware initialization
- Margin of victory adjustments

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting defaults: these calls mutate global seaborn /
# matplotlib state, so they affect every figure created afterwards.
# Use seaborn's 'darkgrid' axes style.
sns.set_style('darkgrid')
# Default figure size as (width, height).
plt.rcParams['figure.figsize'] = (12, 6)

## ELO Model Class

In [None]:
class EloModel:
    """Elo-style team rating model with contextual rating adjustments.

    Ratings live in ``self.ratings`` (team -> rating). Every game passed to
    ``update_ratings`` appends a post-game snapshot to ``self.rating_history``.

    Before a win probability is computed, each team's rating is temporarily
    adjusted for home advantage, back-to-back games, travel, injuries and the
    rest differential. Those adjustments never modify the stored ratings;
    only ``update_ratings`` does.
    """

    # Rating offset per division tier, relative to ``initial_rating``.
    _DIVISION_OFFSETS = {'D1': 100, 'D2': 0, 'D3': -100}

    def __init__(self, params):
        """
        Initialize ELO model with hyperparameters.

        params: dict with keys:
            Required (read with ``params[...]``):
            - k_factor: rating change rate (20-40)
            - home_advantage: home ice boost (50-150)
            - mov_multiplier: margin of victory weight (0-1.5)
            - mov_method: 'linear' or 'logarithmic'; any value other than
              'linear' is treated as logarithmic. Only read when
              mov_multiplier is non-zero.

            Optional (read with ``params.get``):
            - initial_rating: starting rating (default 1500)
            - ot_win_multiplier: OT win value (0.75-1.0, default 0.75)
            - rest_advantage_per_day: rating boost per rest day
              (0-10, default 0)
            - b2b_penalty: back-to-back penalty (0-50, default 0)
            - travel_penalty_per_1000: rating penalty per 1000 units of
              away travel distance (default 15)
            - injury_penalty: rating penalty per injury (default 25)

            Accepted but not read by any method of this class:
            - season_carryover: year-to-year retention (0.67-0.85)
        """
        self.params = params
        self.ratings = {}
        self.rating_history = []

    def initialize_ratings(self, teams, divisions):
        """Set each team's starting rating from its division tier.

        ``teams`` and ``divisions`` are parallel iterables. 'D1', 'D2' and
        'D3' start at ``initial_rating`` +100, +0 and -100 respectively; any
        other division value (including ``None``) starts at
        ``initial_rating`` itself.
        """
        base = self.params.get('initial_rating', 1500)
        for team, division in zip(teams, divisions):
            self.ratings[team] = base + self._DIVISION_OFFSETS.get(division, 0)

    def calculate_expected_score(self, team_elo, opponent_elo):
        """Return the expected score (win probability) of ``team_elo``.

        Standard logistic Elo curve on a 400-point scale.
        """
        return 1 / (1 + 10 ** ((opponent_elo - team_elo) / 400))

    def calculate_mov_multiplier(self, goal_diff):
        """Return the K-factor multiplier for a given goal differential.

        Returns 1.0 when ``mov_multiplier`` is 0. Otherwise the multiplier
        grows with ``abs(goal_diff)``: linearly for ``mov_method ==
        'linear'``, and with ``log(abs(goal_diff) + 1)`` for any other method.
        """
        weight = self.params['mov_multiplier']
        if weight == 0:
            return 1.0

        margin = abs(goal_diff)
        if self.params['mov_method'] == 'linear':
            return 1 + (margin * weight)
        # Any non-'linear' method is treated as logarithmic.
        return 1 + (np.log(margin + 1) * weight)

    def get_actual_score(self, outcome):
        """Convert a game outcome code to an actual score in [0, 1].

        'RW'/'W' -> 1.0, 'OTW' -> ``ot_win_multiplier``,
        'OTL' -> 1 - ``ot_win_multiplier``; anything else counts as a
        regulation loss (0.0).
        """
        if outcome in ('RW', 'W'):  # Regulation win
            return 1.0

        ot_value = self.params.get('ot_win_multiplier', 0.75)
        if outcome == 'OTW':  # Overtime win
            return ot_value
        if outcome == 'OTL':  # Overtime loss
            return 1 - ot_value
        return 0.0  # Regulation loss

    def adjust_for_context(self, team_elo, is_home, rest_time, travel_dist, injuries):
        """Return ``team_elo`` adjusted for game context.

        - adds ``home_advantage`` when ``is_home`` is true
        - subtracts ``b2b_penalty`` when ``rest_time <= 1``
        - for away teams with positive ``travel_dist``, subtracts
          ``travel_penalty_per_1000`` per 1000 units of distance
        - subtracts ``injury_penalty`` per injury

        The stored rating is not modified.
        """
        adjusted_elo = team_elo

        # Home advantage
        if is_home:
            adjusted_elo += self.params['home_advantage']

        # Back-to-back penalty
        if rest_time <= 1:
            adjusted_elo -= self.params.get('b2b_penalty', 0)

        # Travel fatigue (default: 15 points per 1000 miles)
        if not is_home and travel_dist > 0:
            per_1000 = self.params.get('travel_penalty_per_1000', 15)
            adjusted_elo -= (travel_dist / 1000) * per_1000

        # Injury penalty (default: 25 points per key injury)
        adjusted_elo -= injuries * self.params.get('injury_penalty', 25)

        return adjusted_elo

    def _adjusted_matchup(self, game):
        """Return (home, away) context-adjusted ratings for ``game``.

        Shared by ``update_ratings`` and ``predict_goals`` so that training
        and prediction always use the same adjustments.
        """
        home_elo_adj = self.adjust_for_context(
            self.ratings[game['home_team']], True,
            game['home_rest'], 0, game['home_injuries']
        )
        away_elo_adj = self.adjust_for_context(
            self.ratings[game['away_team']], False,
            game['away_rest'], game['away_travel_dist'], game['away_injuries']
        )

        # Rest differential is credited to the home side (negative when the
        # away team is better rested).
        rest_diff = game['home_rest'] - game['away_rest']
        home_elo_adj += rest_diff * self.params.get('rest_advantage_per_day', 0)

        return home_elo_adj, away_elo_adj

    def update_ratings(self, game):
        """Update both teams' stored ratings after ``game``.

        ``game`` is a mapping (dict or pandas Series) with keys home_team,
        away_team, home_rest, away_rest, home_injuries, away_injuries,
        away_travel_dist, home_outcome, home_goals, away_goals and optionally
        game_id. Raises KeyError if either team has no rating yet.

        Appends the post-game ratings to ``self.rating_history``.
        """
        home_team = game['home_team']
        away_team = game['away_team']

        # Expected scores from context-adjusted ratings
        home_elo_adj, away_elo_adj = self._adjusted_matchup(game)
        home_expected = self.calculate_expected_score(home_elo_adj, away_elo_adj)
        away_expected = 1 - home_expected

        # Actual scores (the away score is the complement of the home score)
        home_actual = self.get_actual_score(game['home_outcome'])
        away_actual = 1 - home_actual

        # Margin of victory scales the K-factor
        goal_diff = game['home_goals'] - game['away_goals']
        k = self.params['k_factor'] * self.calculate_mov_multiplier(goal_diff)

        # Zero-sum update of the stored (unadjusted) ratings
        self.ratings[home_team] += k * (home_actual - home_expected)
        self.ratings[away_team] += k * (away_actual - away_expected)

        # Store history
        self.rating_history.append({
            'game_id': game.get('game_id'),
            'home_team': home_team,
            'away_team': away_team,
            'home_rating': self.ratings[home_team],
            'away_rating': self.ratings[away_team]
        })

    def predict_goals(self, game):
        """Predict goals for both teams; returns ``(home_goals, away_goals)``.

        The home win probability is mapped linearly to a goal differential
        and split evenly around a 3-goal baseline. Stored ratings are not
        modified.
        """
        home_elo_adj, away_elo_adj = self._adjusted_matchup(game)

        # Calculate win probability
        home_win_prob = self.calculate_expected_score(home_elo_adj, away_elo_adj)

        # Convert to expected goal differential
        # Scale: 50% win prob = 0 goal diff, 100% = +6 goals, 0% = -6 goals
        expected_diff = (home_win_prob - 0.5) * 12

        # League average is ~3 goals per team
        home_goals = 3.0 + (expected_diff / 2)
        away_goals = 3.0 - (expected_diff / 2)

        return home_goals, away_goals

    def fit(self, games_df):
        """Train the model on historical games.

        ``games_df`` is processed in row order, so it must already be sorted
        chronologically. Requires a 'division' column in addition to the
        columns read by ``update_ratings``.

        A team's division is taken from the first non-null 'division' value
        among its home games. Teams that never appear as home team start at
        the plain ``initial_rating``.
        """
        teams = pd.concat([games_df['home_team'], games_df['away_team']]).unique()

        # Look divisions up by team name. Zipping the team list against the
        # groupby result directly would pair teams with the wrong divisions
        # (groupby sorts by team) and silently drop away-only teams.
        division_by_team = games_df.groupby('home_team')['division'].first().to_dict()
        divisions = [division_by_team.get(team) for team in teams]
        self.initialize_ratings(teams, divisions)

        # Update ratings game-by-game
        for _, game in games_df.iterrows():
            self.update_ratings(game)

    def evaluate(self, games_df):
        """Evaluate goal predictions on ``games_df``.

        Only the predicted home goals are compared against 'home_goals'.
        Returns a dict with 'rmse', 'mae' and 'r2'. Ratings are not updated
        during evaluation.
        """
        predictions = []
        actuals = []

        for _, game in games_df.iterrows():
            home_pred, _away_pred = self.predict_goals(game)
            predictions.append(home_pred)
            actuals.append(game['home_goals'])

        # ``squared=False`` was deprecated in scikit-learn 1.4 and removed in
        # 1.6; taking the square root of the MSE works on every version.
        rmse = float(np.sqrt(mean_squared_error(actuals, predictions)))
        mae = mean_absolute_error(actuals, predictions)
        r2 = r2_score(actuals, predictions)

        return {'rmse': rmse, 'mae': mae, 'r2': r2}

## Example Usage

In [None]:
# Load data (replace with your actual data path)
# df = pd.read_csv('data/hockey_data.csv')
# df = df.sort_values('game_date')  # CRITICAL: chronological order

# Example parameters
params = dict(
    k_factor=32,                    # rating change rate
    home_advantage=100,             # rating boost for the home team
    initial_rating=1500,            # starting rating before division offset
    mov_multiplier=1.0,             # margin-of-victory weight
    mov_method='logarithmic',       # 'linear' or 'logarithmic'
    season_carryover=0.75,          # year-to-year retention
    ot_win_multiplier=0.75,         # actual score credited for an OT win
    rest_advantage_per_day=10,      # rating boost per day of extra rest
    b2b_penalty=50,                 # back-to-back penalty
)

# Initialize and train
# model = EloModel(params)
# model.fit(df)

# Evaluate
# metrics = model.evaluate(df)
# print(f"RMSE: {metrics['rmse']:.3f}")
# print(f"MAE: {metrics['mae']:.3f}")
# print(f"R²: {metrics['r2']:.3f}")

## Visualize Rating History

In [None]:
# Plot rating progression for top teams
# history_df = pd.DataFrame(model.rating_history)
# top_teams = sorted(model.ratings.items(), key=lambda x: x[1], reverse=True)[:5]

# plt.figure(figsize=(14, 7))
# for team, _ in top_teams:
#     team_history = history_df[history_df['home_team'] == team]['home_rating']
#     plt.plot(team_history, label=team, linewidth=2)

# plt.xlabel('Game Number')
# plt.ylabel('ELO Rating')
# plt.title('ELO Rating Progression - Top 5 Teams')
# plt.legend()
# plt.grid(True, alpha=0.3)
# plt.show()