In [1]:
import numpy as np
import pandas as pd
from skopt import gp_minimize
from skopt.space import Real
from skopt.utils import use_named_args
from skopt.plots import plot_convergence, plot_objective
import matplotlib.pyplot as plt
import sys
import os
sys.path.append(os.path.dirname(os.getcwd()))
from models.standard_dc import StandardTeamModel
from models.psxg_shots_resimmed_dc import PSxGShotsTeamModel
from models.psxg_totals_resimmed_dc import PSxGTotalTeamModel
from models.xg_shots_resimmed_dc import xGShotsTeamModel
from models.xg_totals_resimmed_dc import xGTotalTeamModel
from data.fetch_match_data import load_data


In [2]:
class EnsembleTeamModel:
    def __init__(self, models=None, model_weights=None):
        """
        Initialize the ensemble model with individual models and their weights.
        
        Parameters:
        models: Dictionary mapping model names to model instances
        model_weights: Dictionary mapping model names to weights
        """
        self.models = models or {}
        self.weights = model_weights or {
            'standard': 0.2,
            'psxg_shots': 0.2,
            'psxg_total': 0.2, 
            'xg_shots': 0.2,
            'xg_total': 0.2
        }
        self.team_attack = {}
        self.team_defense = {}
        self.home_advantage = 0.0
        self.rho = 0.0
        
    def add_model(self, name, model):
        """Add a model to the ensemble."""
        self.models[name] = model
        return self
        
    def combine_parameters(self):
        """Combine parameters from all models based on weights."""
        # Normalize weights to ensure they sum to 1
        total_weight = sum(self.weights.values())
        normalized_weights = {k: v/total_weight for k, v in self.weights.items()}
        
        # Ensure we have all required models
        for name in normalized_weights:
            if name not in self.models:
                raise ValueError(f"Model '{name}' was weighted but not added to the ensemble")
        
        # Get all unique teams across models
        all_teams = set()
        for model_name, model in self.models.items():
            all_teams.update(model.team_attack.keys())
        
        # Initialize combined parameters
        combined_attack = {team: 0.0 for team in all_teams}
        combined_defense = {team: 0.0 for team in all_teams}
        combined_home_advantage = 0.0
        combined_rho = 0.0
        
        # Combine parameters
        for model_name, model in self.models.items():
            weight = normalized_weights.get(model_name, 0.0)
            if weight <= 0:
                continue
                
            # Combine team attack and defense
            for team in all_teams:
                if team in model.team_attack:
                    combined_attack[team] += model.team_attack[team] * weight
                if team in model.team_defense:
                    combined_defense[team] += model.team_defense[team] * weight
            
            # Combine scalar parameters
            combined_home_advantage += model.home_advantage * weight
            combined_rho += model.rho * weight
        
        # Update the ensemble model's parameters
        self.team_attack = combined_attack
        self.team_defense = combined_defense
        self.home_advantage = combined_home_advantage
        self.rho = combined_rho
        
        return self
    
    def predict_match(self, home_team, away_team, max_goals=10):
        """Predict match outcome using the ensemble model."""
        # Check if parameters have been combined
        if not self.team_attack or not self.team_defense:
            self.combine_parameters()
            
        # Check if teams exist in the model
        if home_team not in self.team_attack or away_team not in self.team_attack:
            raise ValueError(f"Teams not found in the ensemble model. Available teams: {sorted(self.team_attack.keys())}")
        
        # Calculate expected goals from ensemble model
        lambda_home = self.team_attack[home_team] * self.team_defense[away_team] * self.home_advantage
        lambda_away = self.team_attack[away_team] * self.team_defense[home_team]
        
        return {
            "home_team": home_team, 
            "away_team": away_team, 
            "home_goals": lambda_home, 
            "away_goals": lambda_away
        }
    
    def print_team_strengths(self, exclude_teams=None):
        """Print team strength analysis from the ensemble model."""
        if exclude_teams is None:
            exclude_teams = []

        # Check if parameters have been combined
        if not self.team_attack or not self.team_defense:
            self.combine_parameters()
            
        # Get all teams
        all_teams = set(self.team_attack.keys())
        all_teams = [team for team in all_teams if team not in exclude_teams]
        
        # Calculate the average attack and defense values across all teams
        total_attack = sum(self.team_attack.get(team, 0) for team in all_teams)
        total_defense = sum(self.team_defense.get(team, 0) for team in all_teams)
        num_teams = len(all_teams)
        
        avg_attack = total_attack / num_teams if num_teams > 0 else 1.0
        avg_defense = total_defense / num_teams if num_teams > 0 else 1.0
        
        print(f"Ensemble Model - League average attack: {avg_attack:.3f}")
        print(f"Ensemble Model - League average defense: {avg_defense:.3f}")
        
        # Create a list of team data
        team_data = []
        for team in all_teams:
            attack = self.team_attack.get(team, float('nan'))
            defense = self.team_defense.get(team, float('nan'))
            overall_log_strength = np.log(attack) - np.log(defense)
            overall_abs_strength = attack - defense
            
            # Calculate expected goals against average opponent (ignoring home/away)
            xg_vs_avg = attack * avg_defense  # Expected goals for vs average defense
            xga_vs_avg = avg_attack * defense  # Expected goals against vs average attack
            
            team_data.append({
                'team': team,
                'attack': attack,
                'defense': defense,
                'overall_log_strength': overall_log_strength,
                'overall_abs_strength': overall_abs_strength,
                'xg_vs_avg': xg_vs_avg,
                'xga_vs_avg': xga_vs_avg,
                'goal_diff_vs_avg': xg_vs_avg - xga_vs_avg
            })
        
        # Sort by overall strength (descending)
        team_data = sorted(team_data, key=lambda x: x['goal_diff_vs_avg'], reverse=True)
        
        # Print header
        print("\n{:<20} {:^20} {:^20} {:^30}".format('', 'Ensemble Model', 'Strength', 'Expected vs Average'))
        print("{:<20} {:^10} {:^10} {:^10} {:^10} {:^10} {:^10} {:^10}".format(
            'Team', 'Attack', 'Defence', 'Log', 'Abs', 'For', 'Against', 'Diff'))
        print("-" * 100)
        
        # Print team data
        for team in team_data:
            print("{:<20} {:^10.2f} {:^10.2f} {:^10.2f} {:^10.2f} {:^10.2f} {:^10.2f} {:^10.2f}".format(
                team['team'],
                team['attack'],
                team['defense'],
                team['overall_log_strength'],
                team['overall_abs_strength'],
                team['xg_vs_avg'],
                team['xga_vs_avg'],
                team['goal_diff_vs_avg']
            ))
        
        # Print model parameters
        print("\nEnsemble Model Parameters:")
        print(f"Home Advantage: {self.home_advantage:.3f}")
        print(f"Rho Parameter: {self.rho:.3f}")
        print("\nModel Weights:")
        for model_name, weight in sorted(self.weights.items()):
            if model_name in self.models:  # Only show weights for included models
                print(f"{model_name}: {weight:.3f}")
        
        return team_data
    
    @staticmethod
    def dc_probability(home_goals, away_goals, lambda_home, lambda_away, rho):
        """Calculate Dixon-Coles adjusted probability for a match outcome."""
        # Use the same probability calculation as StandardTeamModel
        from scipy.stats import poisson
        
        # Base Poisson probabilities
        p_home = poisson.pmf(home_goals, lambda_home)
        p_away = poisson.pmf(away_goals, lambda_away)
        
        # Dixon-Coles adjustment for low-scoring dependencies
        tau = 1.0
        if home_goals == 0 and away_goals == 0:
            tau = 1 - rho
        elif home_goals == 0 and away_goals == 1:
            tau = 1 + rho * lambda_home
        elif home_goals == 1 and away_goals == 0:
            tau = 1 + rho * lambda_away
        elif home_goals == 1 and away_goals == 1:
            tau = 1 - rho * lambda_home * lambda_away
        
        return tau * p_home * p_away

In [3]:
shot_data, match_stats = load_data()
matches = match_stats.to_dict('records')

StandardTeamModel = StandardTeamModel()
PSxGShotsTeamModel = PSxGShotsTeamModel(n_simulations=25)
PSxGTotalTeamModel = PSxGTotalTeamModel(n_simulations=25)
xGShotsTeamModel = xGShotsTeamModel(n_simulations=25)
xGTotalTeamModel = xGTotalTeamModel(n_simulations=25)

StandardTeamModel.fit_models(matches, epsilon=0.005, season_penalty=1, days_ago=365)
PSxGShotsTeamModel.fit_models(matches, shot_data, epsilon=0.005, season_penalty=1)
PSxGTotalTeamModel.fit_models(matches, epsilon=0.005, season_penalty=1)
xGShotsTeamModel.fit_models(matches, shot_data, epsilon=0.005, season_penalty=1)
xGTotalTeamModel.fit_models(matches, epsilon=0.005, season_penalty=1)





Optimizing for 907 matches with 47 teams
Match 0: {'match_url': 'https://fbref.com/en/matches/008769e1/Norwich-City-Sheffield-United-August-24-2024-Championship', 'match_date': Timestamp('2024-08-24 00:00:00'), 'home_team': 'Norwich City', 'away_team': 'Sheffield Utd', 'season': 2024, 'division': 'EFL Championship', 'home_goals': 1, 'home_xg': 0.95, 'home_psxg': 1.53, 'away_goals': 1, 'away_xg': 0.8, 'away_psxg': 1.04, 'days_from_ref': 187}
Match 1: {'match_url': 'https://fbref.com/en/matches/00bcfc31/Arsenal-Bournemouth-May-4-2024-Premier-League', 'match_date': Timestamp('2024-05-04 00:00:00'), 'home_team': 'Arsenal', 'away_team': 'Bournemouth', 'season': 2023, 'division': 'Premier League', 'home_goals': 3, 'home_xg': 3.39, 'home_psxg': 1.98, 'away_goals': 0, 'away_xg': 0.46, 'away_psxg': 0.33, 'days_from_ref': 299}
Match 2: {'match_url': 'https://fbref.com/en/matches/00bde56f/Luton-Town-Swansea-City-December-7-2024-Championship', 'match_date': Timestamp('2024-12-07 00:00:00'), 'home_

KeyboardInterrupt: 

In [None]:
model_weights = {
    'standard': 0.3,       
    'psxg_shots': 0.175,    
    'psxg_total': 0.175,     
    'xg_shots': 0.175,       
    'xg_total': 0.175        
}

# Initialize the ensemble model
ensemble = EnsembleTeamModel(model_weights=model_weights)

# Add each model to the ensemble
ensemble.add_model('standard', StandardTeamModel)
ensemble.add_model('psxg_shots', PSxGShotsTeamModel)
ensemble.add_model('psxg_total', PSxGTotalTeamModel)
ensemble.add_model('xg_shots', xGShotsTeamModel)
ensemble.add_model('xg_total', xGTotalTeamModel)

# Combine the parameters from all models
ensemble.combine_parameters()


<__main__.EnsembleTeamModel at 0x15eb7ab4f80>

In [None]:
prediction = ensemble.predict_match('Liverpool', 'Southampton')
print(prediction)

# To print team strengths:
ensemble.print_team_strengths()


{'home_team': 'Liverpool', 'away_team': 'Southampton', 'home_goals': np.float64(3.767356551530681), 'away_goals': np.float64(0.42059318897778314)}
Ensemble Model - League average attack: 1.070
Ensemble Model - League average defense: 1.070

                        Ensemble Model          Strength            Expected vs Average      
Team                   Attack    Defence      Log        Abs        For      Against      Diff   
----------------------------------------------------------------------------------------------------
Liverpool               1.69       0.74       0.82       0.94       1.81       0.80       1.01   
Arsenal                 1.30       0.56       0.83       0.73       1.39       0.60       0.79   
Manchester City         1.59       0.89       0.59       0.71       1.71       0.95       0.76   
Chelsea                 1.56       1.04       0.40       0.52       1.67       1.11       0.56   
Newcastle Utd           1.36       1.02       0.29       0.34       1.45  

[{'team': 'Liverpool',
  'attack': np.float64(1.689567902689479),
  'defense': np.float64(0.744943501008089),
  'overall_log_strength': np.float64(0.8189197184130257),
  'overall_abs_strength': np.float64(0.94462440168139),
  'xg_vs_avg': np.float64(1.8080626755061417),
  'xga_vs_avg': np.float64(0.7971813546916113),
  'goal_diff_vs_avg': np.float64(1.0108813208145304)},
 {'team': 'Arsenal',
  'attack': np.float64(1.2982529794312958),
  'defense': np.float64(0.5633011044055569),
  'overall_log_strength': np.float64(0.8349604711833959),
  'overall_abs_strength': np.float64(0.7349518750257389),
  'xg_vs_avg': np.float64(1.3893035916093492),
  'xga_vs_avg': np.float64(0.6028016042849222),
  'goal_diff_vs_avg': np.float64(0.786501987324427)},
 {'team': 'Manchester City',
  'attack': np.float64(1.5939468169405466),
  'defense': np.float64(0.885186683071205),
  'overall_log_strength': np.float64(0.5881699301780773),
  'overall_abs_strength': np.float64(0.7087601338693417),
  'xg_vs_avg': np.

In [None]:
shot_data, match_stats = load_data()
matches = match_stats.to_dict('records')

teammodel = StandardTeamModel()

teammodel.fit_models(matches, epsilon=0.005, season_penalty=1, days_ago=365)

Optimizing for 936 matches with 47 teams
Match 0: {'match_url': 'https://fbref.com/en/matches/008769e1/Norwich-City-Sheffield-United-August-24-2024-Championship', 'match_date': Timestamp('2024-08-24 00:00:00'), 'home_team': 'Norwich City', 'away_team': 'Sheffield Utd', 'season': 2024, 'home_goals': 1, 'home_xg': 0.95, 'home_psxg': 1.53, 'away_goals': 1, 'away_xg': 0.8, 'away_psxg': 1.04, 'days_from_ref': 187}
Match 1: {'match_url': 'https://fbref.com/en/matches/00bcfc31/Arsenal-Bournemouth-May-4-2024-Premier-League', 'match_date': Timestamp('2024-05-04 00:00:00'), 'home_team': 'Arsenal', 'away_team': 'Bournemouth', 'season': 2023, 'home_goals': 3, 'home_xg': 3.39, 'home_psxg': 1.98, 'away_goals': 0, 'away_xg': 0.46, 'away_psxg': 0.33, 'days_from_ref': 299}
Match 2: {'match_url': 'https://fbref.com/en/matches/00bde56f/Luton-Town-Swansea-City-December-7-2024-Championship', 'match_date': Timestamp('2024-12-07 00:00:00'), 'home_team': 'Luton Town', 'away_team': 'Swansea City', 'season': 20

<models.standard_dc.StandardTeamModel at 0x29543e6b9b0>

In [None]:
teammodel.print_team_strengths()

League average attack: 1.019
League average defense: 1.019

                        Standard Model          Strength            Expected vs Average      
Team                   Attack    Defence      Log        Abs        For      Against      Diff   
----------------------------------------------------------------------------------------------------
Liverpool               1.97       0.60       1.18       1.37       2.01       0.62       1.40   
Manchester City         1.90       0.70       1.00       1.20       1.94       0.71       1.22   
Arsenal                 1.57       0.46       1.23       1.11       1.60       0.47       1.14   
Chelsea                 1.80       0.88       0.72       0.92       1.83       0.89       0.94   
Newcastle Utd           1.67       0.86       0.67       0.81       1.70       0.87       0.83   
Leeds United            1.34       0.63       0.76       0.71       1.37       0.64       0.73   
Tottenham               1.55       0.86       0.59       0.

[{'team': 'Liverpool',
  'std_attack': np.float64(1.9727911197388128),
  'std_defense': np.float64(0.6039511278727037),
  'overall_log_strength': np.float64(1.1837113505079588),
  'overall_abs_strength': np.float64(1.3688399918661092),
  'xg_vs_avg': np.float64(2.011257280964788),
  'xga_vs_avg': np.float64(0.6157228436306403),
  'goal_diff_vs_avg': np.float64(1.3955344373341476)},
 {'team': 'Manchester City',
  'std_attack': np.float64(1.9009300680165904),
  'std_defense': np.float64(0.6995216582498206),
  'overall_log_strength': np.float64(0.9997017987671184),
  'overall_abs_strength': np.float64(1.2014084097667697),
  'xg_vs_avg': np.float64(1.937995057687322),
  'xga_vs_avg': np.float64(0.7131561557238828),
  'goal_diff_vs_avg': np.float64(1.2248389019634391)},
 {'team': 'Arsenal',
  'std_attack': np.float64(1.5730469812967214),
  'std_defense': np.float64(0.45908261908949993),
  'overall_log_strength': np.float64(1.2315395780545737),
  'overall_abs_strength': np.float64(1.11396436