In [18]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.stats import poisson
import requests
import json
import os

API_KEY = os.getenv("API_KEY")
url = 'https://data-service.beatthebookie.blog/data'
headers = {"x-api-key": API_KEY}
params = {'division':'Premier League', 'season': ['2023_2024']}
response = requests.get(url, headers=headers, params=params)
json_str = response.content.decode('utf-8')
data= pd.read_json(json_str)
print(data[["match_date", "home_team", "away_team", "home_goals", "home_xgoals", "away_goals", "away_xgoals"]].tail())

# Prepare the Data
teams = np.sort(np.unique(np.concatenate([data['home_team'], data['away_team']])))
team_idx = {team: i for i, team in enumerate(teams)}

data['home_team_idx'] = data['home_team'].map(team_idx)
data['away_team_idx'] = data['away_team'].map(team_idx)

# Build the Model
def dc_log_likelihood(params, home_team_idx, away_team_idx, home_goals, away_goals, n_teams, rho=0.001):
    # Extract parameters
    home_adv = params[0]
    attack = params[1:n_teams+1]
    defense = params[n_teams+1:2*n_teams+1]

    log_likelihood = 0
    
    for i in range(len(home_team_idx)):
        h = home_team_idx[i]
        a = away_team_idx[i]
        
        home_mu = np.exp(attack[h] + defense[a] + home_adv)
        away_mu = np.exp(attack[a] + defense[h])
        
        # Dixon-Coles adjustment for low-scoring games
        if home_goals[i] == 0 and away_goals[i] == 0:
            adjust = -home_mu * away_mu * rho
        elif home_goals[i] == 1 and away_goals[i] == 0:
            adjust = rho * home_mu
        elif home_goals[i] == 0 and away_goals[i] == 1:
            adjust = rho * away_mu
        elif home_goals[i] == 1 and away_goals[i] == 1:
            adjust = -rho
        else:
            adjust = 0
        
        log_likelihood += (np.log(poisson.pmf(home_goals[i], home_mu)) +
                           np.log(poisson.pmf(away_goals[i], away_mu)) +
                           adjust)

    return -log_likelihood

# Initialize parameters
n_teams = len(teams)
initial_params = np.concatenate((np.array([0.0]),  # Home advantage
                                 np.random.uniform(0, 1, n_teams),  # Attack strengths
                                 np.random.uniform(0, 1, n_teams))) # Defense strengths

# Optimize the Parameters
result = minimize(dc_log_likelihood, initial_params,
                  args=(data['home_team_idx'], data['away_team_idx'],
                        data['home_goals'], data['away_goals'], n_teams),
                  method='L-BFGS-B')

# Extract the optimized parameters
home_advantage = result.x[0]
attack_strengths = result.x[1:n_teams+1]
defense_strengths = result.x[n_teams+1:2*n_teams+1]

# Evaluate and Predict (Example Prediction)
def predict(home_team, away_team):
    h = team_idx[home_team]
    a = team_idx[away_team]
    
    home_goals_avg = np.exp(attack_strengths[h] + defense_strengths[a] + home_advantage)
    away_goals_avg = np.exp(attack_strengths[a] + defense_strengths[h])
    
    home_goals_dist = poisson(home_goals_avg)
    away_goals_dist = poisson(away_goals_avg)
    
    return home_goals_dist, away_goals_dist

# Create a DataFrame to display attack and defense strengths
team_strengths = pd.DataFrame({
    'Team': teams,
    'Attack Strength': attack_strengths,
    'Defense Strength': defense_strengths,
    'Overall Strength': attack_strengths - defense_strengths
})


  data= pd.read_json(json_str)


     match_date       home_team      away_team  home_goals  home_xgoals  \
375  2024-05-19        Man City       West Ham           3      2.31504   
376  2024-05-19       Liverpool         Wolves           2      5.65937   
377  2024-05-19         Burnley  Nott'm Forest           1      1.08222   
378  2024-05-19  Crystal Palace    Aston Villa           5      2.23189   
379  2024-05-19           Luton         Fulham           2      2.06743   

     away_goals  away_xgoals  
375           1     0.240159  
376           0     0.731254  
377           2     1.579620  
378           0     0.598846  
379           4     1.115980  


In [19]:
# Display the results
print(team_strengths.sort_values(by="Overall Strength", ascending=False))

                Team  Attack Strength  Defense Strength  Overall Strength
0            Arsenal         0.502664         -0.499169          1.001833
12          Man City         0.560490         -0.335537          0.896027
10         Liverpool         0.456115         -0.156406          0.612521
14         Newcastle         0.462517          0.257706          0.204812
1        Aston Villa         0.349277          0.233535          0.115742
6            Chelsea         0.364044          0.266779          0.097265
17         Tottenham         0.322492          0.231828          0.090664
13        Man United         0.058259          0.166862         -0.108603
7     Crystal Palace         0.058174          0.166780         -0.108607
9             Fulham         0.024856          0.215670         -0.190813
4           Brighton         0.025663          0.231858         -0.206196
3          Brentford         0.046260          0.280275         -0.234015
18          West Ham         0.123150 

In [20]:
# Example prediction
home_goals_dist, away_goals_dist = predict('West Ham', 'Arsenal')
print(f"Expected goals for home: {home_goals_dist.mean()}")
print(f"Expected goals for away: {away_goals_dist.mean()}")

Expected goals for home: 0.8356343941534056
Expected goals for away: 2.499984573631381
