In [1]:
import pymc as pm
import numpy as np
import pandas as pd
import os
import requests
import io
from datetime import datetime, timedelta
import multiprocessing
import arviz as az
import logging
import tqdm




In [None]:
# build model
import psutil

def build_bayesian_model(shot_data, match_data, red_cards):
    """
    Build the shot-level PyMC model, including red-card indicator covariates.

    Parameters
    ----------
    shot_data : pandas.DataFrame
        One row per shot. Columns read here: 'Team', 'Opponent', 'is_home',
        'xG', 'Event Type', 'Outcome', 'match_id', 'Minute'.
    match_data : pandas.DataFrame
        One row per match. Columns read here: 'home_team', 'away_team',
        'league'.
    red_cards : pandas.DataFrame
        One row per red card. Columns read here: 'match_id', 'minute', 'team'.

    Returns
    -------
    tuple
        ``(model, team_indices, league_indices)`` where the two dicts map
        team / league names to their 0-based positions (alphabetical order).

    Raises
    ------
    KeyError
        If a shot references a team that never appears in ``match_data``, or
        if ``match_data`` contains no "Premier League" rows (it is the
        reference league whose strength is pinned to 0).
    """
    print("Building shot-level Bayesian model...")

    # Get unique teams and leagues from match_data
    teams = sorted(list(set(match_data['home_team']) | set(match_data['away_team'])))
    unique_leagues = sorted(list(set(match_data['league'])))

    team_indices = {team: idx for idx, team in enumerate(teams)}
    league_indices = {league: idx for idx, league in enumerate(unique_leagues)}

    # Create mapping of teams to their league indices
    team_league_idx = np.zeros(len(teams), dtype=int)
    for team, idx in team_indices.items():
        # The league of the first match (home or away) the team appears in
        team_matches = match_data[
            (match_data['home_team'] == team) |
            (match_data['away_team'] == team)
        ].iloc[0]
        team_league = team_matches['league']
        team_league_idx[idx] = league_indices[team_league]

    # Process shots data
    # NOTE(review): shot_team_idx, opponent_idx and is_home are computed but not
    # used by the model below; the lookups still fail fast (KeyError) when a
    # shot names a team that is missing from match_data.
    shot_team_idx = [team_indices[team] for team in shot_data['Team']]
    opponent_idx = [team_indices[opp] for opp in shot_data['Opponent']]
    is_home = shot_data['is_home'].astype(int)
    xg_values = shot_data['xG']
    actual_goals = (shot_data['Event Type'] == 'Shot') & (shot_data['Outcome'] == 'Goal')

    # Process red card information
    n_shots = len(shot_data)
    attacking_team_has_red = np.zeros(n_shots)  # team taking the shot has red
    defending_team_has_red = np.zeros(n_shots)  # team defending has red

    # Use the row POSITION (enumerate), not the DataFrame index label, to write
    # into the arrays: labels are not 0..n-1 after filtering or concatenation.
    for pos, (_, shot) in enumerate(shot_data.iterrows()):
        earlier_reds = red_cards[
            (red_cards['match_id'] == shot['match_id']) &
            (red_cards['minute'] < shot['Minute'])
        ]
        if len(earlier_reds) == 0:
            continue
        carded_teams = set(earlier_reds['team'])
        if shot['Team'] in carded_teams:
            attacking_team_has_red[pos] = 1
        if shot['Opponent'] in carded_teams:
            defending_team_has_red[pos] = 1

    with pm.Model() as model:
        # League parameters
        league_shot_mu = pm.Normal("league_shot_mu", mu=0, sigma=0.5)
        league_shot_sigma = pm.HalfNormal("league_shot_sigma", sigma=0.3)  # based on shot data

        # League strength (relative to Premier League)
        premier_league_idx = league_indices["Premier League"]
        league_strength_raw = pm.Normal("league_strength_raw", mu=-0.5, sigma=0.3,
                                      shape=len(unique_leagues)-1)
        # Insert a fixed 0 at the Premier League position so every other
        # league is expressed relative to it.
        league_strength = pm.Deterministic(
            "league_strength",
            pm.math.concatenate([
                league_strength_raw[:premier_league_idx],
                pm.math.zeros(1),
                league_strength_raw[premier_league_idx:]
            ])
        )

        # Team shot creation parameters
        shot_creation_raw = pm.Normal("shot_creation_raw", mu=0, sigma=0.3, shape=len(teams))

        # Scale shot creation by league
        shot_creation = pm.Deterministic(
            "shot_creation",
            shot_creation_raw * league_shot_sigma + league_shot_mu +
            league_strength[team_league_idx]
        )

        # Separate red card effects for attack and defense
        red_card_attack_effect = pm.Normal("red_card_attack_effect", mu=-0.65, sigma=0.2)  # stronger negative effect on attack
        red_card_defense_effect = pm.Normal("red_card_defense_effect", mu=-0.54, sigma=0.2)  # moderate negative effect on defense

        # Home advantage for shot creation
        home_advantage = pm.Normal("home_advantage", mu=0.2, sigma=0.1,
                                 shape=len(unique_leagues))

        # Adjust shot creation based on red cards
        # NOTE(review): shot_creation has one entry per team while the red-card
        # indicators have one entry per shot; this expression is never used by
        # the likelihood below, so the shapes are only reconciled (or not) if
        # it is later wired in.
        shot_creation_with_reds = shot_creation + (
            red_card_attack_effect * attacking_team_has_red +  # effect on attacking team
            red_card_defense_effect * defending_team_has_red    # effect on defending team
        )

        # Each shot is a Bernoulli trial with p = xG
        # NOTE(review): p is the observed xG column only, so none of the random
        # variables declared above enter the likelihood; sampling returns
        # their priors. Confirm whether the linear predictor was meant to be
        # connected here.
        goals_like = pm.Bernoulli("goals", p=xg_values, observed=actual_goals)

        print("Model building completed!")

    return model, team_indices, league_indices


def fit_bayesian_model(model, draws=500, tune=500):
    """
    Sample the given PyMC model with NUTS and report the divergence count.

    Parameters
    ----------
    model : pm.Model
        Model to sample; entered as a context manager.
    draws : int, default 500
        Posterior draws per chain.
    tune : int, default 500
        Tuning (warm-up) steps per chain.

    Returns
    -------
    The object returned by ``pm.sample(..., return_inferencedata=True)``.
    """
    # cpu_count() - 1 is 0 on a single-core machine; always keep at least one
    # chain/core so pm.sample is not asked for zero chains.
    n_cores = max(1, min(4, multiprocessing.cpu_count() - 1))

    print(f"Starting model fitting with {n_cores} cores...")
    print(f"Planning {draws} draws with {tune} tuning steps...")

    with model:
        trace = pm.sample(
            draws=draws,
            tune=tune,
            chains=n_cores,
            cores=n_cores,
            progressbar=True,
            return_inferencedata=True,
            init='adapt_diag',
            target_accept=0.95,
            nuts={"max_treedepth": 15}  # step-method option passed through to NUTS
        )

    # Print sampling diagnostics
    print("\nSampling Statistics:")
    print(f"Number of divergences: {trace.sample_stats.diverging.sum().values}")

    return trace
    
def print_memory_usage():
    """Print the current process's resident set size (RSS) in megabytes."""
    rss_bytes = psutil.Process().memory_info().rss
    rss_megabytes = rss_bytes / 1024 / 1024
    print(f"Memory usage: {rss_megabytes:.2f} MB")

# Setup logging: set the level of the logger named 'pymc' to INFO
logging.getLogger('pymc').setLevel(logging.INFO)



def get_league_strengths(trace, league_indices):
    """
    Return the posterior-mean league strength for every league.

    Parameters
    ----------
    trace : object with a ``posterior`` mapping
        ``trace.posterior['league_strength']`` is averaged over the 'chain'
        and 'draw' dimensions.
    league_indices : dict
        Maps league name -> position of that league in 'league_strength'.

    Returns
    -------
    pandas.DataFrame
        Columns 'league' and 'league_strength' (rounded to 3 decimals), sorted
        from strongest to weakest.
    """
    league_strength_means = np.asarray(
        trace.posterior['league_strength'].mean(dim=['chain', 'draw']).values
    )

    # Pair each name with the value at its stored index instead of trusting
    # the dict's insertion order to match the model's ordering.
    leagues = sorted(league_indices, key=league_indices.get)
    positions = [league_indices[league] for league in leagues]

    results = pd.DataFrame({
        'league': leagues,
        'league_strength': league_strength_means[positions]
    })

    return results.round(3).sort_values('league_strength', ascending=False)

def get_hierarchical_team_strengths(trace, team_indices, league_indices, team_leagues, current_teams):
    """
    Summarise posterior-mean team strengths for the teams in ``current_teams``.

    Parameters
    ----------
    trace : object with a ``posterior`` mapping
        Must contain 'attack', 'defense', 'home_advantage' and
        'league_strength'; each is averaged over 'chain' and 'draw'.
    team_indices : dict
        Maps team name -> position of that team in 'attack' / 'defense'.
    league_indices : dict
        Maps league name -> position of that league in 'league_strength'.
    team_leagues : dict
        Maps team name -> league name; missing teams are labelled 'Unknown'.
    current_teams : iterable
        Teams to keep in the returned table.

    Returns
    -------
    tuple
        ``(results, home_adv)`` where ``results`` is a DataFrame sorted by
        'overall_strength' (descending) and ``home_adv`` is the raw
        posterior-mean home advantage.
    """
    def _posterior_mean(name):
        return trace.posterior[name].mean(dim=['chain', 'draw']).values

    # Pair names with values through the stored index, not dict insertion order.
    teams = sorted(team_indices, key=team_indices.get)
    positions = [team_indices[team] for team in teams]

    attack_means = np.asarray(_posterior_mean('attack'))[positions]
    defense_means = np.asarray(_posterior_mean('defense'))[positions]
    home_adv = _posterior_mean('home_advantage')

    # Get league strengths for reference
    league_strengths = get_league_strengths(trace, league_indices)

    team_league_names = [team_leagues.get(team, 'Unknown') for team in teams]

    # home_advantage may be a scalar / per-team array (usable as a column
    # directly) or a per-league vector; the latter must be expanded to one
    # value per team or DataFrame construction fails on the length mismatch.
    home_adv_values = np.asarray(home_adv)
    is_per_league = (
        home_adv_values.ndim == 1
        and home_adv_values.shape[0] == len(league_indices)
        and home_adv_values.shape[0] != len(teams)
    )
    if is_per_league:
        home_adv_column = [
            home_adv_values[league_indices[league]] if league in league_indices else np.nan
            for league in team_league_names
        ]
    else:
        home_adv_column = home_adv

    results = pd.DataFrame({
        'team': teams,
        'league': team_league_names,
        'attack_strength': attack_means,
        'defense_strength': defense_means,
        # Scoring rate vs. an average defence minus conceding rate vs. an
        # average attack, both on the exp (goal-rate) scale.
        'overall_strength': (np.exp(attack_means - np.mean(defense_means)) -
                           np.exp(np.mean(attack_means) - defense_means)),
        'home_advantage': home_adv_column
    })

    # Merge with league strengths
    results = results.merge(
        league_strengths,
        left_on='league',
        right_on='league',
        how='left'
    )

    # Filter current teams and sort
    results = (results[results['team'].isin(current_teams)]
              .round(3)
              .sort_values('overall_strength', ascending=False))

    return results, home_adv

def analyze_league_strengths(trace, league_indices, team_indices, team_leagues):
    """
    Build a per-league summary table from the posterior means.

    For every league in ``league_indices`` the table holds its mean
    'league_strength', the mean 'league_attack_sigma' / 'league_defense_sigma',
    the number of teams mapped to it in ``team_leagues``, a preview of up to
    five team names, and ``exp(base_strength) - 1``. The result is rounded to
    3 decimals and sorted by 'base_strength', strongest first.
    ``team_indices`` is accepted but not used.
    """
    posterior = trace.posterior

    def _mean_of(variable):
        return posterior[variable].mean(dim=['chain', 'draw']).values

    strength_by_index = _mean_of('league_strength')
    # The two *_mu means are not used below; they are still read so that a
    # trace lacking them fails here, as before.
    _mean_of('league_attack_mu')
    attack_sigma = _mean_of('league_attack_sigma')
    _mean_of('league_defense_mu')
    defense_sigma = _mean_of('league_defense_sigma')

    def _summarise(league):
        members = [team for team, assigned in team_leagues.items() if assigned == league]
        preview = ', '.join(sorted(members)[:5])
        if len(members) > 5:
            preview += '...'
        return {
            'league': league,
            'base_strength': strength_by_index[league_indices[league]],
            'attack_variation': attack_sigma,    # spread of attack strength
            'defense_variation': defense_sigma,  # spread of defense strength
            'num_teams': len(members),
            'teams': preview,
        }

    summary = pd.DataFrame([_summarise(league) for league in list(league_indices.keys())])

    # Multiplicative goal adjustment implied by the league strength
    summary['expected_goals_vs_avg'] = np.exp(summary['base_strength']) - 1

    return summary.round(3).sort_values('base_strength', ascending=False)
    

In [None]:
# NOTE(review): `df` is not defined in any cell visible in this notebook —
# presumably a match-level DataFrame loaded elsewhere; confirm before running.
data = df[["home_team", "away_team", "home_goals", "away_goals", "home_xgoals", "away_xgoals", "match_date", "division"]]

# filter to matches only in previous 365 days
data = data[data["match_date"] > datetime.now() - timedelta(days=365)]

# get list of current teams
current_teams = df[df["season"] == 20242025]["home_team"].unique()

# get list of leagues
# (dict(zip(...)) keeps the LAST division seen for each home team in df's row order)
team_leagues = dict(zip(df["home_team"], df["division"]))

# Build model
# NOTE(review): build_bayesian_model as defined in this notebook takes
# (shot_data, match_data, red_cards). The keyword arguments below do not match
# that signature and would raise TypeError — this call looks like it was
# written for an earlier match-level version of the model; update one or the other.
model, team_indices, league_indices = build_bayesian_model(
        home_teams=data['home_team'],
        away_teams=data['away_team'],
        home_goals=np.array(data['home_goals']),
        away_goals=np.array(data['away_goals']),
        home_xg=np.array(data["home_xgoals"]),
        away_xg=np.array(data["away_xgoals"]),
        dates=data["match_date"],
        leagues=data["division"]
    )
    
# Fit model
# Memory usage is printed before and after sampling for comparison.
print_memory_usage()
trace = fit_bayesian_model(model)
print_memory_usage()




Building Bayesian model...
Dataset size: 895 matches
Time span: 2024-02-11 00:00:00 to 2025-01-26 00:00:00
Model building completed!
Memory usage: 270.15 MB
Starting model fitting with 4 cores...
Planning 500 draws with 500 tuning steps...


  warn(
Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
  warn(
  warn(
  warn(
  warn(
  warn(
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [league_attack_mu, league_attack_sigma, league_defense_mu, league_defense_sigma, league_strength_raw, attack_raw, defense_raw, decay_rate, home_advantage, xg_alpha]


Sampling 4 chains for 500 tune and 500 draw iterations (2_000 + 2_000 draws total) took 6301 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details



Sampling Statistics:
Number of divergences: 0
Memory usage: 144.72 MB


In [6]:
# Create a dictionary mapping each team to its league, preferring the most
# recent season.
# The two latest seasons are taken from the data itself: season codes such as
# 20242025 are not consecutive integers, so `latest_season - 1` would not
# identify the previous season.
recent_seasons = sorted(df["season"].unique())[-2:]
latest_season = recent_seasons[-1]
previous_season = recent_seasons[0]  # equals latest_season when only one season exists

# Combine previous and current season data, oldest first: dict(zip(...)) keeps
# the last value per key, so the latest season's division wins.
combined_df = pd.concat([df[df["season"] == season] for season in recent_seasons])

# Create a dictionary mapping each team to its league
team_leagues = dict(zip(combined_df["home_team"], combined_df["division"]))

# Get results
team_strengths, home_advantage = get_hierarchical_team_strengths(
    trace=trace,
    team_indices=team_indices,
    league_indices=league_indices,
    team_leagues=team_leagues,
    current_teams=current_teams
)

# Analyze league strengths
league_analysis = analyze_league_strengths(
    trace=trace,
    league_indices=league_indices,
    team_indices=team_indices,
    team_leagues=team_leagues
)

# Print results
print("\nTeam Strengths:")
print(team_strengths)

print("\nLeague Analysis:")
print(league_analysis)

NameError: name 'trace' is not defined

In [3]:
import pickle

def save_model_results(trace, team_indices, league_indices, team_strengths, league_analysis, filename=None):
    """
    Pickle the fitted trace and its derived tables into a single file.

    When ``filename`` is None the file is named after today's date
    (``model_results_YYYYMMDD.pkl``). The chosen path is printed afterwards.
    """
    target = filename
    if target is None:
        target = f'model_results_{datetime.now().strftime("%Y%m%d")}.pkl'

    payload = dict(
        trace=trace,
        team_indices=team_indices,
        league_indices=league_indices,
        team_strengths=team_strengths,
        league_analysis=league_analysis,
    )
    with open(target, 'wb') as handle:
        pickle.dump(payload, handle)
    print(f"Results saved to {target}")

def load_model_results(filename):
    """
    Read a results pickle and return its five entries as a tuple.

    The tuple order is (trace, team_indices, league_indices, team_strengths,
    league_analysis).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load result files that you produced yourself.
    with open(filename, 'rb') as handle:
        payload = pickle.load(handle)

    trace = payload['trace']
    team_indices = payload['team_indices']
    league_indices = payload['league_indices']
    team_strengths = payload['team_strengths']
    league_analysis = payload['league_analysis']
    return (trace, team_indices, league_indices, team_strengths, league_analysis)

# Persist the current run under a file name stamped with today's date (YYYYMMDD).
filename = f'model_results_{datetime.now().strftime("%Y%m%d")}.pkl'
save_model_results(trace, team_indices, league_indices, team_strengths, league_analysis, filename)

NameError: name 'trace' is not defined

In [4]:
# Restore a previously pickled run from a fixed file name.
filename = 'model_results_20250209.pkl'

(trace, team_indices, league_indices,
 team_strengths, league_analysis) = load_model_results(filename)

# Show both summary tables, each under its heading
print("\nTeam Strengths:", team_strengths, sep="\n")
print("\nLeague Analysis:", league_analysis, sep="\n")


Team Strengths:
                team          league  attack_strength  defense_strength  \
22         Liverpool  Premier League            0.609             0.147   
0            Arsenal  Premier League            0.493             0.429   
24          Man City  Premier League            0.516             0.089   
10           Chelsea  Premier League            0.517            -0.119   
28         Newcastle  Premier League            0.399            -0.077   
4        Bournemouth  Premier League            0.363            -0.094   
42         Tottenham  Premier League            0.393            -0.199   
16            Fulham  Premier League            0.160            -0.035   
12    Crystal Palace  Premier League            0.165            -0.047   
1        Aston Villa  Premier League            0.250            -0.202   
30     Nott'm Forest  Premier League            0.133             0.005   
6           Brighton  Premier League            0.137            -0.039   
5       

In [5]:
def predict_match(home_team, away_team, trace, team_indices, n_sims=1000, random_seed=None):
    """
    Predict expected goals and 1X2 outcome probabilities for a single match.

    Parameters
    ----------
    home_team, away_team : str
        Keys of ``team_indices``.
    trace : object with a ``posterior`` mapping
        Must contain 'attack', 'defense' (team on the last axis) and
        'home_advantage'.
    team_indices : dict
        Maps team name -> position on the last axis of 'attack' / 'defense'.
    n_sims : int, default 1000
        Number of simulated matches.
    random_seed : int or None, default None
        When given, a private ``RandomState`` seeded with it is used so the
        result is reproducible; when None the global NumPy RNG is used.

    Returns
    -------
    dict
        'home_xg', 'away_xg' (rounded to 2 decimals) and 'home_win_prob',
        'draw_prob', 'away_win_prob' in percent (rounded to 1 decimal).

    Raises
    ------
    KeyError
        If either team is missing from ``team_indices``.
    """
    home_idx = team_indices[home_team]
    away_idx = team_indices[away_team]

    # Posterior samples of team attack/defense strength and home advantage
    attack_samples = trace.posterior["attack"].values
    defense_samples = trace.posterior["defense"].values
    home_advantage = trace.posterior["home_advantage"].values

    # Goal rate for every posterior sample; `...` keeps all leading
    # (chain, draw) axes and picks one team on the last axis.
    home_theta = np.exp(attack_samples[..., home_idx] -
                        defense_samples[..., away_idx] +
                        home_advantage)
    away_theta = np.exp(attack_samples[..., away_idx] -
                        defense_samples[..., home_idx])

    # Mean expected goals over the posterior samples
    home_xg = float(home_theta.mean())
    away_xg = float(away_theta.mean())

    # Simulate the match with independent Poisson scores at the mean rates.
    # Both the np.random module and RandomState expose .poisson.
    rng = np.random if random_seed is None else np.random.RandomState(random_seed)
    home_goals = rng.poisson(home_xg, n_sims)
    away_goals = rng.poisson(away_xg, n_sims)

    # Calculate match outcome probabilities
    home_wins = np.mean(home_goals > away_goals)
    draws = np.mean(home_goals == away_goals)
    away_wins = np.mean(home_goals < away_goals)

    return {
        'home_xg': round(home_xg, 2),
        'away_xg': round(away_xg, 2),
        'home_win_prob': round(home_wins * 100, 1),
        'draw_prob': round(draws * 100, 1),
        'away_win_prob': round(away_wins * 100, 1)
    }

def print_prediction(home_team, away_team, prediction):
    """Print a five-line, human-readable summary of a ``predict_match`` result."""
    report_lines = (
        f"\nMatch Prediction: {home_team} (H) vs {away_team} (A)",
        f"Expected Goals: {home_team} {prediction['home_xg']} - {prediction['away_xg']} {away_team}",
        f"Win Probability: {home_team}: {prediction['home_win_prob']}%",
        f"Draw Probability: {prediction['draw_prob']}%",
        f"Win Probability: {away_team}: {prediction['away_win_prob']}%",
    )
    for text in report_lines:
        print(text)



# Example: Leicester at home to Arsenal, using the loaded trace and team index map
prediction = predict_match("Leicester", "Arsenal", trace, team_indices)   
print_prediction("Leicester", "Arsenal", prediction) 


Match Prediction: Leicester (H) vs Arsenal (A)
Expected Goals: Leicester 0.77 - 2.34 Arsenal
Win Probability: Leicester: 10.8%
Draw Probability: 17.9%
Win Probability: Arsenal: 71.3%


In [24]:
def predict_asian_handicap(home_team, away_team, trace, team_indices, handicaps=None, n_sims=50000, vig=0.05, random_seed=42):
    """
    Predict Asian handicap odds where the favorite always gets the minus handicap.

    Parameters
    ----------
    home_team, away_team : str
        Keys of ``team_indices``.
    trace : object with a ``posterior`` mapping
        Must contain 'attack', 'defense' (team on the last axis) and
        'home_advantage' (averaged to a single number here).
    team_indices : dict
        Maps team name -> position on the last axis of 'attack' / 'defense'.
    handicaps : list of float, optional
        Lines to price; negative values are favorite lines, values >= 0 are
        underdog lines. Defaults to -3.0 ... 3.0 in quarter-goal steps.
    n_sims : int, default 50000
        Number of simulated matches.
    vig : float, default 0.05
        Bookmaker margin; each price is ``sqrt(1 + vig) / prob``.
    random_seed : int or None, default 42
        Seed for a private ``RandomState``; the global NumPy RNG is not touched.

    Returns
    -------
    pandas.DataFrame
        Columns 'line', 'odds', 'prob', sorted by the 'line' string. A pushed
        bet counts as half a win in 'prob'. The favorite's 0.0 line is always
        included in addition to ``handicaps``.
    """
    # A private generator yields the same stream as np.random.seed(random_seed)
    # followed by np.random.* calls, without reseeding the global RNG.
    rng = np.random.RandomState(random_seed)

    home_idx = team_indices[home_team]
    away_idx = team_indices[away_team]

    # Posterior samples: pick each team on the last axis, flatten (chain, draw)
    attack_samples = np.asarray(trace.posterior["attack"].values)
    defense_samples = np.asarray(trace.posterior["defense"].values)
    home_advantage = float(np.asarray(trace.posterior["home_advantage"].values).mean())

    attack_home = attack_samples[..., home_idx].ravel()
    attack_away = attack_samples[..., away_idx].ravel()
    defense_home = defense_samples[..., home_idx].ravel()
    defense_away = defense_samples[..., away_idx].ravel()

    # Expected goals per posterior sample
    home_theta = np.exp(attack_home - defense_away + home_advantage)
    away_theta = np.exp(attack_away - defense_home)

    # The side with the higher mean expected goals is the favorite (home on ties)
    is_away_favorite = float(away_theta.mean()) > float(home_theta.mean())
    if is_away_favorite:
        favorite, underdog = away_team, home_team
    else:
        favorite, underdog = home_team, away_team

    # Simulate matches, drawing a posterior sample for every simulation
    sample_indices = rng.randint(0, len(home_theta), size=n_sims)
    home_goals = rng.poisson(home_theta[sample_indices])
    away_goals = rng.poisson(away_theta[sample_indices])

    # Goal difference from the favorite's perspective
    goal_diff = away_goals - home_goals if is_away_favorite else home_goals - away_goals

    if handicaps is None:
        handicaps = [-3.0, -2.75, -2.5, -2.25, -2.0, -1.75, -1.5, -1.25, -1.0, -0.75,
                    -0.5, -0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0,
                    2.25, 2.5, 2.75, 3.0]

    margin_factor = np.sqrt(1 + vig)

    def _score(threshold, for_favorite):
        """Wins plus half the pushes at one whole/half-goal threshold."""
        if for_favorite:
            wins = np.sum(goal_diff > threshold)
        else:
            wins = np.sum(goal_diff < threshold)
        return wins + 0.5 * np.sum(goal_diff == threshold)

    def _line_prob(handicap, for_favorite):
        """Win probability of a line; quarter lines average their two neighbours."""
        # Favorite -X must win by more than X; underdog +X must lose by less than X.
        sign = -1 if for_favorite else 1
        if handicap % 0.5 == 0.25:
            lower = np.floor(handicap * 2) / 2
            upper = np.ceil(handicap * 2) / 2
            return (_score(sign * lower, for_favorite) +
                    _score(sign * upper, for_favorite)) / (2 * n_sims)
        return _score(sign * handicap, for_favorite) / n_sims

    def _entry(line, prob):
        """One output row; probabilities of 1% or less get a 999.99 placeholder price."""
        odds = round(margin_factor / prob, 2) if prob > 0.01 else 999.99
        return {"line": line, "odds": odds, "prob": prob}

    # Favorite 0.0 line first
    results = [_entry(f"{favorite} 0.0", _line_prob(0.0, True))]

    for handicap in handicaps:
        if handicap < 0:
            line = f"{favorite} {handicap}"
            prob = _line_prob(handicap, True)
        else:
            line = f"{underdog} +{handicap}" if handicap > 0 else f"{underdog} +0.0"
            prob = _line_prob(handicap, False)
        results.append(_entry(line, prob))

    return pd.DataFrame(results).sort_values('line')
    

# Price Crystal Palace (home) vs Everton; a fixed random_seed makes the simulation reproducible
odds = predict_asian_handicap("Crystal Palace", "Everton", trace, team_indices, vig=0.05, random_seed=26)

# Print results with probabilities for verification
print(odds[['line', 'odds','prob']].to_string(index=False))

                line  odds     prob
Crystal Palace -0.25  1.75 0.587155
 Crystal Palace -0.5  1.96 0.523580
Crystal Palace -0.75  2.21 0.462630
 Crystal Palace -1.0  2.55 0.401680
Crystal Palace -1.25  3.01 0.340730
 Crystal Palace -1.5  3.66 0.279780
Crystal Palace -1.75  4.29 0.239105
 Crystal Palace -2.0  5.16 0.198430
Crystal Palace -2.25  6.50 0.157755
 Crystal Palace -2.5  8.75 0.117080
Crystal Palace -2.75 10.42 0.098365
 Crystal Palace -3.0 12.86 0.079650
  Crystal Palace 0.0  1.57 0.650730
        Everton +0.0  2.93 0.349270
       Everton +0.25  2.48 0.412845
        Everton +0.5  2.15 0.476420
       Everton +0.75  1.91 0.537370
        Everton +1.0  1.71 0.598320
       Everton +1.25  1.55 0.659270
        Everton +1.5  1.42 0.720220
       Everton +1.75  1.35 0.760895
        Everton +2.0  1.28 0.801570
       Everton +2.25  1.22 0.842245
        Everton +2.5  1.16 0.882920
       Everton +2.75  1.14 0.901635
        Everton +3.0  1.11 0.920350


In [7]:
def predict_asian_handicap_from_xg(home_team, away_team, home_xg, away_xg, handicaps=None, n_sims=50000, vig=0.05, random_seed=42):
    """
    Predict Asian handicap odds from manually supplied expected goals.

    Parameters
    ----------
    home_team, away_team : str
        Labels used in the output lines.
    home_xg, away_xg : float
        Poisson goal rates for each side; the side with the higher rate is the
        favorite (home on ties).
    handicaps : list of float, optional
        Lines to price; negative values are favorite lines, values >= 0 are
        underdog lines. Defaults to -3.0 ... 3.0 in quarter-goal steps.
    n_sims : int, default 50000
        Number of simulated matches.
    vig : float, default 0.05
        Bookmaker margin; each price is ``sqrt(1 + vig) / prob``.
    random_seed : int or None, default 42
        Seed for a private ``RandomState``; the global NumPy RNG is not touched.

    Returns
    -------
    pandas.DataFrame
        Columns 'line', 'odds', 'prob', sorted by the 'line' string. A pushed
        bet counts as half a win in 'prob'. The favorite's 0.0 line is always
        included in addition to ``handicaps``.
    """
    # A private generator yields the same stream as np.random.seed(random_seed)
    # followed by np.random.* calls, without reseeding the global RNG.
    rng = np.random.RandomState(random_seed)

    # Determine favorite and underdog based on xG
    is_away_favorite = away_xg > home_xg
    if is_away_favorite:
        favorite, underdog = away_team, home_team
    else:
        favorite, underdog = home_team, away_team

    # Simulate matches
    home_goals = rng.poisson(home_xg, size=n_sims)
    away_goals = rng.poisson(away_xg, size=n_sims)

    # Goal difference from the favorite's perspective
    goal_diff = away_goals - home_goals if is_away_favorite else home_goals - away_goals

    if handicaps is None:
        handicaps = [-3.0, -2.75, -2.5, -2.25, -2.0, -1.75, -1.5, -1.25, -1.0, -0.75,
                    -0.5, -0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0,
                    2.25, 2.5, 2.75, 3.0]

    margin_factor = np.sqrt(1 + vig)

    def _score(threshold, for_favorite):
        """Wins plus half the pushes at one whole/half-goal threshold."""
        if for_favorite:
            wins = np.sum(goal_diff > threshold)
        else:
            wins = np.sum(goal_diff < threshold)
        return wins + 0.5 * np.sum(goal_diff == threshold)

    def _line_prob(handicap, for_favorite):
        """Win probability of a line; quarter lines average their two neighbours."""
        # Favorite -X must win by more than X; underdog +X must lose by less than X.
        sign = -1 if for_favorite else 1
        if handicap % 0.5 == 0.25:
            lower = np.floor(handicap * 2) / 2
            upper = np.ceil(handicap * 2) / 2
            return (_score(sign * lower, for_favorite) +
                    _score(sign * upper, for_favorite)) / (2 * n_sims)
        return _score(sign * handicap, for_favorite) / n_sims

    def _entry(line, prob):
        """One output row; probabilities of 1% or less get a 999.99 placeholder price."""
        odds = round(margin_factor / prob, 2) if prob > 0.01 else 999.99
        return {"line": line, "odds": odds, "prob": prob}

    # Favorite 0.0 line first
    results = [_entry(f"{favorite} 0.0", _line_prob(0.0, True))]

    for handicap in handicaps:
        if handicap < 0:
            line = f"{favorite} {handicap}"
            prob = _line_prob(handicap, True)
        else:
            line = f"{underdog} +{handicap}" if handicap > 0 else f"{underdog} +0.0"
            prob = _line_prob(handicap, False)
        results.append(_entry(line, prob))

    return pd.DataFrame(results).sort_values('line')

# Example: price handicap lines for "NEW" (home) vs "NFO" from hand-entered expected goals
odds = predict_asian_handicap_from_xg("NEW", "NFO", home_xg=1.88, away_xg=1.08, vig=0.05)

print(odds)

         line  odds      prob
12  NEW -0.25  1.67  0.612455
11   NEW -0.5  1.84  0.556140
10  NEW -0.75  2.06  0.497520
9    NEW -1.0  2.33  0.438900
8   NEW -1.25  2.69  0.380280
7    NEW -1.5  3.19  0.321660
6   NEW -1.75  3.67  0.279330
5    NEW -2.0  4.32  0.237000
4   NEW -2.25  5.26  0.194670
3    NEW -2.5  6.73  0.152340
2   NEW -2.75  7.94  0.129090
1    NEW -3.0  9.68  0.105840
0     NEW 0.0  1.53  0.668770
13   NFO +0.0  3.09  0.331230
14  NFO +0.25  2.64  0.387545
15   NFO +0.5  2.31  0.443860
16  NFO +0.75  2.04  0.502480
17   NFO +1.0  1.83  0.561100
18  NFO +1.25  1.65  0.619720
19   NFO +1.5  1.51  0.678340
20  NFO +1.75  1.42  0.720670
21   NFO +2.0  1.34  0.763000
22  NFO +2.25  1.27  0.805330
23   NFO +2.5  1.21  0.847660
24  NFO +2.75  1.18  0.870910
25   NFO +3.0  1.15  0.894160


In [None]:
import numpy as np
import pandas as pd

def predict_asian_handicap_both_sides(home_team, away_team, home_xg, away_xg, handicaps=None, n_sims=50000, vig=0.05, random_seed=42):
    """
    Predict Asian handicap odds for both teams based on expected goals.

    Parameters
    ----------
    home_team, away_team : str
        Labels used in the output.
    home_xg, away_xg : float
        Poisson goal rates for each side.
    handicaps : list of float, optional
        Handicaps applied to the HOME team; the away team is priced at the
        opposite handicap. Defaults to -3.0 ... 3.0 in quarter-goal steps.
    n_sims : int, default 50000
        Number of simulated matches.
    vig : float, default 0.05
        Bookmaker margin; each price is ``sqrt(1 + vig) / prob``.
    random_seed : int or None, default 42
        Seed for a private ``RandomState``; the global NumPy RNG is not touched.

    Returns
    -------
    pandas.DataFrame
        Two rows per handicap (home and away) with columns 'team', 'handicap',
        'line', 'odds', 'prob', sorted by team then handicap. A pushed bet
        counts as half a win, so the two sides' probabilities sum to 1.
    """
    # A private generator yields the same stream as np.random.seed(random_seed)
    # followed by np.random.* calls, without reseeding the global RNG.
    rng = np.random.RandomState(random_seed)

    # Simulate matches
    home_goals = rng.poisson(home_xg, size=n_sims)
    away_goals = rng.poisson(away_xg, size=n_sims)
    goal_diff = home_goals - away_goals

    if handicaps is None:
        handicaps = [-3.0, -2.75, -2.5, -2.25, -2.0, -1.75, -1.5, -1.25, -1.0, -0.75,
                    -0.5, -0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0,
                    2.25, 2.5, 2.75, 3.0]

    margin_factor = np.sqrt(1 + vig)

    def _home_score(threshold):
        """Home wins plus half the pushes when home must beat `threshold`."""
        return np.sum(goal_diff > threshold) + 0.5 * np.sum(goal_diff == threshold)

    def _price(prob):
        """Decimal odds with margin; probabilities of 1% or less get 999.99."""
        return round(margin_factor / prob, 2) if prob > 0.01 else 999.99

    results = []
    for handicap in handicaps:
        # Home covers handicap h when goal_diff + h > 0, i.e. goal_diff > -h.
        if handicap % 0.5 == 0.25:  # Quarter line: half the stake on each neighbour
            lower = np.floor(handicap * 2) / 2
            upper = np.ceil(handicap * 2) / 2
            prob_home = (_home_score(-lower) + _home_score(-upper)) / (2 * n_sims)
        else:  # Whole or half-goal line
            prob_home = _home_score(-handicap) / n_sims

        prob_away = 1 - prob_home

        # Avoid a negative zero (-0.0) in the away team's handicap column.
        away_handicap = -handicap if handicap != 0 else 0.0

        # Home team line
        results.append({
            "team": home_team,
            "handicap": handicap,
            "line": f"{home_team} {handicap:+g}" if handicap != 0 else f"{home_team} 0.0",
            "odds": _price(prob_home),
            "prob": prob_home
        })

        # Away team line (with opposite handicap)
        results.append({
            "team": away_team,
            "handicap": away_handicap,
            "line": f"{away_team} {-handicap:+g}" if handicap != 0 else f"{away_team} 0.0",
            "odds": _price(prob_away),
            "prob": prob_away
        })

    return pd.DataFrame(results).sort_values(['team', 'handicap']).reset_index(drop=True)

# Example usage: "BOU" at home to "LIV" with hand-entered expected goals;
# the default random_seed is used.
odds = predict_asian_handicap_both_sides("BOU", "LIV", home_xg=1.79, away_xg=1.57, vig=0.05)
print(odds)

   team  handicap       line   odds      prob
0   LIV     -3.00     LIV -3  23.82  0.043010
1   LIV     -2.75  LIV -2.75  19.06  0.053765
2   LIV     -2.50   LIV -2.5  15.88  0.064520
3   LIV     -2.25  LIV -2.25  11.39  0.089935
4   LIV     -2.00     LIV -2   8.88  0.115350
5   LIV     -1.75  LIV -1.75   7.28  0.140765
6   LIV     -1.50   LIV -1.5   6.17  0.166180
7   LIV     -1.25  LIV -1.25   4.87  0.210575
8   LIV     -1.00     LIV -1   4.02  0.254970
9   LIV     -0.75  LIV -0.75   3.42  0.299365
10  LIV     -0.50   LIV -0.5   2.98  0.343760
11  LIV     -0.25  LIV -0.25   2.56  0.400755
12  LIV     -0.00    LIV 0.0   2.24  0.457750
13  LIV      0.25  LIV +0.25   1.99  0.514745
14  LIV      0.50   LIV +0.5   1.79  0.571740
15  LIV      0.75  LIV +0.75   1.65  0.621370
16  LIV      1.00     LIV +1   1.53  0.671000
17  LIV      1.25  LIV +1.25   1.42  0.720630
18  LIV      1.50   LIV +1.5   1.33  0.770260
19  LIV      1.75  LIV +1.75   1.28  0.802510
20  LIV      2.00     LIV +2   1.2

In [9]:
def kelly_expected(probability, decimal_odds, fraction=1.0, *, fractiont=None):
    """
    Return the (fractional) Kelly stake and the expected value of a bet.

    Parameters
    ----------
    probability : float
        Estimated probability of winning; must lie strictly between 0 and 1.
    decimal_odds : float
        Decimal odds offered; must be greater than 1.
    fraction : float, default 1.0
        Multiplier applied to the full Kelly stake (e.g. 0.5 for half Kelly).
    fractiont : float, optional
        Deprecated misspelling of ``fraction``, accepted as a keyword for
        backward compatibility; when given it overrides ``fraction``.

    Returns
    -------
    tuple
        ``(kelly, ev)``: the stake as a fraction of bankroll (never negative)
        and the expected value in percent of the stake. ``(0, 0)`` is returned
        for out-of-range inputs.
    """
    # The parameter used to be spelled `fractiont` while the body read
    # `fraction`, so the argument was ignored in favour of a global variable.
    if fractiont is not None:
        fraction = fractiont

    if decimal_odds <= 1 or probability <= 0 or probability >= 1:
        return 0, 0

    # Kelly Criterion: (p * b - q) / b, where b is the net profit per unit
    # staked and q is the probability of losing.
    net_odds = decimal_odds - 1
    q = 1 - probability
    kelly = (probability * net_odds - q) / net_odds
    kelly = kelly * fraction  # Apply fractional Kelly
    kelly = max(0, kelly)  # No negative bets

    # Expected value per unit staked, as a percentage
    ev = (probability * net_odds) - (1 - probability)
    ev = ev * 100

    return kelly, ev

# Example inputs: win probability, decimal odds and Kelly fraction (1 = full Kelly).
# Note: `odds` is rebound to a float here; earlier cells bind it to a DataFrame.
prob = 0.497520
odds = 2.06
fraction = 1

bet_size, ev = kelly_expected(prob, odds, fraction)
# bet_size is a fraction of bankroll (formatted as a percentage); ev is already in percent
print(f"Optimal bet size: {bet_size:.1%}")
print(f"Expected Value: {ev:.1f}%")

Optimal bet size: 2.3%
Expected Value: 2.5%
