In [133]:
import json
from pprint import pp

import numpy as np
import pandas as pd
import tqdm
from IPython.display import HTML, display

In [134]:
# Load the team table (name -> nation / coefficient / pot).
# The `with` block closes the file handle as soon as parsing is done instead
# of leaving it open until garbage collection.
with open('teams.json') as teams_file:
    teams = json.load(teams_file)
pp(teams)

{'Real Madrid': {'nation': 'Spain', 'coefficient': 136.0, 'pot': 1},
 'Manchester City': {'nation': 'England', 'coefficient': 148.0, 'pot': 1},
 'Bayern Munich': {'nation': 'Germany', 'coefficient': 144.0, 'pot': 1},
 'Paris Saint-Germain': {'nation': 'France', 'coefficient': 116.0, 'pot': 1},
 'Liverpool': {'nation': 'England', 'coefficient': 114.0, 'pot': 1},
 'Inter': {'nation': 'Italy', 'coefficient': 101.0, 'pot': 1},
 'Borussia Dortmund': {'nation': 'Germany', 'coefficient': 97.0, 'pot': 1},
 'RB Leipzig': {'nation': 'Germany', 'coefficient': 97.0, 'pot': 1},
 'Barcelona': {'nation': 'Spain', 'coefficient': 91.0, 'pot': 1},
 'Bayer Leverkusen': {'nation': 'Germany', 'coefficient': 90.0, 'pot': 2},
 'Atlético Madrid': {'nation': 'Spain', 'coefficient': 89.0, 'pot': 2},
 'Atalanta': {'nation': 'Italy', 'coefficient': 81.0, 'pot': 2},
 'Juventus': {'nation': 'Italy', 'coefficient': 80.0, 'pot': 2},
 'Benfica': {'nation': 'Portugal', 'coefficient': 79.0, 'pot': 2},
 'Arsenal': {'nati

In [135]:
# Load the fixture list; the `with` block guarantees the file handle is closed.
with open('matches.json') as matches_file:
    matches = json.load(matches_file)

print(f"Number of matches: {len(matches)}")

# Every fixture must have exactly four fields; later code unpacks them as
# (first field, home team, away team, result).
for match in matches:
    if len(match) != 4:
        print("EERRROR", match)

# Sanity check per team: it must appear as the home side under exactly four
# distinct values of the first field, and likewise as the away side.
# NOTE(review): the first field is presumably a matchday/round id - confirm.
for team in teams:
    home = [match[0] for match in matches if match[1] == team]
    # The home check takes precedence, mirroring an if/elif on the same fixture.
    away = [
        match[0] for match in matches if match[1] != team and match[2] == team
    ]

    if len(set(home)) != 4:
        print("ERROR home", team, home)

    if len(set(away)) != 4:
        print("ERROR away", team, away)

Number of matches: 144


In [136]:
def coeff_based_prob(team1, team2):
    """Return three normalised outcome weights derived from team coefficients.

    With ``a`` and ``b`` the ``"coefficient"`` values of ``team1`` and
    ``team2``, the raw weights are ``a / (a + b)``, ``1 / (a + b)`` and
    ``b / (a + b)``; they are then divided by their sum so the result adds
    up to 1.

    NOTE(review): the three entries presumably stand for
    (team1 win, draw, team2 win) - confirm against the intended caller.

    Parameters:
        team1: mapping with a numeric ``"coefficient"`` entry.
        team2: mapping with a numeric ``"coefficient"`` entry.

    Returns:
        A list of three floats.
    """
    first_strength = team1["coefficient"]
    second_strength = team2["coefficient"]
    combined = first_strength + second_strength

    raw_weights = (
        first_strength / combined,
        1 / combined,
        second_strength / combined,
    )
    norm = sum(raw_weights)

    return [weight / norm for weight in raw_weights]

def coeff_win_prob(team1, team2):
    """Return two normalised weights proportional to the teams' coefficients.

    With ``a`` and ``b`` the ``"coefficient"`` values of ``team1`` and
    ``team2``, the result is ``[a / (a + b), b / (a + b)]`` rescaled by the
    sum of the two entries.

    Parameters:
        team1: mapping with a numeric ``"coefficient"`` entry.
        team2: mapping with a numeric ``"coefficient"`` entry.

    Returns:
        A list of two floats, team1's share first.
    """
    first_strength = team1["coefficient"]
    second_strength = team2["coefficient"]
    combined = first_strength + second_strength

    shares = (first_strength / combined, second_strength / combined)
    norm = sum(shares)

    return [share / norm for share in shares]

def simulate_goals(p):
    """Draw a random scoreline for a single match.

    Each side's goal count is sampled from a Poisson distribution whose mean
    is three times that side's weight in ``p``.

    Parameters:
        p: sequence of weights with the first team's entry first and the
           second team's entry last. Both the two-entry vector returned by
           ``coeff_win_prob`` and the three-entry vector returned by
           ``coeff_based_prob`` are accepted; a middle entry is ignored.

    Returns:
        ``[goals_first, goals_second]`` as sampled by ``np.random.poisson``.
    """
    # Use the first and last entries rather than indices 0 and 1: with a
    # three-entry vector, index 1 is the middle weight, not the second team's.
    rates = (3 * p[0], 3 * p[-1])
    return [np.random.poisson(rate) for rate in rates]

In [137]:
def simulate_match(p=None):
    """Sample a win/draw/lose outcome and return the points for both sides.

    Parameters:
        p: optional sequence of three probabilities for "win", "draw" and
           "lose" (in that order), summing to 1. When omitted, the three
           outcomes are equally likely. The previous scalar default (0.5)
           was not a valid probability vector for ``np.random.choice`` and
           made a no-argument call raise.

    Returns:
        ``(3, 0)`` for "win", ``(1, 1)`` for "draw", ``(0, 3)`` for "lose".
    """
    result = np.random.choice(["win", "draw", "lose"], p=p)

    if result == "win":
        return 3, 0
    elif result == "draw":
        return 1, 1
    else:
        return 0, 3


def simulate_matches(teams, matches, p_fn=coeff_win_prob):
    """Run through every fixture once and return the resulting table.

    Fixtures whose result is ``None`` are simulated with ``simulate_goals``
    using the weights from ``p_fn``; all others are parsed from their
    ``"<home>-<away>"`` score string.

    Parameters:
        teams: mapping of team name to the team record passed to ``p_fn``.
        matches: iterable of four-field fixtures ``(_, home, away, result)``.
        p_fn: callable ``(home_record, away_record) -> weights``.

    Returns:
        Dict mapping each team name to its counters: ``pts`` (points),
        ``sim`` / ``real`` (simulated / already-played fixtures),
        ``g_done`` (goals scored) and ``g_conc`` (goals conceded).
    """
    table = {
        name: {"pts": 0, "sim": 0, "real": 0, "g_done": 0, "g_conc": 0}
        for name in teams
    }

    for _, home, away, result in matches:
        if result is not None:
            # Already played: the score is stored as "home-away".
            goals_h, goals_a = map(int, result.split("-"))
            counter = "real"
        else:
            weights = p_fn(teams[home], teams[away])
            goals_h, goals_a = simulate_goals(p=weights)
            counter = "sim"

        table[home][counter] += 1
        table[away][counter] += 1

        # 3 points for a win, 1 each for a draw, 0 for a loss.
        if goals_h == goals_a:
            pts_h, pts_a = 1, 1
        elif goals_h > goals_a:
            pts_h, pts_a = 3, 0
        else:
            pts_h, pts_a = 0, 3

        for side, scored, conceded, pts in (
            (home, goals_h, goals_a, pts_h),
            (away, goals_a, goals_h, pts_a),
        ):
            entry = table[side]
            entry["g_done"] += scored
            entry["g_conc"] += conceded
            entry["pts"] += pts

    return table


def simulate(runs, teams, matches, p_fn=coeff_win_prob):
    """Repeat the full set of fixtures ``runs`` times and collect every table.

    Parameters:
        runs: number of independent repetitions.
        teams: team mapping forwarded to ``simulate_matches``.
        matches: fixture list forwarded to ``simulate_matches``.
        p_fn: weight function forwarded to ``simulate_matches``.

    Returns:
        A DataFrame with one row per (run, team) and the columns
        Team, Points, Simulated, Real, Goals Done, Goals Conceded.
    """
    # Order matches the DataFrame columns that follow "Team".
    stat_keys = ("pts", "sim", "real", "g_done", "g_conc")

    records = []
    for _ in tqdm.tqdm(range(runs)):
        table = simulate_matches(teams, matches, p_fn)
        records.extend(
            [team, *(stats[key] for key in stat_keys)]
            for team, stats in table.items()
        )

    return pd.DataFrame(
        records,
        columns=["Team", "Points", "Simulated", "Real", "Goals Done", "Goals Conceded"],
    )

In [138]:
def print_results(results):
    """Display a ranked standings table as HTML in the notebook.

    Parameters:
        results: iterable of ``(team, average_points)`` pairs, already in
            ranking order. Points are rounded with ``round`` for display.

    The position cell is coloured by rank: the first 8 positions use
    ``#000``, positions 9-24 use ``#FFBF00`` and positions 25 and beyond use
    ``#E3735E``.
    """
    table_rows = []
    for index, (team, avg) in enumerate(results):
        rank = index + 1

        if rank >= 25:
            colour = "#E3735E"
        elif index >= 8:
            colour = "#FFBF00"
        else:
            colour = "#000"

        table_rows.append((rank, colour, (team, round(avg))))

    content = ""
    for rank, colour, values in table_rows:
        cells = "".join("<td>" + str(value) + "</td>" for value in values)
        content += f"<tr><td style='color:{colour}'>{rank}</td>{cells}</tr>"

    markup = f"<table><tr><th>Pos</th><th>Team</th><th>Points</th></tr>{content}</table>"
    display(HTML(markup))

In [143]:
# Number of Monte Carlo repetitions and the RNG seed.
runs = 10_000
SEED = 42

# The simulation helpers draw from NumPy's global RNG, so seed it right before
# the run: Restart Kernel -> Run All then reproduces the same table.
np.random.seed(SEED)
results = simulate(runs, teams, matches, p_fn=coeff_win_prob)

100%|██████████| 10000/10000 [00:03<00:00, 3310.77it/s]


In [144]:
# Per-team summary over all runs: mean points and goals, plus the number of
# simulated / real fixtures (max over runs). Ranked by points, then goals scored.
agg_spec = {
    "Points": "mean",
    "Goals Done": "mean",
    "Goals Conceded": "mean",
    "Simulated": "max",
    "Real": "max",
}
agg_results = (
    results.groupby("Team")
    .agg(agg_spec)
    .sort_values(["Points", "Goals Done"], ascending=False)
)
agg_results

Unnamed: 0_level_0,Points,Goals Done,Goals Conceded,Simulated,Real
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bayern Munich,17.9481,23.8105,8.2237,7,1
Real Madrid,17.5948,17.5208,7.4572,7,1
Borussia Dortmund,16.9264,17.0754,7.025,7,1
Liverpool,16.8503,17.017,8.0179,7,1
Manchester City,16.3801,15.1761,5.8128,7,1
Paris Saint-Germain,15.7117,13.9515,8.0091,7,1
Bayer Leverkusen,15.7087,17.0107,7.9889,7,1
Atlético Madrid,15.6378,14.8495,9.1185,7,1
Inter,15.1525,14.1606,6.9098,7,1
Juventus,14.8785,15.3505,9.6875,7,1


In [145]:
# Keep only the average points, turn the Team index back into a column and
# render the (team, points) pairs as the final standings table.
standing = agg_results.loc[:, ["Points"]].reset_index()
print_results(standing.to_numpy())

Pos,Team,Points
1,Bayern Munich,18
2,Real Madrid,18
3,Borussia Dortmund,17
4,Liverpool,17
5,Manchester City,16
6,Paris Saint-Germain,16
7,Bayer Leverkusen,16
8,Atlético Madrid,16
9,Inter,15
10,Juventus,15
