In [1]:
import html
import json

import numpy as np
import tqdm
from IPython.display import HTML, display

In [2]:
# Load the team table. A context manager closes the file handle right away
# instead of leaking it, and the explicit UTF-8 encoding keeps non-ASCII team
# names intact regardless of the platform's default locale.
with open('teams.json', encoding='utf-8') as teams_file:
    teams = json.load(teams_file)
print(teams)

{'Real Madrid': {'nation': 'Spain', 'coefficient': 136.0, 'pot': 1}, 'Manchester City': {'nation': 'England', 'coefficient': 148.0, 'pot': 1}, 'Bayern Munich': {'nation': 'Germany', 'coefficient': 144.0, 'pot': 1}, 'Paris Saint-Germain': {'nation': 'France', 'coefficient': 116.0, 'pot': 1}, 'Liverpool': {'nation': 'England', 'coefficient': 114.0, 'pot': 1}, 'Inter': {'nation': 'Italy', 'coefficient': 101.0, 'pot': 1}, 'Borussia Dortmund': {'nation': 'Germany', 'coefficient': 97.0, 'pot': 1}, 'RB Leipzig': {'nation': 'Germany', 'coefficient': 97.0, 'pot': 1}, 'Barcelona': {'nation': 'Spain', 'coefficient': 91.0, 'pot': 1}, 'Bayer Leverkusen': {'nation': 'Germany', 'coefficient': 90.0, 'pot': 2}, 'Atlético Madrid': {'nation': 'Spain', 'coefficient': 89.0, 'pot': 2}, 'Atalanta': {'nation': 'Italy', 'coefficient': 81.0, 'pot': 2}, 'Juventus': {'nation': 'Italy', 'coefficient': 80.0, 'pot': 2}, 'Benfica': {'nation': 'Portugal', 'coefficient': 79.0, 'pot': 2}, 'Arsenal': {'nation': 'England'

In [4]:
# Load the fixture list. A context manager closes the file handle right away
# instead of leaking it, and the encoding is pinned so the team names read here
# compare equal to the keys of `teams` on every platform.
with open('matches.json', encoding='utf-8') as matches_file:
    matches = json.load(matches_file)

print(f"Number of matches: {len(matches)}")

# Sanity check 1: every fixture record must have exactly four fields.
for match in matches:
    if len(match) != 4:
        print("EERRROR", match)

# Sanity check 2: for every team, collect field 0 of the fixtures where the
# team appears in field 1 (home) or field 2 (away); each collection must hold
# exactly four distinct values.
# NOTE(review): field 0 looks like a matchday/round identifier -- confirm.
for team in teams.keys():
    home = []
    away = []
    for match in matches:
        if team == match[1]:
            home.append(match[0])
        elif team == match[2]:
            away.append(match[0])
    if len(set(home)) != 4:
        print("ERROR home", team, home)

    if len(set(away)) != 4:
        print("ERROR away", team, away)

Number of matches: 144


In [5]:
def coeff_based_prob(team1, team2):
    """Derive a three-way probability vector from two teams' coefficients.

    Parameters
    ----------
    team1, team2 : mapping
        Each must provide a numeric ``"coefficient"`` entry.

    Returns
    -------
    list of float
        Three values that sum to 1: team1's share, a draw term and team2's
        share, in that order. The raw weights are ``c1 / (c1 + c2)``,
        ``1 / (c1 + c2)`` and ``c2 / (c1 + c2)``, renormalised by their sum.

    Raises
    ------
    ZeroDivisionError
        If the two coefficients add up to zero.
    """
    c1, c2 = team1["coefficient"], team2["coefficient"]
    total = c1 + c2

    # Un-normalised weights; the middle (draw) term shrinks as the combined
    # coefficient grows.
    weights = (c1 / total, 1 / total, c2 / total)

    norm = sum(weights)
    return [w / norm for w in weights]

In [36]:
def simulate_match(p=None):
    """Randomly draw one match outcome and return the points for both sides.

    Parameters
    ----------
    p : sequence of 3 floats, optional
        Probabilities of the outcomes "win", "draw" and "lose" (in that
        order), from the first side's point of view. Forwarded unchanged to
        ``np.random.choice``, so it must sum to 1. When omitted, the three
        outcomes are equally likely.

    Returns
    -------
    tuple of int
        ``(first_side_points, second_side_points)``: ``(3, 0)`` for "win",
        ``(1, 1)`` for "draw" and ``(0, 3)`` for "lose".

    Notes
    -----
    Uses NumPy's global random state (``np.random``).
    """
    # The former default (p=0.5) was a scalar, which np.random.choice rejects,
    # so calling simulate_match() without arguments always raised. None is the
    # value np.random.choice documents for a uniform draw.
    points = {"win": (3, 0), "draw": (1, 1), "lose": (0, 3)}

    outcome = np.random.choice(list(points), p=p)

    # np.random.choice returns a NumPy string scalar; normalise to a plain str
    # before the lookup.
    return points[str(outcome)]


def simulate_matches(teams, matches, p_fn=coeff_based_prob):
    """Build one league table from the fixture list.

    Fixtures that already have a result are scored from it; fixtures whose
    result is ``None`` are simulated.

    Parameters
    ----------
    teams : mapping
        Team name -> team record. The record is handed to ``p_fn``.
    matches : iterable
        4-item records ``(_, home, away, result)``. ``result`` is either
        ``None`` (not played yet) or a string of the form ``"H-A"`` with the
        integer goals of the home and away side.
    p_fn : callable, optional
        ``p_fn(home_record, away_record)`` returning the probability vector
        passed to ``simulate_match``.

    Returns
    -------
    dict
        Team name -> ``{"pts": total points, "sim": number of simulated
        matches, "real": number of matches scored from a real result}``.
    """
    table = {name: {"pts": 0, "sim": 0, "real": 0} for name in teams}

    for _, home, away, result in matches:
        if result is not None:
            # Played fixture: convert the scoreline into 3/1/0 points.
            goals_home, goals_away = [int(g) for g in result.split("-")]
            if goals_home > goals_away:
                pts_home, pts_away = 3, 0
            elif goals_home < goals_away:
                pts_home, pts_away = 0, 3
            else:
                pts_home, pts_away = 1, 1
            counter = "real"
        else:
            # Open fixture: draw an outcome from the model's probabilities.
            odds = p_fn(teams[home], teams[away])
            pts_home, pts_away = simulate_match(p=odds)
            counter = "sim"

        home_row, away_row = table[home], table[away]
        home_row[counter] += 1
        away_row[counter] += 1
        home_row["pts"] += pts_home
        away_row["pts"] += pts_away

    return table


def simulate(runs, teams, matches, p_fn=coeff_based_prob):
    """Run the league simulation ``runs`` times and aggregate per-team stats.

    Parameters
    ----------
    runs : int
        Number of simulated tables to generate. For ``runs <= 0`` nothing is
        simulated: averages are reported as ``0.0`` and both placements stay
        ``None``.
    teams, matches, p_fn
        Forwarded unchanged to ``simulate_matches`` on every run.

    Returns
    -------
    list of tuple
        One ``(team, avg_points, worst_position, best_position, sim, real)``
        tuple per team, sorted by ``avg_points`` in descending order.

        * ``avg_points``: mean points over all runs.
        * ``worst_position``: numerically largest 1-based rank seen in any run.
        * ``best_position``: numerically smallest 1-based rank seen in any run.
        * ``sim`` / ``real``: match counters copied from the last run's table.

        Note that the worst placement comes *before* the best placement in the
        tuple; this is the layout ``print_results`` unpacks.
    """
    stats = {
        team: {"total": 0, "worst": None, "best": None, "sim": 0, "real": 0}
        for team in teams
    }

    for _ in tqdm.tqdm(range(runs)):
        table = simulate_matches(teams, matches, p_fn)

        # Rank by points only. The sort is stable, so teams level on points
        # keep the iteration order of `table`; no further tie-breaker applies.
        ranking = sorted(
            table.items(), key=lambda item: item[1]["pts"], reverse=True
        )

        for position, (team, row) in enumerate(ranking, start=1):
            entry = stats[team]

            # Accumulate raw points and divide once at the end; this avoids
            # summing `runs` rounded quotients.
            entry["total"] += row["pts"]

            if entry["worst"] is None or position > entry["worst"]:
                entry["worst"] = position
            if entry["best"] is None or position < entry["best"]:
                entry["best"] = position

            entry["sim"] = row["sim"]
            entry["real"] = row["real"]

    results = [
        (
            team,
            # Guard against runs <= 0 so the sort below never compares None.
            entry["total"] / runs if runs > 0 else 0.0,
            entry["worst"],
            entry["best"],
            entry["sim"],
            entry["real"],
        )
        for team, entry in stats.items()
    ]

    results.sort(key=lambda item: item[1], reverse=True)
    return results

In [46]:
def print_results(results):
    """Render aggregated simulation results as an HTML table in the notebook.

    Parameters
    ----------
    results : iterable of tuple
        ``(team, avg_points, worst_position, best_position, sim, real)``
        tuples, already in display order (the layout returned by
        ``simulate``). The row index determines the displayed position.

    Returns
    -------
    None
        The table is emitted through ``IPython.display.display``.
    """
    # Colour bands for the position cell.
    # NOTE(review): these presumably mark the direct-qualification, play-off
    # and elimination zones -- confirm against the competition format.
    first_band_end = 8    # positions 1-8 keep the default colour
    second_band_end = 24  # positions 9-24 are amber, 25 and below are red

    table_rows = []
    for pos, (team, avg, worst, best, sim, real) in enumerate(results, start=1):
        if pos > second_band_end:
            colour = "#E3735E"
        elif pos > first_band_end:
            colour = "#FFBF00"
        else:
            colour = "#000"

        cells = [team, round(avg), best, worst, sim, real]

        # Escape every value so that a team name containing "&", "<" or ">"
        # cannot break the table markup.
        cell_html = "".join(
            "<td>" + html.escape(str(value)) + "</td>" for value in cells
        )
        table_rows.append(
            f"<tr><td style='color:{colour}'>{pos}</td>{cell_html}</tr>"
        )

    content = "".join(table_rows)

    display(
        HTML(
            f"<table><tr><th>Pos</th><th>Team</th><th>Points</th><th>Best Placement</th><th>Worst Placement</th><th>Simulated</th><th>Played</th></tr>{content}</table>"
        )
    )

In [52]:
# Seed NumPy's global RNG (the one simulate_match draws from) so that a
# Restart & Run All reproduces the same table.
SEED = 42
np.random.seed(SEED)

runs = 10_000

results = simulate(runs, teams, matches, p_fn=coeff_based_prob)

100%|██████████| 10000/10000 [00:12<00:00, 806.33it/s]


In [53]:
# Render the aggregated table; `results` is already sorted by average points.
print_results(results)

Pos,Team,Points,Best,Worst,Simulated,Played
1,Bayern Munich,18,1,34,7,1
2,Real Madrid,17,1,34,7,1
3,Liverpool,17,1,35,7,1
4,Borussia Dortmund,17,1,35,7,1
5,Manchester City,16,1,36,7,1
6,Atlético Madrid,16,1,35,7,1
7,Bayer Leverkusen,16,1,35,7,1
8,Paris Saint-Germain,16,1,36,7,1
9,Juventus,15,1,36,7,1
10,Inter,15,1,36,7,1
