In [10]:
import numpy as np
import tqdm
from IPython.display import HTML, display
import json

In [48]:
# Load the team metadata keyed by team name. The context manager closes the
# file handle deterministically, and the explicit UTF-8 encoding keeps
# non-ASCII club names intact regardless of the platform default.
with open('teams.json', encoding='utf-8') as teams_file:
    teams = json.load(teams_file)
print(teams)

{'Real Madrid': {'nation': 'Spain', 'coefficient': 136.0, 'pot': 1}, 'Manchester City': {'nation': 'England', 'coefficient': 148.0, 'pot': 1}, 'Bayern Munich': {'nation': 'Germany', 'coefficient': 144.0, 'pot': 1}, 'Paris Saint-Germain': {'nation': 'France', 'coefficient': 116.0, 'pot': 1}, 'Liverpool': {'nation': 'England', 'coefficient': 114.0, 'pot': 1}, 'Inter': {'nation': 'Italy', 'coefficient': 101.0, 'pot': 1}, 'Borussia Dortmund': {'nation': 'Germany', 'coefficient': 97.0, 'pot': 1}, 'RB Leipzig': {'nation': 'Germany', 'coefficient': 97.0, 'pot': 1}, 'Barcelona': {'nation': 'Spain', 'coefficient': 91.0, 'pot': 1}, 'Bayer Leverkusen': {'nation': 'Germany', 'coefficient': 90.0, 'pot': 2}, 'Atlético Madrid': {'nation': 'Spain', 'coefficient': 89.0, 'pot': 2}, 'Atalanta': {'nation': 'Italy', 'coefficient': 81.0, 'pot': 2}, 'Juventus': {'nation': 'Italy', 'coefficient': 80.0, 'pot': 2}, 'Benfica': {'nation': 'Portugal', 'coefficient': 79.0, 'pot': 2}, 'Arsenal': {'nation': 'England'

Number of matches: 144
[[3, 'Real Madrid', 'Borussia Dortmund', None], [4, 'Real Madrid', 'Milan', None], [7, 'Real Madrid', 'Red Bull Salzburg', None], [1, 'Real Madrid', 'VfB Stuttgart', '3-1']]


In [63]:
# Load the fixture list. The context manager closes the file handle, and the
# explicit UTF-8 encoding keeps non-ASCII club names intact on every platform.
with open('matches.json', encoding='utf-8') as matches_file:
    matches = json.load(matches_file)

print(f"Number of matches: {len(matches)}")

# Sanity check 1: every fixture record must have exactly four fields
# (field 0, home team, away team, result).
for match in matches:
    if len(match) != 4:
        print("EERRROR", match)

# Sanity check 2: for every team, the field-0 values of its home fixtures and
# of its away fixtures must each contain exactly four distinct entries.
for team in teams.keys():
    home = []
    away = []
    for match in matches:
        if team == match[1]:
            home.append(match[0])
        elif team == match[2]:
            away.append(match[0])
    if len(set(home)) != 4:
        print("ERROR home", team, home)

    if len(set(away)) != 4:
        print("ERROR away", team, away)

Number of matches: 144


In [64]:
def coeff_based_prob(team1, team2):
    """Turn two teams' coefficients into outcome probabilities.

    Args:
        team1: Mapping with a numeric ``"coefficient"`` entry (first team).
        team2: Mapping with a numeric ``"coefficient"`` entry (second team).

    Returns:
        list[float]: Three probabilities summing to 1, ordered as
        (team1 wins, draw, team2 wins).

    Raises:
        ZeroDivisionError: If the two coefficients add up to zero.
    """
    strength_a = team1["coefficient"]
    strength_b = team2["coefficient"]
    combined = strength_a + strength_b

    # Raw weights: each side's share of the combined coefficient, plus a
    # fixed draw weight of 1 / combined.
    weights = [
        strength_a / combined,
        1 / combined,
        strength_b / combined,
    ]

    # Rescale so the three weights form a proper probability distribution.
    normaliser = sum(weights)
    return [weight / normaliser for weight in weights]

In [68]:
def simulate_match(p=None):
    """Randomly draw one match outcome and return the points awarded.

    Args:
        p: Probabilities for the outcomes ("win", "draw", "lose") of the
            first team, as a length-3 sequence summing to 1. ``None`` (the
            default) samples the three outcomes uniformly. The previous
            default of ``0.5`` was not a valid probability vector and made
            a bare ``simulate_match()`` call raise.

    Returns:
        tuple[int, int]: Points for (first team, second team): ``(3, 0)``
        for a win, ``(1, 1)`` for a draw, ``(0, 3)`` for a loss.
    """
    outcome = np.random.choice(["win", "draw", "lose"], p=p)

    if outcome == "win":
        return 3, 0
    if outcome == "draw":
        return 1, 1
    return 0, 3


def simulate_matches(teams, matches, p_fn=coeff_based_prob):
    """Play through every fixture once and return each team's points total.

    Args:
        teams: Mapping of team name to the team record passed to ``p_fn``.
        matches: Iterable of 4-item records ``(ignored, home, away, result)``.
            ``result`` is ``None`` for a fixture that still has to be
            simulated, otherwise a ``"H-A"`` goals string such as ``"3-1"``.
        p_fn: Callable taking the home and away team records and returning
            the probability vector handed to ``simulate_match``.

    Returns:
        dict: Team name mapped to accumulated points (3 per win, 1 per draw).
    """
    points = dict.fromkeys(teams, 0)

    for _unused, home_team, away_team, final_score in matches:
        if final_score is not None:
            # Already played: derive the points from the recorded score line.
            home_goals, away_goals = (int(part) for part in final_score.split("-"))
            if home_goals > away_goals:
                home_points, away_points = 3, 0
            elif home_goals < away_goals:
                home_points, away_points = 0, 3
            else:
                home_points, away_points = 1, 1
        else:
            # Not played yet: sample an outcome from the probability model.
            outcome_probs = p_fn(teams[home_team], teams[away_team])
            home_points, away_points = simulate_match(p=outcome_probs)

        points[home_team] += home_points
        points[away_team] += away_points

    return points


def simulate(runs, teams, matches, p_fn=coeff_based_prob):
    """Simulate the competition ``runs`` times and aggregate per-team stats.

    Args:
        runs: Number of independent simulations to perform.
        teams: Mapping of team name to team record.
        matches: Fixture records forwarded to ``simulate_matches``.
        p_fn: Probability model forwarded to ``simulate_matches``.

    Returns:
        list[tuple]: One ``(team, avg_points, worst_rank, best_rank)`` tuple
        per team, sorted by ``avg_points`` in descending order. Ranks are
        1-based, so ``worst_rank`` is the numerically largest position seen
        and ``best_rank`` the smallest. When ``runs`` is less than 1 no
        simulation takes place: averages are ``0.0`` and both ranks are
        ``None``.
    """
    # Averages start at 0.0 rather than None so that the final sort is well
    # defined even when no run is executed.
    avg_points = {team: 0.0 for team in teams}
    worst_rank = {team: None for team in teams}
    best_rank = {team: None for team in teams}

    for _ in tqdm.tqdm(range(runs)):
        run_points = simulate_matches(teams, matches, p_fn)

        # The sort is stable, so teams level on points keep their dict order.
        table = sorted(run_points.items(), key=lambda item: item[1], reverse=True)

        for rank, (team, team_points) in enumerate(table, start=1):
            avg_points[team] += team_points / runs
            if worst_rank[team] is None or rank > worst_rank[team]:
                worst_rank[team] = rank
            if best_rank[team] is None or rank < best_rank[team]:
                best_rank[team] = rank

    results = [
        (team, avg_points[team], worst_rank[team], best_rank[team])
        for team in teams
    ]
    results.sort(key=lambda row: row[1], reverse=True)
    return results

In [69]:
def print_results(results, direct_spots=8, playoff_spots=24):
    """Render the aggregated simulation results as an HTML table.

    The position cell is colour-coded with 1-based thresholds: positions up
    to ``direct_spots`` use the default colour, positions up to
    ``playoff_spots`` are amber, and everything below is red. Both
    comparisons now use the 1-based position; previously the red band
    started one place too early, at position 24.

    Args:
        results: Iterable of ``(team, avg_points, worst_rank, best_rank)``
            tuples, already ordered by final position (first = position 1).
        direct_spots: Last position rendered in the default colour.
        playoff_spots: Last position rendered in amber.
    """
    rows = []
    for pos, (team, avg, worst, best) in enumerate(results, start=1):
        if pos > playoff_spots:
            colour = "#E3735E"
        elif pos > direct_spots:
            colour = "#FFBF00"
        else:
            colour = "#000"
        # Column order matches the table header: Pos, Team, Points, Best, Worst.
        rows.append([pos, team, round(avg), best, worst, colour])

    content = "".join(
        f"<tr ><td style='color:{row[-1]}'>{row[0]}</td><td>{row[1]}</td><td>{row[2]}</td><td>{row[3]}</td><td>{row[4]}</td></tr>"
        for row in rows
    )

    display(
        HTML(
            f"<table><tr><th>Pos</th><th>Team</th><th>Points</th><th>Best</th><th>Worst</th></tr>{content}</table>"
        )
    )

In [70]:
runs = 10_000
SEED = 42

# Seed NumPy's global RNG (the one simulate_match draws from) so that a
# Restart & Run All reproduces the same table.
np.random.seed(SEED)

results = simulate(runs, teams, matches, p_fn=coeff_based_prob)

  0%|          | 0/10000 [00:00<?, ?it/s]

Real Madrid Borussia Dortmund None





KeyError: 3

In [28]:
# Render the aggregated simulation results as an HTML table.
print_results(results)

Pos,Team,Points,Best,Worst
1,Real Madrid,17,1,34
2,Bayern Munich,17,1,34
3,Manchester City,17,1,35
4,Liverpool,16,1,36
5,Borussia Dortmund,16,1,36
6,Paris Saint-Germain,16,1,36
7,Inter,15,1,35
8,RB Leipzig,15,1,36
9,Atalanta,15,1,36
10,Barcelona,15,1,36
