## Imports

In [1]:
from scipy.optimize import minimize
from itertools import combinations, chain
import random
import numpy as np
import pandas as pd

## Data

In [24]:
df = pd.read_csv("data.csv")
df.head(20)

Unnamed: 0,country,pool,pool_odds,overall_odds
0,Ireland,A,1.35,12.5
1,Scotland,A,4.6,95.0
2,Japan,A,21.0,290.0
3,Russia,A,500.0,1000.0
4,Samoa,A,200.0,1000.0
5,New_Zealand,B,1.39,2.56
6,South_Africa,B,3.55,5.4
7,Italy,B,180.0,1000.0
8,Namibia,B,500.0,1000.0
9,Canada,B,500.0,1000.0


## Normalize the odds

In [3]:
norms = {}
for pool in ["A","B","C","D"]:
    pool_odds = df[df["pool"] == pool]["pool_odds"]
    norm = sum(1/i for i in pool_odds)
    norms[pool] = norm

df["pool_odds"] = df.apply(lambda row: row["pool_odds"]*norms[row["pool"]], axis=1)

norm = sum(1/i for i in df["overall_odds"])
df["overall_odds"] = df.apply(lambda row: row["overall_odds"]*norm, axis=1)

df.head()

Unnamed: 0,country,pool,pool_odds,overall_odds
0,Ireland,A,1.367214,12.686508
1,Scotland,A,4.658655,96.417461
2,Japan,A,21.267773,294.326985
3,Russia,A,506.375546,1014.920638
4,Samoa,A,202.550219,1014.920638


## ELO model

In [4]:
def expected_score(p1_rating, p2_rating):
    return 1 / (1 + 10**((p2_rating - p1_rating)/400))

expected_score(2000, 2000)

0.5

## Pool score function

In [53]:
def pool_simulation(elos):
    points = np.zeros(len(elos))
    for i, j in combinations(range(len(elos)), 2):
        result = expected_score(elos[i], elos[j])
        if random.random() < result:
            points[i] += 1
        else:
            points[j] += 1
    points += np.random.rand(len(elos))
    winner, runner_up = (-points).argsort()[:2]
    return winner, runner_up
    
def pool_score(elos, odds, iterations=10000):
    pool_wins = np.zeros(len(elos))
    for _ in range(iterations):
        winner, _ = pool_simulation(elos)
        pool_wins[winner] += 1.0/iterations
    
    score = sum(abs(a-(1/b)) for a,b in zip(pool_wins, odds))

    return score

pool_score([2000, 2100, 2200, 2300, 2400], [0.1, 0.2, 0.2, 0.2, 0.3])

27.333333333333385

## Optimization

In [None]:
tmp_elos = []
for pool in ["A", "B", "C", "D"]:
    odds = list(df[df["pool"] == pool]["pool_odds"])
    elos = minimize(pool_score, [2000 for _ in range(5)], args=odds, method='Nelder-Mead').x
    tmp_elos.extend(elos)
    
    rounded_elos = sorted([int(e) for e in elos])
    rounded_odds = sorted([round(o, 3) for o in odds])
    
    print("ELOs:", *rounded_elos, sep=" ")
    print("Odds:", *rounded_odds, sep=" ", end="\n\n")

df["ELO"] = tmp_elos
df.head()

## Tournament Score function

In [14]:
def tournament_simulation(new_elos):
    quarters = []
    semis = []
    finals = []
    
    quarter_play = [(4,7), (1,2), (6,5), (0,3)]
    semi_play = [(0,1),(2,3)]
    
    # Play the group stages
    for i in range(4):
        pool_elos = new_elos[5*i:5*(i+1)]
        winner, runner_up = pool_simulation(pool_elos)
        
        # Adjust to get uniqueness across pools
        winner += 5*i
        runner_up += 5*i

        quarters.extend([winner, runner_up])
    
    # Play the quarters
    for index1, index2 in quarter_play:
        team1, team2 = quarters[index1], quarters[index2]
        winner = team1 if random.random() < expected_score(new_elos[team1], new_elos[team2]) else team2
        semis.append(winner)
        
    # Play the semis
    for index1, index2 in semi_play:
        team1, team2 = semis[index1], semis[index2]
        winner = team1 if random.random() < expected_score(new_elos[team1], new_elos[team2]) else team2
        finals.append(winner)
    
    # Play the finals
    team1, team2 = finals
    winner = team1 if random.random() < expected_score(new_elos[team1], new_elos[team2]) else team2
    
    return winner

def tournament_score(changes, elos, odds, iterations=100):
    new_elos = [elos[i] + changes[i//5] for i in range(20)]
    
    tournament_wins = np.zeros(20)
    for _ in range(iterations):
        winner = tournament_simulation(new_elos)
        tournament_wins[winner] += 1.0/iterations
    
    score = sum(abs(a-(1/b)) for a,b in zip(tournament_wins, odds))
    return score

## Optimization (Spoiler alert: This doesn't work)

In [17]:
odds = list(df["overall_odds"])
elos = list(df["ELO"])

changes = minimize(tournament_score, [0, 0, 0, 0], args=(elos,odds), method='Nelder-Mead',options={'maxiter':100}).x

print("Changes: ", np.round(changes, 3))

Changes:  [ 0.  0. -0.  0.]


### Hmm, resort to a normalization pool instead

In [None]:
countries = ["Ireland", "New_Zealand", "England", "Wales"]
odds = df[df["country"].isin(countries)]["overall_odds"]
old_elos = df[df["country"].isin(countries)]["ELO"]

elos = minimize(pool_score, [2000 for _ in range(4)], args=odds, method='Nelder-Mead').x

rounded_elos = sorted([int(e) for e in elos])
rounded_odds = sorted([round(o, 3) for o in odds])

print("ELOs:", *rounded_elos, sep=" ")
print("Odds:", *rounded_odds, sep=" ", end="\n\n")

differences = list((elos - old_elos) - min(elos - old_elos))
differences = {a:b for a,b in zip(["A","B","C","D"], differences)}
print(differences)

In [51]:
df["ELO2"] = df.apply(lambda row: row["ELO"] + differences[row["pool"]], axis=1)
df.head(20)

Unnamed: 0,country,pool,pool_odds,overall_odds,ELO,ELO2
0,Ireland,A,1.35,12.5,2306.936235,2334.555059
1,Scotland,A,4.6,95.0,2087.455605,2115.074429
2,Japan,A,21.0,290.0,1906.505696,1934.12452
3,Russia,A,500.0,1000.0,1838.214878,1865.833701
4,Samoa,A,200.0,1000.0,1864.200126,1891.81895
5,New_Zealand,B,1.39,2.56,2284.778479,2503.378301
6,South_Africa,B,3.55,5.4,2144.525177,2363.124999
7,Italy,B,180.0,1000.0,1837.15316,2055.752982
8,Namibia,B,500.0,1000.0,1922.074664,2140.674487
9,Canada,B,500.0,1000.0,1767.307843,1985.907665
