# Modeling Euro 2020

## Simulating the data based on teams' rankings on FIFA

In [22]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Import essential libraries
import random as rnd
import pandas as pd

In [23]:
# Define class Team
class Team:
    # Set LUCK as a static variable
    LUCK = float(0)

    def __init__(self, name, strength = None):
        self.name = name
        self.strength = strength if strength else rnd.uniform(0,1)

    # Redefine __str__ and __repr__ so the class can return string itself
    def __str__(self):
        return self.name

    def __repr__(self):
        return self.name

    # Redefine __lt__
    def __lt__(self, t):
        return self.strength < t.strength + rnd.gauss(0, Team.LUCK)

In [24]:
# Simulating the Data
## Take teams' strength from FIFA rankings pre-Euro 2020
teams_data = [[("Italy", 7), ("Wales", 17), ("Switzerland", 13), ("Turkey", 29)],
              [("Belgium", 1), ("Denmark", 10), ("Finland", 54), ("Russia", 38)],
              [("Netherlands", 16), ("Austria", 23), ("Ukraine", 24), ("North Macedonia", 62)],
              [("England", 4), ("Croatia", 14), ("Czech Republic", 40), ("Scotland", 44)],
              [("Sweden", 18), ("Spain", 6), ("Slovakia", 36), ("Poland", 21)],
              [("France", 2), ("Germany", 12), ("Portugal", 5), ("Hungary", 37)]]

ratings = sum([[rating for _,rating in group_data] for group_data in teams_data], [])
smallest = min(ratings)
largest = max(ratings)
teams = [[Team(name, 1 - (rating - smallest) / (largest - smallest))
         for name,rating in group_data] for group_data in teams_data]

In [25]:
## A function to simulate the data
def champion():
    ## Simulating the group stage
    after_groups = [sorted(group, reverse=True) for group in teams]
    third_placers = sorted([g[2] for g in after_groups], reverse=True)
    winners = sum([g[:2] for g in after_groups], [])
    teams16 = winners + third_placers

    ## Shuffle the 16 teams to arrange them into pairs for the knock-out stage
    rnd.shuffle(teams16)
    pairs16 = list(zip(teams16[::2], teams16[1::2]))

    # Find round of 16 teams and quarter-finalists
    teams8 = [sorted(g, reverse=True)[0] for g in pairs16]
    pairs8 = list(zip(teams8[::2], teams8[1::2]))
    teams4 = [sorted(g, reverse=True)[0] for g in pairs8]
    pairs4 = list(zip(teams4[::2], teams4[1::2]))

    # Find the semi-finalists
    teams2 = [sorted(g, reverse=True) for g in pairs4]
    first, second = sorted([teams2[0][0], teams2[1][0]], reverse=True)
    third, _ = sorted([teams2[0][1], teams2[1][1]], reverse=True)
    return first, second

In [38]:
# Create a dictionary to store teams and their win times
def result_list():
    team_list = [item for sublist in teams for item in sublist]
    result_lst = []
    for team in team_list:
        result_dict = {}
        result_dict['name'] = team
        result_dict['win'] = 0
        result_dict['runner_up'] = 0
        result_lst.append(result_dict)
    return (result_lst)

# Let run the simulation for certain trial times and count the number of times that a team would win

def simulation(result_list, trial_times: int):
    for i in range(1,trial_times):
        first, second = champion()
        for team in result_list:
            if first == team['name']:
                team['win'] += 1
            if second == team['name']:
                team['runner_up'] += 1
        i += 1
    return result_list

# Define main function
def main(trial_times):
    result_lst = result_list()
    df = pd.DataFrame(simulation(result_lst, trial_times))
    df['win_probability'] = df['win'] * 100 / trial_times
    df['runner_up_probability'] = df['runner_up'] * 100 / trial_times
    df = df.sort_values(by=['win','runner_up'], ascending=False)
#     df.to_csv('result.csv')
    return df

In [39]:
df = main(1000000)
df

Unnamed: 0,name,win,runner_up,win_probability,runner_up_probability
4,Belgium,889042,0,88.9042,0.0
20,France,104457,419062,10.4457,41.9062
12,England,6500,288100,0.65,28.81
22,Portugal,0,156042,0.0,15.6042
17,Spain,0,77774,0.0,7.7774
0,Italy,0,36157,0.0,3.6157
5,Denmark,0,15252,0.0,1.5252
21,Germany,0,5564,0.0,0.5564
2,Switzerland,0,1643,0.0,0.1643
13,Croatia,0,353,0.0,0.0353


## Conclusion

We all know these facts:
- **Italy** was **Euro 2020's Champions** but their win rate is 0%
- **Belgium** was knocked out by **Italy** in Quarter-Final
- **France** even got beaten by **Switzerland** in Round of 16
- And **England** at least was **Euro 2020's Runner-Up** with the rate as 28.81%

**Fifa ranking** is absolutely a factor to determine winner but *this is football after all.* We should look out for *mentality, luck,* even *team's bottling* and ~UEFA's conspiracy~.