In [1]:
import numpy as np
import pandas as pd
import csv
import math
import random
import matplotlib.pyplot as plt
from tqdm import tqdm

import sys
sys.path.append("src")
from world_cup_simulator import *

### Simulate group stage

#### The gist is to read from two files: one defining the match schedule, the other listing the teams and their relative strengths (given by Elo ratings prior to the start of the event).

In [None]:
# Load the match schedule and the team roster as DataFrames for inspection.
# Note that `teams_pd` is not purely illustrative: the simulation cell below
# merges its per-run results into this frame.
games_pd, teams_pd = (
    pd.read_csv(csv_path) for csv_path in ("data/matches.csv", "data/roster1.csv")
)

In [None]:
# Monte Carlo simulation of the group stage.
# Matches and teams are handled as plain dictionaries; each run's final points
# per team end up in their own "simulation<k>" column of `teams_pd`.
n = 10000 # How many simulations to run

# The roster file is the same for every run, so parse it once and close the
# file handle immediately (the previous version leaked one open file per run).
with open("data/roster1.csv", newline="") as roster_file:
    roster_rows = list(csv.DictReader(roster_file))

# Keep only the non-simulation columns as the merge base, so re-running this
# cell never joins on stale "simulation<k>" columns from an earlier run.
teams_base = teams_pd[[col for col in teams_pd.columns if "simulation" not in col]]

points_by_simulation = {}
for i in tqdm(range(n)):
    # Fresh inputs for every run: simulate_group_stage is expected to update
    # `teams` in place (its return value is not used).
    games = read_games("data/matches.csv")
    teams = {
        row['team']: {'name': row['team'], 'rating': float(row['rating']), 'points': float(row['points'])}
        for row in roster_rows
    }

    simulate_group_stage(games, teams, ternary=False)

    # Record the points each team finished this run with
    points_by_simulation[f"simulation{i+1}"] = {name: team['points'] for name, team in teams.items()}

# Build all simulation columns in one go and merge once, instead of merging an
# ever-growing frame inside the loop (which is quadratic in the number of runs).
if points_by_simulation:
    simulations_pd = pd.DataFrame(points_by_simulation).rename_axis("team").reset_index()
    teams_pd = pd.merge(teams_base, simulations_pd, on="team")

# Optional alternative: turn points into within-group ranks per simulation column
# for sim_col in simulations_pd.columns.drop("team"):
#     teams_pd[sim_col] = teams_pd.groupby('group')[sim_col].rank(ascending=False)

In [None]:
# Names of all per-run result columns (those containing "simulation").
sim_cols = [column_name for column_name in teams_pd.columns if "simulation" in column_name]

In [None]:
# Row-wise (per-team) summary statistics over all simulation columns.
teams_pd = teams_pd.assign(
    average_points=teams_pd[sim_cols].mean(axis="columns"),
    median_points=teams_pd[sim_cols].median(axis="columns"),
    standard_deviation=teams_pd[sim_cols].std(axis="columns"),
)

In [None]:
# Everything except the per-run result columns: roster fields plus the summary statistics.
not_sim = [column_name for column_name in teams_pd.columns if "simulation" not in column_name]

In [None]:
# Summary table without the raw simulation columns, sorted descending by
# group and then by average points within each group.
summary_order = ['group', 'average_points']
teams_pd.loc[:, not_sim].sort_values(by=summary_order, ascending=False)

### Simulating playoffs

In [2]:
# Load the playoff schedule and playoff roster as DataFrames for inspection.
playoff_games_pd, playoff_teams_pd = (
    pd.read_csv(csv_path)
    for csv_path in ("data/playoff_matches.csv", "data/playoff_roster.csv")
)

In [3]:
# Monte Carlo simulation of the playoff bracket.
# Every run yields two records:
#   * playoff_results_teams: team name -> result of collect_playoff_results for that team
#   * playoff_results_stage: stage-level view (who advanced / lost per stage, and
#     the pairings of matches 8-15)
n = 10000
playoff_results_teams = []
playoff_results_stage = []

# The roster is identical for every run: parse it once and close the file
# right away (the previous version leaked one open file handle per run).
with open("data/playoff_roster.csv", newline="") as roster_file:
    playoff_roster = [(row['team'], float(row['rating'])) for row in csv.DictReader(roster_file)]

# (result label, value of the 'stage' column to select, column holding the team).
# The stage strings are compared against the games data verbatim, so their
# spelling (including 'eigths_finals') must stay in sync with that data.
stage_outcomes = [
    ('Quarterfinals', 'eigths_finals', 'advances'),
    ('Semifinals', 'quarterfinals', 'advances'),
    ('Final', 'semifinals', 'advances'),
    ('third_place_match', 'semifinals', 'loses'),
    ('fourth_place', 'third_place', 'loses'),
    ('third_place', 'third_place', 'advances'),
    ('second_place', 'final', 'loses'),
    ('Champion', 'final', 'advances'),
]

for i in tqdm(range(n)):
    # Fresh games and team records for every run; the 'advances' / 'loses'
    # columns read below are taken from `games` after simulate_playoffs has run.
    games = read_games("data/playoff_matches.csv")
    teams = {name: {'name': name, 'rating': rating} for name, rating in playoff_roster}

    simulate_playoffs(games, teams, ternary=True)

    playoff_pd = pd.DataFrame(games)

    # This is for collecting results of simulations per team
    overall_result_teams = {key: collect_playoff_results(key, playoff_pd) for key in teams.keys()}
    playoff_results_teams.append(overall_result_teams)

    # Now, collecting results from stage-perspective
    overall_result_stage = {'whole_bracket': playoff_pd['advances'].to_list()}
    for label, stage, column in stage_outcomes:
        overall_result_stage[label] = playoff_pd.loc[playoff_pd['stage'] == stage, column].to_list()

    # Pairings of the matches at row labels 8..15 of the games frame
    # (presumably the rounds whose participants depend on earlier results).
    for match_idx in range(8, 16):
        overall_result_stage[f'match{match_idx}'] = list(playoff_pd.loc[match_idx, ['home_team', 'away_team']])

    playoff_results_stage.append(overall_result_stage)

100%|██████████| 10000/10000 [02:52<00:00, 57.93it/s]


In [4]:
# One row per simulation run, one column per team; each cell is that team's result in the run.
results_teams = pd.DataFrame(data=playoff_results_teams)

In [24]:
# How often France ended up with each result across all runs.
results_teams.loc[:, 'France'].value_counts()

Quarterfinals    3570
Round_of_16      2576
Second_place     1315
Fourth_place     1012
Champion          807
Third_place       720
Name: France, dtype: int64

In [6]:
# One row per simulation run, one column per stage-level result (each cell holds a list of teams).
results_stage = pd.DataFrame(data=playoff_results_stage)

In [23]:
# Frequency of each [home_team, away_team] pairing in match 9 across all runs.
# The cells hold Python lists, which are unhashable; convert them to tuples
# first so value_counts can group them reliably.
results_stage['match9'].map(tuple).value_counts()

[Croatia, Brazil]         4868
[Japan, Brazil]           4252
[Croatia, South Korea]     469
[Japan, South Korea]       411
Name: match9, dtype: int64