In [7]:
import numpy as np
import pandas as pd
import csv
import math
import random
import matplotlib.pyplot as plt

import sys
sys.path.append("./src")
from world_cup_simulator import *

### Simulate group stage

#### The gist is to read from two files: one defining the match schedule, the other listing the teams and their relative strengths (given by their Elo ratings prior to the start of the event).

In [8]:
# Load the roster and the match schedule as DataFrames.
# teams_pd is reused by the simulation cell below to store per-run results;
# games_pd is not referenced again in the visible cells.
teams_pd = pd.read_csv("data/roster.csv")
games_pd = pd.read_csv("data/matches.csv")

In [9]:
# Run the group stage n times. Matches and teams are handled as plain
# dictionaries; the outcome of every run is written to teams_pd as a new
# column "simulation<k>" holding each team's rank within its group.
n = 1   # How many simulations to run

# Parse the roster once and close the file deterministically. The previous
# version re-opened the file on every run and never closed the handle.
with open("data/roster.csv", newline='') as roster_file:
    roster = [(row['team'], float(row['rating'])) for row in csv.DictReader(roster_file)]

for i in range(n):
    sim_col = f"simulation{i+1}"

    # The schedule is re-read for every run.
    # NOTE(review): presumably the simulator writes results into `games` -- confirm.
    games = read_games("data/matches.csv")

    # Fresh team records per run so points never carry over between runs.
    teams = {name: {'name': name, 'rating': rating, 'points': 0} for name, rating in roster}

    simulate_group_stage(games, teams, ternary=False)

    # Copy the points read back from each team record into this run's column.
    # Teams present in teams_pd but missing from the roster dict become NaN.
    points_by_team = {name: record['points'] for name, record in teams.items()}
    teams_pd[sim_col] = teams_pd['team'].map(points_by_team)

    # Convert points to a position inside each group (1 = most points).
    # Ties share the average of the tied positions (pandas' default rank method).
    teams_pd[sim_col] = teams_pd.groupby('group')[sim_col].rank(ascending=False)

NameError: name 'simulate_game' is not defined

In [None]:
# Names of every column whose label contains "simulation" (one per simulated run).
sim_cols = [column for column in teams_pd.columns if "simulation" in column]

In [None]:
# Row-wise summary over the simulation columns: mean, median and standard
# deviation of each team's simulated values.
teams_pd['avg_pos'] = teams_pd.loc[:, sim_cols].mean(axis=1)
teams_pd['median_pos'] = teams_pd.loc[:, sim_cols].median(axis=1)
teams_pd['std_pos'] = teams_pd.loc[:, sim_cols].std(axis=1)

In [None]:
# Every column that is not a per-run simulation column (roster fields plus the summary columns).
not_sim = [column for column in teams_pd.columns if "simulation" not in column]

In [None]:
# Display the non-simulation columns, ordered by group and then by average position.
teams_pd.loc[:, not_sim].sort_values(by=['group', 'avg_pos'])

### Simulating playoffs

In [None]:
# Load the playoff roster and the playoff schedule as DataFrames.
playoff_teams_pd = pd.read_csv("playoff_roster.csv")
playoff_games_pd = pd.read_csv("playoff_matches.csv")

In [None]:
# Monte Carlo simulation of the playoffs: every run simulates the full set of
# playoff games and records the outcome both per team and per stage.
n = 10000
playoff_results_teams = []   # one dict per run: team name -> collect_playoff_results(...)
playoff_results_stage = []   # one dict per run: result label -> list of team names

# Parse the roster once and close the file deterministically. The previous
# version re-opened the file on each of the n runs and never closed the handle.
with open("playoff_roster.csv", newline='') as roster_file:
    playoff_roster = [(row['team'], float(row['rating'])) for row in csv.DictReader(roster_file)]

# (result label, value matched against the 'stage' column, column to read, number of None pads).
# NOTE(review): the pad counts look chosen so that every list ends up with eight
# entries, assuming eight first-round games -- confirm against playoff_matches.csv.
# NOTE(review): 'eigths_finals' is compared against the data as-is; presumably it
# matches the spelling in playoff_matches.csv, so do not "correct" it without checking.
stage_outcomes = (
    ('Quarterfinals', 'eigths_finals', 'advances', 0),
    ('Semifinals', 'quarterfinals', 'advances', 4),
    ('Final', 'semifinals', 'advances', 6),
    ('third_place_match', 'semifinals', 'loses', 6),
    ('fourth_place', 'third_place', 'loses', 7),
    ('third_place', 'third_place', 'advances', 7),
    ('second_place', 'final', 'loses', 7),
    ('Champion', 'final', 'advances', 7),
)

for i in range(n):
    # The schedule is re-read for every run.
    # NOTE(review): presumably the simulator writes results into `games` -- confirm.
    games = read_games("playoff_matches.csv")

    # Fresh team records per run, built from the roster parsed above.
    teams = {name: {'name': name, 'rating': rating} for name, rating in playoff_roster}

    simulate_playoffs(games, teams, ternary=True)

    # After the simulation the games are expected to expose the 'stage',
    # 'advances' and 'loses' columns that are read below.
    playoff_pd = pd.DataFrame(games)

    # Team perspective: one collected result per team for this run.
    overall_result_teams = {name: collect_playoff_results(name, playoff_pd) for name in teams}
    playoff_results_teams.append(overall_result_teams)

    # Stage perspective: which teams ended up under each result label in this run.
    overall_result_stage = {}
    for result_key, stage, column, padding in stage_outcomes:
        in_stage = playoff_pd['stage'] == stage
        overall_result_stage[result_key] = playoff_pd.loc[in_stage, column].to_list() + [None] * padding

    playoff_results_stage.append(overall_result_stage)

In [None]:
# One row per simulated run, one column per team.
results_teams = pd.DataFrame(data=playoff_results_teams)

In [None]:
# Relative frequency of each recorded playoff outcome for Brazil across all runs.
results_teams.loc[:, 'Brazil'].value_counts(normalize=True)

In [None]:
# One row per simulated run, one column per result label; each cell holds a list of team names.
results_stage = pd.DataFrame(data=playoff_results_stage)

In [None]:
# Each cell of results_stage holds a list of team names, and lists are
# unhashable, so calling value_counts directly raises TypeError. Flatten the
# lists to one team per row first; None padding is dropped by value_counts.
# The result is each team's share of all slots under this label; multiply by
# the number of slots per run to get the per-team probability of reaching it.
results_stage['Quarterfinals'].explode().value_counts(normalize=True)