In [None]:
#Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Input tables live in the CSVs/ folder, resolved relative to the notebook's working
# directory. Forward slashes are used because they are accepted on Windows as well
# as Linux/macOS, whereas backslash separators only work on Windows.
DATA_DIR = 'CSVs'
CSV_ENCODING = 'latin-1'

group_stage_table = pd.read_csv(f'{DATA_DIR}/Group_Stage_Table.csv', encoding=CSV_ENCODING)
world_cup_matches = pd.read_csv(f'{DATA_DIR}/World_Cup_Matches.csv', encoding=CSV_ENCODING)
world_cup_teams = pd.read_csv(f'{DATA_DIR}/World_Cup_Teams.csv', encoding=CSV_ENCODING)
squad_predictions = pd.read_csv(f'{DATA_DIR}/Predictions/Predictions_8-21.csv', encoding=CSV_ENCODING)

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Final Simulation Code Chunk

#######################

# Simulation Setup

# Seed numpy's global generator so re-running this cell repeats the same draws.
np.random.seed(22)
num_sims = 1000

## One fixture frame per tournament round, selected from world_cup_matches.
## Group fixtures are picked by the 'Stage' column, knockout rounds by 'Group'.

group_stage_matches = world_cup_matches.loc[world_cup_matches['Stage'] == 'Group']
r_16_matches = world_cup_matches.loc[world_cup_matches['Group'] == 'R16']
r_8_matches = world_cup_matches.loc[world_cup_matches['Group'] == 'R8']
r_4_matches = world_cup_matches.loc[world_cup_matches['Group'] == 'Semi-Finals']
final_match = world_cup_matches.loc[world_cup_matches['Group'] == 'Finals']

## Accumulators filled in by the simulation steps below (five independent lists).

group_results, knockout_sims, r_8_sims, r_4_sims, final_sims = [], [], [], [], []

# Group Stage Simulation

## Look up each squad's '% of Points Taken' from squad_predictions.
## The index is reset first because pd.merge returns a frame with a fresh 0..n-1
## index: assigning its column onto a frame that still carried world_cup_matches
## labels would align by label, not by row. A 0..n-1 index also lets row labels be
## used as positions into group_match_sims later in this cell. reset_index returns
## a new frame, so the columns added here are not written onto a filtered slice.

group_stage_matches = group_stage_matches.reset_index(drop=True)
squad_strength = squad_predictions[['Squad', '% of Points Taken']]

# validate='many_to_one' makes the merge raise if a squad is listed more than once
# in squad_predictions; otherwise fixtures would be duplicated and the looked-up
# values would silently shift against the fixture rows.
group_stage_matches['S1%'] = pd.merge(
    group_stage_matches[['Squad 1']], squad_strength,
    how='left', left_on='Squad 1', right_on='Squad', validate='many_to_one',
)['% of Points Taken']
group_stage_matches['S2%'] = pd.merge(
    group_stage_matches[['Squad 2']], squad_strength,
    how='left', left_on='Squad 2', right_on='Squad', validate='many_to_one',
)['% of Points Taken']

## Probability of Squad 1 winning, from the Log5 formula
##   P(S1 beats S2) = (A - A*B) / (A + B - 2*A*B)
## with A = S1% and B = S2%. This is the same expression the knockout rounds use;
## dividing by A alone would reduce to 1 - B and ignore Squad 1's own strength.

s1_pct = group_stage_matches['S1%']
s2_pct = group_stage_matches['S2%']
group_stage_matches['S1_Prob'] = (s1_pct - s1_pct * s2_pct) / (s1_pct + s2_pct - 2 * s1_pct * s2_pct)

## Create Group Stage Match Sims

# For each group fixture, in row order, draw num_sims Bernoulli outcomes with that
# fixture's S1_Prob. A 1 means Squad 1 won the match in that simulation.
group_match_sims = [
    np.random.binomial(n=1, p=s1_prob, size=num_sims)
    for s1_prob in group_stage_matches['S1_Prob']
]


## Add match sim information into group_results

# Pair every group fixture with its simulated outcomes by position. group_match_sims
# was built by walking group_stage_matches row by row, so the n-th entry belongs to
# the n-th fixture whatever the frame's index labels happen to be.
group_fixtures = list(zip(
    group_stage_matches['Squad 1'],
    group_stage_matches['Squad 2'],
    group_match_sims,
))

for index, row in squad_predictions.iterrows():
    team = row.Squad
    group = row.Group
    team_percent = row["% of Points Taken"]

    # Wins per simulation for this squad, summed over all of its group fixtures.
    # Starting from a zero vector keeps 'wins' indexable per simulation even for a
    # squad that appears in no fixture.
    team_results = np.zeros(num_sims, dtype=int)
    for squad_1, squad_2, squad_1_won in group_fixtures:
        if squad_1 == team:
            team_results = team_results + squad_1_won
        elif squad_2 == team:
            # Each entry is 1 when Squad 1 won, so Squad 2's wins are the complement.
            team_results = team_results + (1 - squad_1_won)

    # Points are 3 per win; the simulated matches have no draw outcome.
    group_results.append({
        'team': team,
        'group': group,
        'team_percent': team_percent,
        'wins': team_results,
        'points': team_results * 3,
    })

     
    
#R16 Simulation

###############################

def _log5(s1_pct, s2_pct):
    """Return the Log5 probability that squad 1 beats squad 2.

    Accepts scalars or aligned Series of squad strengths and returns a value of
    the same shape. The result is NaN when the denominator is zero (for example
    when both strengths are 0 or both are 1).
    """
    return (s1_pct - s1_pct * s2_pct) / (s1_pct + s2_pct - 2 * s1_pct * s2_pct)


def _fill_bracket(fixtures, entrants):
    """Replace the seed codes in a round's fixtures with the squads holding them.

    fixtures: frame whose 'Squad 1' and 'Squad 2' columns hold seed codes.
    entrants: frame with 'team', 'team_percent' and 'seed' columns.

    Returns a new frame in which 'Squad 1'/'Squad 2' hold team names and the
    columns 'S1%', 'Squad 1 Seed', 'S2%' and 'Squad 2 Seed' are added. The joins
    are inner joins, so a fixture whose seed has no entrant is dropped.
    """
    seats = entrants.loc[:, ['team', 'team_percent', 'seed']]
    bracket = fixtures
    for side in ('1', '2'):
        slot = f'Squad {side}'
        bracket = (
            pd.merge(bracket, seats, left_on=slot, right_on='seed')
            .drop(columns=[slot])
            .rename(columns={'team': slot, 'team_percent': f'S{side}%', 'seed': f'{slot} Seed'})
        )
    return bracket


def _play_round(fixtures, entrants, sim_number):
    """Simulate one knockout round and return its result frame.

    Adds 'S1_Prob' (Log5), 'S1_wins' (1 if Squad 1 advanced, else 0) and
    'Simulation' (the 1-based run number) to the filled-in bracket.
    """
    results = _fill_bracket(fixtures, entrants)
    results['S1_Prob'] = _log5(results['S1%'], results['S2%'])
    # One single-trial draw per match, in row order, so the seeded random stream is
    # consumed in the same sequence on every run.
    results['S1_wins'] = [np.random.binomial(n=1, p=prob, size=1)[0] for prob in results['S1_Prob']]
    results['Simulation'] = sim_number
    return results


def _round_winners(results):
    """Build the entrant frame for the next round from a round's results.

    Each winner is seeded as 'M<Match>' (the code the next round's fixtures are
    joined on) and carries its own team name and strength forward.
    """
    squad_1_won = results['S1_wins'] == 1
    return pd.DataFrame({
        'seed': 'M' + results['Match'].astype(str),
        'team': results['Squad 1'].where(squad_1_won, results['Squad 2']),
        'team_percent': results['S1%'].where(squad_1_won, results['S2%']),
    })


# Report progress roughly every tenth of the run. Integer arithmetic keeps this
# working when num_sims is not a multiple of 10 or is smaller than 10.
progress_step = max(num_sims // 10, 1)

for z in range(num_sims):
    if (z > 0) and (z % progress_step == 0):
        print(f"Simulation {z} Complete.")

    # Group standings for simulation z: sort within each group by points, breaking
    # ties with the squad's strength, then seed as '<rank><group>' (e.g. '1A').
    r16_df = pd.DataFrame([
        {'team': x['team'], 'group': x['group'], 'team_percent': x['team_percent'],
         'wins': x['wins'][z], 'points': x['points'][z]}
        for x in group_results
    ])
    r16_df = r16_df.sort_values(by=['group', 'points', 'team_percent'], ascending=[True, False, False]).reset_index(drop=True)
    # Rank is the position inside the group, so it does not depend on every group
    # having exactly four squads.
    r16_df['rank'] = (r16_df.groupby('group').cumcount() + 1).astype(str)
    r16_df['seed'] = r16_df['rank'] + r16_df['group']

    r_16_results = _play_round(r_16_matches, r16_df, z + 1)
    knockout_sims.append(r_16_results)

#R8 Simulation

###############################

    r8_df = _round_winners(r_16_results)
    r_8_results = _play_round(r_8_matches, r8_df, z + 1)
    r_8_sims.append(r_8_results)

#Semi-finals Simulation
###############################

    r_4_df = _round_winners(r_8_results)
    r_4_results = _play_round(r_4_matches, r_4_df, z + 1)
    r_4_sims.append(r_4_results)

# Finals Simulation
###############################

    finals_df = _round_winners(r_4_results)
    final_results = _play_round(final_match, finals_df, z + 1)
    final_sims.append(final_results)

# Printed only after every simulation has actually finished.
print(f"Simulation Complete - The world cup was simulated {num_sims} times")

In [None]:
# Inspect the final-match result frame recorded for the first simulation run.
final_sims[0]

In [None]:
# Simulated outcomes of the first group-stage fixture, one entry per simulation
# (1 = Squad 1 won).
group_match_sims[0]

In [None]:
# Despite the name, knockout_sims only receives the Round-of-16 result frames (one
# per simulation); later rounds go to r_8_sims, r_4_sims and final_sims.
# NOTE(review): this displays every frame in the list - consider slicing.
knockout_sims

In [None]:
# Average group-stage wins per simulation for the first squad in group_results.
test = np.sum(group_results[0]['wins']) / num_sims

In [None]:
# Display the average computed in the previous cell.
test

In [None]:
# Placeholder for a per-team group-stage win summary. The original second assignment
# had no right-hand side, which is a SyntaxError and stopped the cell from running.
# TODO: fill in the intended computation.
group_stage_team_wins = []

In [None]:
# Confirm the container type of knockout_sims (created as a list and appended to
# inside the simulation loop).
type(knockout_sims)

In [None]:
# Dry run of the progress messages for a larger simulation count. A separate name
# is used so the global num_sims, which the simulation results above were produced
# with and which other cells divide by, is not overwritten.
progress_check_sims = 10000

for z in range(progress_check_sims):
    if (z > 0) and (z % (progress_check_sims / 10) == 0):
        print(f"Simulation {z} Complete.")

    elif z == (progress_check_sims - 1):
        print(f"Simulation Complete - The world cup was simulated {progress_check_sims} times")

In [None]:
# Champion of each simulated tournament: Squad 1 of the final when its S1_wins flag
# is 1, otherwise Squad 2. Only the first row of each final frame is read.
winners = [
    final['Squad 1'].values[0] if final.S1_wins.values[0] == 1 else final['Squad 2'].values[0]
    for final in final_sims
]

In [None]:
from collections import Counter

# Tally how many simulated tournaments each squad won, most frequent first.
title_counts = Counter(winners)
title_counts.most_common()

In [None]:
# Display the simulated champion of every run (one entry per frame in final_sims).
winners