In [1]:
#Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the tournament inputs.
# Forward slashes are used in the relative paths so the notebook runs on
# Windows, macOS and Linux alike; a backslash is a path separator on Windows
# only, so the previous r'CSVs\...' paths could not be found elsewhere.
group_stage_table = pd.read_csv('CSVs/Group_Stage_Table.csv', encoding='latin-1')
world_cup_matches = pd.read_csv('CSVs/World_Cup_Matches.csv', encoding='latin-1')
world_cup_teams = pd.read_csv('CSVs/World_Cup_Teams.csv', encoding='latin-1')
squad_predictions = pd.read_csv('CSVs/Predictions/Predictions_8-21.csv', encoding='latin-1')

import warnings
# Installs a blanket "ignore" filter: with no category or module given, every
# warning raised after this point is silenced for the rest of the session.
# NOTE(review): this also hides warnings that point at real problems in later
# cells (e.g. pandas chained-assignment or deprecation warnings) - consider
# narrowing the filter once those are fixed.
warnings.filterwarnings("ignore")

In [None]:
# Group-stage fixtures. An explicit copy is taken so the columns added below
# are written to an independent frame instead of to a slice of
# world_cup_matches (chained assignment).
group_stage_matches = world_cup_matches[world_cup_matches['Stage'] == 'Group'].copy()

# Look up each squad's predicted share of points by squad name.
# Mapping by key keeps every value on its own fixture row whatever the frame's
# index looks like. The previous merge-and-assign produced a fresh 0..n-1 index
# that was then aligned against the fixture index, which is only correct when
# the group fixtures happen to be the first rows of world_cup_matches.
# If a squad were listed more than once, its first entry is used.
points_share = (
    squad_predictions
    .drop_duplicates(subset='Squad')
    .set_index('Squad')['% of Points Taken']
)
group_stage_matches['S1%'] = group_stage_matches['Squad 1'].map(points_share)
group_stage_matches['S2%'] = group_stage_matches['Squad 2'].map(points_share)

# Probability of Squad 1 winning, from the Log5 formula:
#   P(S1) = (p1 - p1*p2) / (p1 + p2 - 2*p1*p2)
s1_share = group_stage_matches['S1%']
s2_share = group_stage_matches['S2%']
group_stage_matches['S1_Prob'] = (
    (s1_share - (s1_share * s2_share))
    / (s1_share + s2_share - (2 * s1_share * s2_share))
)

In [None]:
# Squads ordered from the highest to the lowest predicted share of points.
squad_predictions.sort_values('% of Points Taken', ascending=False)

In [None]:
# Group fixtures ordered from the lowest to the highest Squad 1 win probability.
group_stage_matches.sort_values('S1_Prob', ascending=True)

In [None]:
# Simulate every group-stage fixture num_sims times.
# Each entry of match_sims is an array of 0/1 draws for one fixture (in fixture
# row order), where 1 means Squad 1 won that simulated game.
np.random.seed(22)
num_sims = 10000

match_sims = [
    np.random.binomial(n=1, p=s1_prob, size=num_sims)
    for s1_prob in group_stage_matches['S1_Prob']
]

len(match_sims)

In [None]:
# Display the per-team group-stage simulation results.
# NOTE(review): as far as this notebook shows, group_results is only assigned
# in later cells (the tournament simulation cell and the group-stage cell at
# the end), so this cell depends on out-of-order execution - confirm and move
# it below the cell that builds group_results.
group_results

In [None]:
# Sample graph: distribution of simulated group-stage wins for one team
# (the entry at position 6 of group_results), with its mean printed first.

data = group_results[6]
sample_wins = data['wins']
print(np.mean(sample_wins))

fig, ax = plt.subplots()
ax.hist(sample_wins)
ax.set_title(f"Simulation Wins for {data['team']}")
plt.show()

In [None]:
# Simulation Setup
# Re-seed the generator and set how many full tournaments the loop below plays.

np.random.seed(22)
num_sims = 100

# Tournament Simulation Code Chunk

# One fixtures frame per World Cup round. Group games are selected through the
# 'Stage' column; knockout rounds through their label in the 'Group' column.

is_group_stage = world_cup_matches['Stage'] == 'Group'
group_stage_matches = world_cup_matches[is_group_stage]

r_16_matches, r_8_matches, r_4_matches, final_matches = [
    world_cup_matches[world_cup_matches['Group'] == round_label]
    for round_label in ('R16', 'R8', 'Semi-Finals', 'Finals')
]

# Empty containers that collect the per-team / per-simulation results.
group_results, knockout_sims, r_8_sims, r_4_sims, finals_sims = [], [], [], [], []


# Group Stage Simulation

# Work on an independent copy so the new columns are not written to a slice of
# world_cup_matches (chained assignment).
group_stage_matches = group_stage_matches.copy()

# Look up each squad's predicted share of points by squad name.
# Mapping by key keeps every value on its own fixture row regardless of the
# frame's index; assigning the column of a merge result instead aligns a fresh
# 0..n-1 index against the fixture index. If a squad were listed more than
# once, its first entry is used.
points_share = (
    squad_predictions
    .drop_duplicates(subset='Squad')
    .set_index('Squad')['% of Points Taken']
)
group_stage_matches['S1%'] = group_stage_matches['Squad 1'].map(points_share)
group_stage_matches['S2%'] = group_stage_matches['Squad 2'].map(points_share)

# Probability of Squad 1 winning, from the Log5 formula:
#   P(S1) = (p1 - p1*p2) / (p1 + p2 - 2*p1*p2)
# The previous statement stopped after the first term of the denominator and
# left its parentheses open, which made the whole cell a SyntaxError.
s1_share = group_stage_matches['S1%']
s2_share = group_stage_matches['S2%']
group_stage_matches['S1_Prob'] = (
    (s1_share - (s1_share * s2_share))
    / (s1_share + s2_share - (2 * s1_share * s2_share))
)

# Simulate all group stage matches
#
# For every squad, add up its simulated group-stage wins across all draws.
# match_sims (built in an earlier cell) holds one array of 0/1 draws per
# fixture, where 1 means Squad 1 won; it must be in the same row order as
# group_stage_matches.

for index, row in squad_predictions.iterrows():
    team = row.Squad
    group = row.Group
    team_percent = row["% of Points Taken"]

    # Which fixtures involve this team, and on which side it plays.
    plays_as_squad_1 = (group_stage_matches['Squad 1'] == team).to_numpy()
    plays_as_squad_2 = (group_stage_matches['Squad 2'] == team).to_numpy()

    # match_sims is a plain list, so it is addressed by row position rather
    # than by DataFrame index label (the two only coincide when the group
    # fixtures carry a 0..n-1 index).
    team_wins = []
    for position in np.flatnonzero(plays_as_squad_1 | plays_as_squad_2):
        squad_1_won = np.asarray(match_sims[position])
        # A Squad 1 win is a win for this team only when it is Squad 1;
        # otherwise flip the outcome (0 <-> 1).
        team_wins.append(squad_1_won if plays_as_squad_1[position] else 1 - squad_1_won)

    # Element i is the team's number of wins in simulation i.
    team_results = np.sum(team_wins, axis=0)

    # Three points per win; draws are not modelled.
    group_results.append({'team':team, 'group':group, 'team_percent':team_percent, 'wins':team_results, 'points':team_results*3})


# Knockout simulation: R16 -> R8 -> Semi-finals -> Finals
###############################
#
# Every knockout round follows the same three steps, so they live in helpers:
#   1. replace the seed labels in the round's fixture template with teams,
#   2. compute Squad 1's win probability with the Log5 formula,
#   3. draw one 0/1 outcome per fixture.
# Draws are made fixture by fixture and round by round, so results stay
# reproducible under the seed set in the setup above.


def _log5(p1, p2):
    """Return the Log5 probability that side 1 (strength p1) beats side 2 (strength p2)."""
    return (p1 - (p1 * p2)) / (p1 + p2 - (2 * p1 * p2))


def _play_round(fixtures, entrants, simulation):
    """Resolve one knockout round.

    Parameters
    ----------
    fixtures : DataFrame
        Round template whose 'Squad 1' / 'Squad 2' columns hold seed labels.
    entrants : DataFrame
        Qualified teams with 'team', 'team_percent' and 'seed' columns.
    simulation : int
        Simulation number stored in the 'Simulation' column.

    Returns
    -------
    DataFrame
        A new frame (the template is not modified) with the seed labels
        replaced by team names, plus 'Squad 1 Seed', 'Squad 2 Seed', 'S1%',
        'S2%', 'S1_Prob', 'S1_wins' (1 if Squad 1 won, else 0) and
        'Simulation'. Fixtures whose seed has no entrant are dropped by the
        inner merge.
    """
    lookup = entrants.loc[:, ['team', 'team_percent', 'seed']]
    results = fixtures
    for side, pct_col in (('Squad 1', 'S1%'), ('Squad 2', 'S2%')):
        results = pd.merge(results, lookup, left_on=side, right_on='seed')
        results = results.drop(columns=[side]).rename(
            columns={'team': side, 'team_percent': pct_col, 'seed': side + ' Seed'}
        )

    results['S1_Prob'] = _log5(results['S1%'], results['S2%'])
    # One Bernoulli draw per fixture, in row order.
    results['S1_wins'] = [np.random.binomial(n=1, p=prob, size=1)[0] for prob in results['S1_Prob']]
    results['Simulation'] = simulation
    return results


def _advance_winners(results):
    """Return the next round's entrants: the winner of match N is seeded 'M<N>'."""
    entrants = []
    for _, fixture in results.iterrows():
        if fixture['S1_wins'] == 1:
            side, pct_col = 'Squad 1', 'S1%'
        else:
            side, pct_col = 'Squad 2', 'S2%'
        entrants.append({
            'seed': 'M' + str(fixture['Match']),
            'team': fixture[side],
            'team_percent': fixture[pct_col],
        })
    return pd.DataFrame(entrants)


for z in range(num_sims):
    # Lightweight progress indicator.
    if z % 50 == 0:
        print(z)

    # R16 Simulation
    # Group standings for simulation z: rank inside each group by points, with
    # predicted strength as the tie-breaker. Ranks assume four teams per group.
    r16_df = pd.DataFrame([
        {'team': x['team'], 'group': x['group'], 'team_percent': x['team_percent'],
         'wins': x['wins'][z], 'points': x['points'][z]}
        for x in group_results
    ])
    r16_df = r16_df.sort_values(
        by=['group', 'points', 'team_percent'], ascending=[True, False, False]
    ).reset_index(drop=True)
    r16_df['rank'] = (r16_df.index % 4) + 1
    r16_df['rank'] = r16_df['rank'].astype(str)
    # Seed label such as '1A' (group winner) or '2B' (runner-up).
    r16_df['seed'] = r16_df['rank'] + r16_df['group']

    r_16_results = _play_round(r_16_matches, r16_df, z + 1)
    knockout_sims.append(r_16_results)

    # R8 Simulation
    r8_df = _advance_winners(r_16_results)
    r_8_results = _play_round(r_8_matches, r8_df, z + 1)
    r_8_sims.append(r_8_results)

    # Semi-finals Simulation
    r_4_df = _advance_winners(r_8_results)
    r_4_results = _play_round(r_4_matches, r_4_df, z + 1)
    r_4_sims.append(r_4_results)

    # Finals Simulation
    # The second merge used to read from an undefined name (`finals_1`), which
    # raised NameError; both sides are now filled through the shared helper.
    finals_df = _advance_winners(r_4_results)
    finals_results = _play_round(final_matches, finals_df, z + 1)
    finals_sims.append(finals_results)
    

In [None]:
# Stack every simulated final into one frame, keeping each frame's own index.
# pd.concat replaces the row-by-row DataFrame.append loop: append was removed
# in pandas 2.0 and re-copied the accumulated frame on every iteration.
# With no simulations the result is an empty frame, as before.
all_finals = pd.concat(finals_sims) if finals_sims else pd.DataFrame()

In [None]:
# Preview the first five simulated finals.
all_finals.head(n=5)

In [None]:
# Champion of each simulated tournament, read from the first row of every
# finals frame: Squad 1 when S1_wins is 1, otherwise Squad 2.
winners = [
    final['Squad 1'].values[0] if final.S1_wins.values[0] == 1 else final['Squad 2'].values[0]
    for final in finals_sims
]
    

In [None]:
from collections import Counter


In [None]:
# Total number of titles counted across all teams.
title_counts = Counter(winners)
sum(title_counts.values())

In [None]:
# (team, titles) pairs ordered from the most to the fewest simulated titles.
sorted(Counter(winners).items(), key=lambda team_count: team_count[1], reverse=True)

In [None]:
# Inspect the finals results frame stored for the first simulation.
finals_sims[0]

In [None]:
# Inspect the semi-final results frame stored for the first simulation.
r_4_sims[0]

In [None]:
# Inspect the R8 results frame stored for the first simulation.
r_8_sims[0]

In [None]:
# Inspect the R16 results frame stored for the first simulation
# (knockout_sims collects the R16 results of every simulation).
knockout_sims[0]

In [None]:
# Inspect the R16 results frame at position 5 (the sixth simulation).
knockout_sims[5]

In [None]:
# Fixture template for the R8 round: rows of world_cup_matches labelled 'R8'
# in the 'Group' column.
is_r8_fixture = world_cup_matches['Group'] == 'R8'
r_8_matches = world_cup_matches[is_r8_fixture]
r_8_matches

In [None]:
# Scratch cell.
# NOTE(review): this rebinds r_8_sims to an empty list, discarding whatever the
# tournament simulation cell stored in it, and the loop assigns the same frame
# (knockout_sims[0]) to `test` on every pass, so it has no effect beyond the
# first iteration. Looks like a leftover experiment - confirm and remove.
r_8_sims = []
for z in range(num_sims):
    test = knockout_sims[0]

In [None]:
# Take the first simulation's R16 results as a working sample and preview it.
test = knockout_sims[0]
test.head(n=5)

In [None]:
# Winners of the sample round: the winner of match N is seeded as 'M<N>' and
# carries its own strength into the next round.
lol_2 = pd.DataFrame([
    {
        'seed': 'M' + str(fixture.Match),
        'team': fixture['Squad 1'] if fixture.S1_wins == 1 else fixture['Squad 2'],
        'team_percent': fixture['S1%'] if fixture.S1_wins == 1 else fixture['S2%'],
    }
    for _, fixture in test.iterrows()
])
lol_2

In [None]:


# Fill the R8 fixture template with the sample winners, side by side: the seed
# label in 'Squad 1' / 'Squad 2' is replaced by the team that holds that seed.
seeded_winners = lol_2.loc[:, ['team', 'team_percent', 'seed']]

r_8_1 = (
    pd.merge(r_8_matches, seeded_winners, left_on='Squad 1', right_on='seed')
    .drop(columns=['Squad 1'])
    .rename(columns={'team': 'Squad 1', 'team_percent': 'S1%', 'seed': 'Squad 1 Seed'})
)
r_8_1 = (
    pd.merge(r_8_1, seeded_winners, left_on='Squad 2', right_on='seed')
    .drop(columns=['Squad 2'])
    .rename(columns={'team': 'Squad 2', 'team_percent': 'S2%', 'seed': 'Squad 2 Seed'})
)

# Log5 win probability for Squad 1, then one 0/1 draw per fixture.
s1_pct, s2_pct = r_8_1['S1%'], r_8_1['S2%']
r_8_1['S1_Prob'] = (s1_pct - (s1_pct * s2_pct)) / (s1_pct + s2_pct - (2 * s1_pct * s2_pct))
r_8_1['S1_wins'] = [np.random.binomial(n=1, p=prob, size=1)[0] for prob in r_8_1.S1_Prob]

r_8_1

In [None]:
# Group stage simulation, this code is also in tournament sim code chunk
#
# For every squad, add up its simulated group-stage wins across all draws.
# match_sims holds one array of 0/1 draws per fixture, where 1 means Squad 1
# won; it must be in the same row order as group_stage_matches.

group_results = []
for index, row in squad_predictions.iterrows():
    team = row.Squad
    group = row.Group
    team_percent = row["% of Points Taken"]

    # Which fixtures involve this team, and on which side it plays.
    plays_as_squad_1 = (group_stage_matches['Squad 1'] == team).to_numpy()
    plays_as_squad_2 = (group_stage_matches['Squad 2'] == team).to_numpy()

    # match_sims is a plain list, so it is addressed by row position rather
    # than by DataFrame index label (the two only coincide when the group
    # fixtures carry a 0..n-1 index).
    team_wins = []
    for position in np.flatnonzero(plays_as_squad_1 | plays_as_squad_2):
        squad_1_won = np.asarray(match_sims[position])
        # A Squad 1 win is a win for this team only when it is Squad 1;
        # otherwise flip the outcome (0 <-> 1).
        team_wins.append(squad_1_won if plays_as_squad_1[position] else 1 - squad_1_won)

    # Element i is the team's number of wins in simulation i.
    team_results = np.sum(team_wins, axis=0)

    # Three points per win; draws are not modelled.
    group_results.append({'team':team, 'group':group, 'team_percent':team_percent, 'wins':team_results, 'points':team_results*3})
    