In [None]:
# Code by Ian Loree
# April 8th, 2024

In [None]:
from collections import Counter
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Analyze Results

In [None]:
# Load every per-simulation CSV into one object array with a row per file.
# Each row is filled from that file's 'Result' column, which must therefore
# hold 20 entries (presumably one per team -- confirm against teams.txt).
#
# glob returns paths in a file-system-dependent order, so sort them to keep the
# row <-> file mapping identical from run to run.
sim_files = sorted(glob.glob(pathname="simulation_results/simulation*.csv"))
n_sims = len(sim_files)
simulation_results = np.empty((n_sims, 20), dtype=object)

for i, file in enumerate(sim_files):
    record = pd.read_csv(file)
    # Deliberately no intermediate variable named `placements`: that name is
    # used at module level for the ordered list of placement labels read by
    # the plotting helpers, and rebinding it here would break them whenever
    # this cell is re-run after that list has been defined.
    simulation_results[i] = record['Result'].to_numpy()

In [None]:
# simulation_results is a two-dimensional numpy array:
# each row is a simulation, each column a country, each entry a placement.
# Column i corresponds to teams_list[i] (per the original note, India is 0).

# Placement labels, in the order they are reported and plotted.
placements = ["Champion", "Finalist", "Semifinalist", "Super 8 stage", "Group stage"]

with open('teams.txt') as teams_file:
    teams_list = teams_file.read().splitlines()

# Ex: Print results for all countries.
# The column count comes from the array itself rather than a repeated literal,
# so this loop cannot drift out of step with how simulation_results was built.
for i in range(simulation_results.shape[1]):
    country_results = Counter(simulation_results[:, i])
    print(teams_list[i], "results:", {k: country_results[k] for k in placements if k in country_results})

In [None]:
# match_records holds the result of every match played across the whole
# simulation set, which makes it easy to compute how often one team beat
# another over all simulations.
# Per the original note, the 'outcome' column exists only for the rare case
# in which a tie occurs.
# The first 55 rows are displayed below (described in the original note as
# the match records from the first simulation).
match_records = pd.read_csv('simulation_results/match_records.csv')
match_records.loc[:, ['winner', 'loser', 'outcome']].head(55)

In [None]:
def result_bar(team):
    """Show a bar chart of how often `team` reached each placement.

    Reads the module-level `simulation_results`, `teams_list` and
    `placements`. Bars follow the order of `placements`; placements the team
    never reached are left out.

    Raises:
        ValueError: if `team` is not in `teams_list`.
    """
    tally = Counter(simulation_results[:, teams_list.index(team)])
    labels = [stage for stage in placements if stage in tally]
    counts = [tally[stage] for stage in labels]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(labels, counts)
    ax.set_xlabel('Placement')
    ax.set_ylabel('Number of Simulations')
    ax.set_title(f'Simulation Results for {team}')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()

In [None]:
def result_pie(team):
    """Show a pie chart of the share of simulations `team` spent at each placement.

    Reads the module-level `simulation_results`, `teams_list` and
    `placements`. Wedges follow the order of `placements`; placements the team
    never reached are left out.

    Raises:
        ValueError: if `team` is not in `teams_list`.
    """
    tally = Counter(simulation_results[:, teams_list.index(team)])
    labels = [stage for stage in placements if stage in tally]
    sizes = [tally[stage] for stage in labels]

    _fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=140)
    # Optional legend, currently disabled:
    # ax.legend(loc="best", fontsize='medium', title="Placement")
    ax.set_title(f'Simulation Results for {team}')
    ax.axis('equal')
    plt.show()

In [None]:
def pair_results(teamA, teamB, print_statement=True):
    """Return the proportion of teamA-vs-teamB matches that teamA won.

    Looks the pairing up in the module-level `match_records` frame.
    A victory is a row with winner == teamA, loser == teamB and
    outcome == 'w'. The denominator is every row involving the two teams in
    either winner/loser orientation, whatever its outcome.

    Args:
        teamA: team whose winning proportion is wanted.
        teamB: the opponent.
        print_statement: when True, print a one-line summary.

    Returns:
        The winning proportion as a float, or None when the two names are
        equal or the teams never met in this simulation set.
    """
    if teamA == teamB:
        return None

    winners = match_records['winner']
    losers = match_records['loser']
    a_over_b = (winners == teamA) & (losers == teamB)
    b_over_a = (winners == teamB) & (losers == teamA)

    num_victories = int((a_over_b & (match_records['outcome'] == 'w')).sum())
    num_total = int((a_over_b | b_over_a).sum())

    if num_total == 0:
        if print_statement:
            print(f"{teamA} did not play {teamB} in this simulation set.")
        return None

    prop = num_victories / num_total
    if print_statement:
        print(f"{teamA} beat {teamB} {num_victories} times in {num_total} matches, for a winning proportion of {prop:.3f}.")
    return prop

In [None]:
# Percentage of simulations each team won outright; teams that never
# finished as 'Champion' are omitted from the chart.
champions = {}
# enumerate gives the column index directly, avoiding a teams_list.index()
# scan on every iteration (which would also resolve a repeated name to its
# first column).
for i, team in enumerate(teams_list):
    tally = Counter(simulation_results[:, i])
    if 'Champion' in tally:
        champions[team] = tally['Champion']*100.00/n_sims
plt.figure(figsize=(10, 6))
plt.bar(list(champions.keys()), list(champions.values()))
plt.xlabel('Country')
plt.ylabel('Percentage of Simulations Won')
plt.title('Championship Teams')
plt.xticks(rotation=45)
# Keep the rotated country labels inside the figure, as the other charts do.
plt.tight_layout()
plt.show()

In [None]:
# Get bar chart / pie chart for any team here!
team = 'New Zealand'  # Example
# Draw the bar chart first, then the pie chart, for the chosen team.
for draw_chart in (result_bar, result_pie):
    draw_chart(team)

In [None]:
team = 'India'
# Draw the bar chart first, then the pie chart, for the chosen team.
for draw_chart in (result_bar, result_pie):
    draw_chart(team)

In [None]:
# Get proportion of matches teamA won vs teamB
teamA = 'England'
teamB = 'Australia'
# The trailing semicolon keeps the returned proportion out of the cell output
# (the summary is already printed). Assigning the result to `_` would shadow
# IPython's last-output variable and stop it from being updated.
pair_results(teamA, teamB);

In [None]:
# Get proportion of matches teamA won vs all teams, and visualize
teamA = 'West Indies'

# Opponent -> winning proportion. Opponents for which pair_results returns
# None (teamA itself, or teams it never met) are skipped.
prop_dict = {}
for opponent in teams_list:
    win_rate = pair_results(teamA, opponent, False)
    if win_rate is None:
        continue
    prop_dict[opponent] = win_rate

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(list(prop_dict), list(prop_dict.values()))
ax.set_xlabel('Opponent')
ax.set_ylabel('Proportion')
ax.set_title(f'Proportion of Matches Won for {teamA}')
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()


In [None]:
# Idea: visualize the total runs scored and total runs conceded
# by a specific team across the simulation set.

def plot_runs(team):
    """Show density plots of `team`'s total runs scored and conceded.

    Re-reads every file in the module-level `sim_files` and takes, from the
    first row whose 'Country' equals `team`, the 'total_runs_scored' and
    'total_runs_conceded' values. Two KDE plots are then shown, one per
    statistic.

    Args:
        team: country name as it appears in the 'Country' column.

    Raises:
        IndexError: if a simulation file has no row for `team`.
    """
    runs_statistics = np.empty((n_sims, 2), dtype=float)
    for i, file in enumerate(sim_files):
        record = pd.read_csv(file)
        # One filtered lookup yields both statistics for this simulation.
        team_rows = record.loc[record['Country'] == team,
                               ['total_runs_scored', 'total_runs_conceded']]
        if team_rows.empty:
            # Same exception type as indexing an empty result, but the
            # message names the team and the offending file.
            raise IndexError(f"{team!r} not found in the 'Country' column of {file}")
        runs_statistics[i] = team_rows.iloc[0].to_numpy(dtype=float)

    # Create density plots: column 0 is runs scored, column 1 runs conceded.
    panels = (
        ('Runs Scored', f'Scored Runs for {team}'),
        ('Runs Conceded', f'Conceded Runs for {team}'),
    )
    for column, (xlabel, title) in enumerate(panels):
        sns.kdeplot(runs_statistics[:, column], fill=True)
        plt.xlabel(xlabel)
        plt.ylabel('Density')
        plt.title(title)
        plt.show()

In [None]:
# Show the runs-scored and runs-conceded density plots for one team;
# change the name below to plot a different team.
team_to_plot = 'United States of America'
plot_runs(team_to_plot)

In [None]:
# Idea: Plot percentage of wins against team versus relative rating?