In [1]:
import obj
import sim
import pandas as pd
import numpy as np
import warnings
from scipy.stats import norm
import itertools
warnings.filterwarnings('ignore')
import concurrent.futures
import time

sims = pd.read_csv('sims.csv')

In [2]:
# Getting all Combos of USA Men --> combos

us_men = obj.m_country('USA').ath_perf
low = {}
top_5_AA_US = dict(sorted(us_men.items(), key=lambda item: sum(item[1]['mean']), reverse = True)[:5])
for ath in top_5_AA_US:
    for app in top_5_AA_US[ath].itertuples():
        if app.Index not in low:
            low[app.Index] = [app.mean]
        else:
            low[app.Index].append(app.mean)
low_4 = {k: sorted(v)[4] for k, v in low.items()}

considered_us_men = set()
for ath in us_men:
    for app in us_men[ath].itertuples():
        if app.mean >= low_4[app.Index]:
            considered_us_men.add(ath)
combos = list(itertools.combinations(list(considered_us_men), 5)) # len() =  CHOOSE 5
# men are considered for lineup combinations if, in any apparatus, their average score is better than 3 of the top 5 All Around US Men

In [3]:
# function to simulate a qualifying round for a given combo of USA men

USA_men_performances = obj.m_country('USA').ath_perf
def set_lineup_from_names(name_combo):
    country_comp = {'FX': [], 'HB': [], 'PB': [], 'PH': [], 'SR': [], 'VT': []}
    country_mean_scores = {'FX': [], 'HB': [], 'PB': [], 'PH': [], 'SR': [], 'VT': []}
    score_range = np.arange(12, 17, 0.001)
    for gymnast in name_combo: # select which 4 athletes will compete in each apparatus
        for apparatus in USA_men_performances[gymnast].itertuples():
            if len(country_comp[apparatus.Index]) < 4:
                dist = norm.cdf(score_range, loc=apparatus.mean, scale=apparatus.std_dev)
                country_comp[apparatus.Index].append((apparatus.mean, apparatus.std_dev, dist, gymnast, 'USA'))
                country_mean_scores[apparatus.Index].append(apparatus.mean)

            elif apparatus.mean > min(country_mean_scores[apparatus.Index]):
                dist = norm.cdf(score_range, loc=apparatus.mean, scale=apparatus.std_dev)
                country_comp[apparatus.Index].append((apparatus.mean, apparatus.std_dev, dist, gymnast, 'USA'))
                for i, p in enumerate(country_comp[apparatus.Index]):
                    if min(country_mean_scores[apparatus.Index]) == p[0]:
                        country_comp[apparatus.Index].pop(i)       
    qual_scores = {}
    for apparatus in country_comp: # Run Qualifying Rounds
        for athlete in country_comp[apparatus]:
            qual_scores[athlete[3] + '-' + athlete[4] + '_' + apparatus] = np.random.normal(athlete[0], athlete[1], 100)
    return pd.DataFrame(qual_scores)

In [4]:
import concurrent.futures
t = time.time()
sim_results = []
def simulate_simulation(simulation_id):
    if simulation_id % 100 == 0:
        print(simulation_id, time.time() - t)
        pd.DataFrame(sim_results).to_csv('results.csv')
    us_sims = set_lineup_from_names(combos[simulation_id]) # get US qualifying round sims
    full_competition = pd.concat([sims, us_sims], axis = 1) # combine US qualifying round sims with the rest of the field
    final_medals = sim.qual_to_medals(full_competition) # get the simulation of medals per 10,000 sims
    medals = {'gymnasts': combos[simulation_id]}
    for comp_type in final_medals: # organize medals output
        for medal_type in final_medals[comp_type]['USA']:
            medals[comp_type + ' ' + medal_type] = final_medals[comp_type]['USA'][medal_type]
    sim_results.append(medals)
    return medals # dictionary ready to be made into a dataframe

# Number of simulations
num_simulations = len(combos)

# Number of parallel processes (cores)
num_cores = 8

# Create a list of simulation IDs
simulation_ids = list(range(num_simulations))

# Function to run simulations concurrently
def run_simulations_parallel(simulation_ids):
    with concurrent.futures.ProcessPoolExecutor(max_workers=num_cores) as executor:
        # Using executor.map to parallelize the simulations
        results = list(executor.map(simulate_simulation, simulation_ids))

    return results

# Run simulations concurrently
simulation_results = run_simulations_parallel(simulation_ids)

# Print the results (replace with actual processing of results)
print("Simulation Results:", simulation_results)
print(t - time.time())

0 0.0677788257598877
100 1939.930135011673
200 4021.955291032791
300 5970.73122549057
400 8042.732214689255
500 10008.648090362549
600 12065.264466524124
700 14051.76832652092
800 16106.066804885864
900 18092.629230737686
1000 20122.481080770493
1100 22139.03089284897
1200 24158.468047857285
1300 26173.246618509293
1400 28196.49703860283
1500 30187.63721871376
1600 32198.394725322723
1700 34191.3357963562
1800 36204.650992155075
1900 38196.22829389572
2000 40190.759176015854
Simulation Results: [{'gymnasts': ('Riley LOOS', 'Dallas HALE', 'Vitaliy GUIMARAES', 'Colt WALKER', 'Brody MALONE'), 'Apparatus gold': 3351, 'Apparatus silver': 4465, 'Apparatus bronze': 4817, 'Indv. AA gold': 1075, 'Indv. AA silver': 2229, 'Indv. AA bronze': 2714, 'Team AA gold': 5026, 'Team AA silver': 2935, 'Team AA bronze': 1110}, {'gymnasts': ('Riley LOOS', 'Dallas HALE', 'Vitaliy GUIMARAES', 'Colt WALKER', 'Yul MOLDAUER'), 'Apparatus gold': 2895, 'Apparatus silver': 3974, 'Apparatus bronze': 4287, 'Indv. AA g

In [7]:
pd.DataFrame(simulation_results).to_csv('men_medal.csv')

In [8]:
len(simulation_results)

2002