In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import itertools
import obj # my .py file with classes for individual athletes and countries

# Disable pandas' chained-assignment check (SettingWithCopyWarning) for the whole notebook.
pd.options.mode.chained_assignment = None

In [2]:
# Load the women's and men's data from CSV files in the working directory.
# Later cells read the 'Olympic_Nation' and 'Name' columns of `men`;
# `women` is not used in the cells shown here.
women = pd.read_csv('women.csv')
men = pd.read_csv('men.csv')

**Create a projected field of gymnasts on which to base our estimates**

In [3]:
# Nations treated as already qualified as a team
men_quals = [
    'CHN', 'JPN', 'GBR', 'USA', 'CAN', 'GER',
    'ITA', 'SUI', 'ESP', 'TUR', 'NED', 'UKR',
]

# Boolean flag: True for athletes whose nation is in the team-qualified list
men['qual_team'] = men['Olympic_Nation'].isin(men_quals)

In [4]:
# Predicted five-gymnast squad for every team qualifier other than the USA.
# Layout: {country: {gymnast: DataFrame of average & SD performance per apparatus}}
no_USA = list(men_quals)
no_USA.remove('USA')
m_comp_dict = {country: obj.m_country(country).top_5 for country in no_USA}

# Names of every gymnast that appears on one of those predicted squads
q_ath = {ath for squad in m_comp_dict.values() for ath in squad}

In [5]:
# Already Qualified Individuals from the 2023 World Championships: https://usagym.org/events/2023-artistic-gymnastics-world-championships/

# Apparatus qualifiers as (athlete name, apparatus code) tuples.
# Later cells append further (name, apparatus) tuples to this list in place.
m_App_qual = [
    ('Tin SRBIC', 'HB'),
    ('Noah KUAVITA', 'PB'),
    ('Kevin PENEV', 'VT'),
    ('Eleftherios PETROUNIAS', 'SR'),
    ('Rhys MCCLENAGHAN', 'PH'),
    ('Carlos YULO', 'FX')]

# All-around qualifiers, by athlete name.
# Later cells append to / extend this list in place.
m_AA_qual = ['Andrei MUNTEAN',
           'Luka VANDENKEYBUS',
           'Diogo SOARES',
           'Junho LEE',
           'Krisztofer MESZAROS',
           'Artur DAVTYAN',
           'Artem DOLGOPYAT',
           'Milad KARIMI', 
           'Audrys NIN', 
           'Valgard REINHARDSSON'] # manually chosen Tripartite selection

In [6]:
# Estimating who will be sent from the first 3 countries that did not make it as a team,
# as well as France, because they are the host country.
#
# For each country, pick the athlete with the highest mean score across all six
# apparatus who has not already qualified individually, and add them to m_AA_qual.
# NOTE: this cell appends to m_AA_qual in place, so re-running it without first
# re-running the cell that defines m_AA_qual will add further athletes.

m_countries_1 = ['BRA', 'KOR', 'BEL', 'FRA']
idv_qualed = [a[0] for a in m_App_qual] + m_AA_qual # all previously indv. qualifiers
for country_1 in m_countries_1:
    country_aths = obj.m_country(country_1).ath_perf
    top_score = 0
    top_scorer = None
    for ath in country_aths:
        # Only athletes with a result on all 6 apparatus count as all-arounders
        if ath in idv_qualed or len(country_aths[ath]) != 6:
            continue
        aa_mean = np.mean(country_aths[ath]['mean'])
        if aa_mean > top_score:
            top_score = aa_mean
            top_scorer = ath
    if top_scorer is not None:
        m_AA_qual.append(top_scorer)
    else:
        # Previously None was appended here and only failed later, when the
        # name was looked up; skip the country and make the gap visible instead.
        print(f'No eligible all-around athlete found for {country_1}; skipping')

In [7]:
# Mark every individual qualifier identified so far as qualified (qual_team = True)
idv_qualed = [entry[0] for entry in m_App_qual] + m_AA_qual
is_idv_qualifier = men['Name'].isin(idv_qualed)
men.loc[is_idv_qualifier, 'qual_team'] = True

In [8]:
# Pick the 4 best remaining all-around athletes among those not yet flagged as qualified.
# An athlete is eligible only if their summary table has all 6 apparatus; the
# ranking value is the sum of their per-apparatus mean scores.
remaining = men[men['qual_team'] == False]['Name'].unique()
last_4_AA = {}  # athlete name -> summed mean score, never more than 4 entries
for ath in remaining:
    ath_summ = obj.male_gymnast(ath).summ_table
    if len(ath_summ) != 6:
        continue
    aa_total = sum(ath_summ['mean'])
    if len(last_4_AA) < 4:
        last_4_AA[ath] = aa_total
    elif aa_total > min(last_4_AA.values()):
        # Evict the current lowest scorer by name. Looking the name up through a
        # score -> name dict breaks when two athletes share the same total
        # (wrong athlete evicted, or KeyError on an already-evicted name).
        weakest = min(last_4_AA, key=last_4_AA.get)
        del last_4_AA[weakest]
        last_4_AA[ath] = aa_total

m_AA_qual.extend(last_4_AA.keys())

In [9]:
# Per-apparatus candidate pool: {apparatus code: {athlete name: mean score}}.
# Athletes already chosen among the last 4 all-arounders are left out.
last_2_App = {code: {} for code in ('FX', 'HB', 'PB', 'PH', 'SR', 'VT')}

for ath in remaining:
    if ath in last_4_AA:
        continue
    ath_summ = obj.male_gymnast(ath).summ_table
    for row in ath_summ.itertuples():
        # row.Index is the apparatus label of the summary table
        last_2_App[row.Index][ath] = row.mean

In [10]:
# Promote the two highest-mean candidates on each apparatus to apparatus qualifiers
for app, app_scores in last_2_App.items():
    ranked = sorted(app_scores, key=app_scores.get, reverse=True)
    m_App_qual.extend((ath, app) for ath in ranked[:2])

In [11]:
# Register each individual all-around qualifier, with their full summary table,
# under their country in the competitors pool
for AA in m_AA_qual:
    gymnast = obj.male_gymnast(AA)
    nation = gymnast.country
    if nation not in m_comp_dict:
        m_comp_dict[nation] = {}
    m_comp_dict[nation][AA] = gymnast.summ_table

In [12]:
# Add individual apparatus qualifiers to the competitors pool. Each contributes
# only the summary-table row for the apparatus they qualified on.
for App in m_App_qual:
    name, app_code = App[0], App[1]
    a = obj.male_gymnast(name)
    app_row = a.summ_table.loc[app_code].to_frame().T
    country_pool = m_comp_dict.setdefault(a.country, {})
    existing = country_pool.get(name)
    if existing is None:
        country_pool[name] = app_row
    elif app_code not in existing.index:
        # The athlete is already in the pool (e.g. qualified on another
        # apparatus). Add this apparatus instead of overwriting their entry,
        # which would silently drop the earlier apparatus.
        country_pool[name] = pd.concat([existing, app_row])
    # else: their existing table already covers this apparatus; keep it as is.

In [13]:
# adding all athletes in competitors pool to pool separated by apparatus
qual = {'FX': [], 'HB': [], 'PB': [], 'PH': [], 'SR': [], 'VT': []}
score_range = np.arange(12, 17, 0.001)
for country in m_comp_dict:
    country_comp = {'FX': [], 'HB': [], 'PB': [], 'PH': [], 'SR': [], 'VT': []}
    country_mean_scores = {'FX': [], 'HB': [], 'PB': [], 'PH': [], 'SR': [], 'VT': []}
    for gymnast in m_comp_dict[country]: # filtering down country qualifiers to only have 4 athletes per apparatus
        for apparatus in m_comp_dict[country][gymnast].itertuples():
            if len(country_comp[apparatus.Index]) < 4:
                dist = norm.cdf(score_range, loc=apparatus.mean, scale=apparatus.std_dev)
                country_comp[apparatus.Index].append((apparatus.mean, apparatus.std_dev, dist, gymnast, country))
                country_mean_scores[apparatus.Index].append(apparatus.mean)
           
            elif apparatus.mean > min(country_mean_scores[apparatus.Index]):
                dist = norm.cdf(score_range, loc=apparatus.mean, scale=apparatus.std_dev)
                country_comp[apparatus.Index].append((apparatus.mean, apparatus.std_dev, dist, gymnast, country))
                for i, p in enumerate(country_comp[apparatus.Index]):
                    if min(country_mean_scores[apparatus.Index]) == p[0]:
                        country_comp[apparatus.Index].pop(i)                        
    for apparatus in qual:
        qual[apparatus].extend(country_comp[apparatus])

  x = np.asarray((x - loc)/scale, dtype=dtyp)


In [14]:
# Simulate n qualifying rounds: for every athlete on every apparatus, draw n
# scores from a normal distribution with that athlete's mean and std_dev.
men_apps = ['FX', 'PH', 'SR', 'VT', 'PB', 'HB']
n = 100  # number of simulated qualifying rounds
sim_seed = 2024  # fixed seed so Restart & Run All reproduces the same m_sims.csv
sim_rng = np.random.default_rng(sim_seed)

qual_scores = {}   # '<gymnast>-<country>_<apparatus>' -> array of n simulated scores
athlete_dict = {}  # same key -> the athlete's tuple from `qual`
for apparatus in men_apps: # Run Qualifying Rounds
    for athlete in qual[apparatus]:
        # athlete = (mean, std_dev, cdf, gymnast, country)
        key = athlete[3] + '-' + athlete[4] + '_' + apparatus
        qual_scores[key] = sim_rng.normal(athlete[0], athlete[1], n)
        athlete_dict[key] = athlete
sims = pd.DataFrame(qual_scores)

In [15]:
# Write the simulated scores to disk, one column per '<gymnast>-<country>_<apparatus>' key,
# without the row index.
sims.to_csv('m_sims.csv', index = False)