In [12]:
import pandas as pd
import trueskillthroughtime as ttt
import unidecode
import numpy as np
import os
import json

# ifsc.results.info data to TrueSkill
Reads the JSON files in the data directory and builds TrueSkill Through Time ratings from the event rankings. The events that feed the model can be narrowed with the `selected_events` filter.

In [13]:
def _ascent_flag(cell, key):
    """Return 1 when ``cell`` is an ascent dict whose ``key`` entry is truthy, else NaN."""
    return 1 if isinstance(cell, dict) and cell.get(key) else np.nan


def _ascent_value(cell, key):
    """Return ``cell[key]`` for an ascent dict; NaN when the cell, the key or the value is missing."""
    if not isinstance(cell, dict):
        return np.nan
    value = cell.get(key)
    return np.nan if value is None else value


def _dense_rank(frame, columns):
    """Dense-rank the rows of ``frame`` by ``columns`` (left to right, ascending, missing last).

    Missing values are replaced by +inf before comparing, so the result does
    not depend on row order (tuples containing NaN do not compare reliably).
    Ranks are returned as floats, like ``Series.rank`` does.
    """
    numeric = frame[columns].apply(pd.to_numeric, errors='coerce').fillna(np.inf)
    keys = list(numeric.itertuples(index=False, name=None))
    position = {key: place for place, key in enumerate(sorted(set(keys)), start=1)}
    return [float(position[key]) for key in keys]


def _add_boulder_columns(frame, source_prefix, short_prefix):
    """Add top/zone/tries/rank columns for every ``{source_prefix}{i}`` column, in place."""
    # Prefix match on purpose: 'final_boulder_' is a substring of
    # 'semifinal_boulder_', so a substring test would over-count finals.
    count = sum(str(column).startswith(source_prefix) for column in frame.columns)
    for i in range(1, count + 1):
        cells = frame[f'{source_prefix}{i}']
        stem = f'{short_prefix}_{i}'
        frame[f'{stem}_top'] = [_ascent_flag(cell, 'top') for cell in cells]
        frame[f'{stem}_top_tries'] = [_ascent_value(cell, 'top_tries') for cell in cells]
        frame[f'{stem}_zone'] = [_ascent_flag(cell, 'zone') for cell in cells]
        frame[f'{stem}_zone_tries'] = [_ascent_value(cell, 'zone_tries') for cell in cells]

        # Per-boulder ranking: top, then top tries, then zone, then zone tries.
        # NOTE(review): top_tries is compared even for athletes without a top;
        # this assumes the data leaves it empty in that case - confirm.
        frame[f'{stem}_rank'] = _dense_rank(
            frame,
            [f'{stem}_top', f'{stem}_top_tries', f'{stem}_zone', f'{stem}_zone_tries'],
        )


def create_boulder_composition(round):
    """Flatten one boulder event ranking into a DataFrame with one row per athlete.

    Parameters
    ----------
    round : iterable of dict
        One entry per athlete. The keys ``athlete_id``, ``rank``, ``name`` and
        ``country`` are copied when present. ``rounds`` must be a list of round
        dicts, each with a ``round_name`` and, optionally, ``ascents`` (taken
        from ``speed_elimination_stages`` when that key holds a dict).

    Returns
    -------
    pandas.DataFrame
        * the copied athlete columns;
        * ``{qualification|semifinal|final}_boulder_{i}``: dict with the
          ``id``, ``top``, ``top_tries``, ``zone``, ``zone_tries`` and
          ``starting_group`` entries present on the i-th ascent of that round;
        * ``{q|s|f}_b_{i}_top`` / ``_zone``: 1 when achieved, NaN otherwise;
        * ``{q|s|f}_b_{i}_top_tries`` / ``_zone_tries``: the raw try counts,
          NaN when missing;
        * ``{q|s|f}_b_{i}_rank``: dense rank (1.0 is best) over
          (top, top_tries, zone, zone_tries), ascending with missing values
          last. Athletes without a result on that boulder share the last rank.

    Raises
    ------
    KeyError
        If an athlete has no ``rounds`` key or a round has no ``round_name``.

    Notes
    -----
    Only rounds named qualification, semi-final or final (case-insensitive,
    like ``create_lead_composition``) are used. Rounds without ascent data add
    no per-boulder columns instead of raising.
    """
    identity_keys = ('athlete_id', 'rank', 'name', 'country')
    ascent_keys = ('id', 'top', 'top_tries', 'zone', 'zone_tries', 'starting_group')
    counted_rounds = ('qualification', 'final', 'semi-final')

    rows = []
    for player in round:
        # A tuple (not a set) keeps the column order identical between runs.
        row = {key: player[key] for key in identity_keys if key in player}
        for player_round in player['rounds']:
            round_name = player_round['round_name'].lower()
            if round_name not in counted_rounds:
                continue
            round_type = round_name.replace('-', '')  # 'semi-final' -> 'semifinal'

            stages = player_round.get('speed_elimination_stages')
            source = stages if isinstance(stages, dict) else player_round
            for index, boulder in enumerate(source.get('ascents') or [], start=1):
                row[f"{round_type}_boulder_{index}"] = {
                    key: boulder[key] for key in ascent_keys if key in boulder
                }
        rows.append(row)

    frame = pd.DataFrame(rows)

    # Per-boulder rankings; not consumed by the TrueSkill step yet (TODO).
    for source_prefix, short_prefix in (
        ('qualification_boulder_', 'q_b'),
        ('final_boulder_', 'f_b'),
        ('semifinal_boulder_', 's_b'),
    ):
        _add_boulder_columns(frame, source_prefix, short_prefix)

    return frame

# create_composition(get_boulder(round))#.sort_values(['boulder_1_top','boulder_1_top_tries','boulder_1_zone', 'boulder_1_zone_tries'],na_position='last').head(30)
#get_boulder(round)

def create_lead_composition(round):
    """Flatten one lead event ranking into a DataFrame with one row per athlete.

    Parameters
    ----------
    round : iterable of dict
        One entry per athlete. The keys ``athlete_id``, ``rank``, ``name`` and
        ``country`` are copied when present; ``rounds`` must be a list.

    Returns
    -------
    pandas.DataFrame
        The copied athlete columns (always in the order listed above) plus
        result dicts holding whichever of ``id``, ``top``, ``plus``, ``rank``,
        ``corrective_rank`` and ``score`` are present. The layout is chosen
        per athlete:

        * no rounds at all (labelled "uiaa" in the original notes): one
          ``uiaa_overall_lead_result`` column built from the athlete entry;
        * first round has no ``ascents`` key (labelled "2018-2019"): one
          ``{round}_lead`` column per round, built from the round entry;
        * otherwise (labelled "2020 onwards"): one ``{round}_lead_{i}`` column
          per ascent of each round.

        ``{round}`` is ``qualification``, ``semifinal`` or ``final``; rounds
        with any other name are ignored.

    Raises
    ------
    KeyError
        If an athlete has no ``rounds`` key, a round has no ``round_name``, or
        a later round lacks ``ascents`` although the first round had them.
    """
    # Tuples (not sets) so the DataFrame column order is identical between runs.
    identity_keys = ('athlete_id', 'rank', 'name', 'country')
    result_keys = ('id', 'top', 'plus', 'rank', 'corrective_rank', 'score')
    counted_rounds = ('qualification', 'final', 'semi-final')

    def pick(source, keys):
        """Copy the entries of ``source`` whose key is listed in ``keys``."""
        return {key: source[key] for key in keys if key in source}

    rows = []
    for player in round:
        row = pick(player, identity_keys)
        rounds = player['rounds']

        if len(rounds) == 0:
            # No per-round data: the overall result lives on the athlete entry.
            row["uiaa_overall_lead_result"] = pick(player, result_keys)
        else:
            # The first round decides the layout for all rounds of this athlete.
            per_ascent = 'ascents' in rounds[0]
            for player_round in rounds:
                round_name = player_round['round_name'].lower()
                if round_name not in counted_rounds:
                    continue
                round_type = round_name.replace('-', '')  # 'semi-final' -> 'semifinal'

                if per_ascent:
                    for index, ascent in enumerate(player_round['ascents'], start=1):
                        row[f"{round_type}_lead_{index}"] = pick(ascent, result_keys)
                else:
                    row[f"{round_type}_lead"] = pick(player_round, result_keys)

        rows.append(row)

    return pd.DataFrame(rows)

In [23]:
#main process
# Loads the JSON exports, selects the events to rate, turns each event ranking
# into a per-athlete frame and fits a TrueSkill Through Time history on the
# resulting finishing orders.

data_folder = '../data'

output_folder = os.path.join(data_folder, 'output')
output_events_folder = os.path.join(data_folder, 'outputEvents')
full_results_folder = os.path.join(data_folder, 'outputFullResults')
athlete_folder = os.path.join(data_folder, 'athlete')


def _load_json_folder(folder, tag_filename=False):
    """Return the parsed content of every ``*.json`` file in ``folder``.

    Files are read in sorted name order so repeated runs see the same record
    order. With ``tag_filename=True`` the file name is stored on each record
    under ``'filename'`` (the records are then expected to be dicts).
    """
    records = []
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith('.json'):
            continue
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(os.path.join(folder, filename), 'r', encoding='utf-8') as file:
            record = json.load(file)
        if tag_filename:
            record['filename'] = filename
        records.append(record)
    return records


output_data = _load_json_folder(output_folder)
output_events_data = _load_json_folder(output_events_folder)
full_results_data = _load_json_folder(full_results_folder, tag_filename=True)
athlete_data = _load_json_folder(athlete_folder)


allEvents = pd.DataFrame(full_results_data)
# Rows without a ranking are dropped after the join below; calling
# dropna(inplace=True) on a single column never changed the frame.
allEvents['date'] = pd.to_datetime(allEvents['ranking_as_of'], errors='coerce')

athletes = pd.DataFrame(athlete_data)
athletes = athletes[['id', 'firstname', 'lastname', 'birthday', 'gender', 'country', 'all_results']]


def split_filename(name):
    """Return the integer between the first and second ``_`` of a result file name."""
    return int(name.split('_')[1])


allEvents['id'] = allEvents['filename'].apply(split_filename)
allEvents['cid'] = allEvents['filename'].apply(lambda x: x.split('_')[2][:-5]) # third '_'-separated part without the '.json' suffix (5 chars)
allEvents = allEvents.set_index('id')
events = pd.DataFrame(output_events_data).set_index('id')

joined_df = allEvents.join(events).dropna(subset=['ranking'])

# Event filter - edit these to rate another category or period.
selected_league_id = 1
selected_dcat = 'LEAD Men'  # alternatives: 'LEAD Women', 'BOULDER Men', 'BOULDER Women'
selected_from = '1990-01-01'  # 2007 is the first year of the IFSC - pre 2007 per boulder rankings dont exist
selected_until = '2024-01-01'

selected_events = joined_df[
    (joined_df['league_id'] == selected_league_id) &
    (joined_df['dcat'] == selected_dcat) &
    (joined_df['starts_at'] > selected_from) &
    (joined_df['starts_at'] < selected_until)
][['event', 'starts_at', 'dcat','cid', 'ranking', 'location', 'league_id', 'league_season_id', 'season_id']].sort_values('starts_at', kind='stable')  # stable: same-date events keep a reproducible order

processed_events = []
errored_events = []
for event in selected_events.itertuples(index=True):
    try:
        if 'LEAD' in event.dcat:
            c = create_lead_composition(event.ranking)
        elif 'BOULDER' in event.dcat:
            # Was create_lead_composition (copy-paste slip): boulder events
            # need the boulder-specific flattening.
            c = create_boulder_composition(event.ranking)
        else:
            continue
    except Exception as error:  # keep going, but let Ctrl-C through and report the cause
        errored_events.append(event)
        print("errored: ", event.event, repr(error))
        continue
    # dict of event info and then the comp df
    processed_events.append({'event_id': event.Index,'cid':event.cid,'event_name':event.event, 'starts_at':event.starts_at,'dataframe': c})

print(f"{len(processed_events)} processed events and {len(errored_events)} errored events")

temp = pd.DataFrame(processed_events)
# One entry per event: single-athlete teams in the row order of the event frame.
# NOTE(review): this assumes each ranking list is already ordered best-first
# and treats tied ranks as a strict order - confirm against the data.
test_comp_final_results = [
    [[athlete_id] for athlete_id in row.dataframe.athlete_id]
    for row in temp.itertuples()
]

h = ttt.History(composition=test_comp_final_results)
h.convergence()
lc = pd.DataFrame.from_dict(h.learning_curves(),orient='index')
# Each cell is a (step, estimate) pair; the last non-empty cell of a row is the
# athlete's latest estimate. Must run before any non-curve column is added.
lc['current'] = lc.apply(lambda x: x[x.last_valid_index()][1],axis=1)
lc['sigma'] = lc['current'].apply(lambda x: x.sigma)
lc['mu'] = lc['current'].apply(lambda x: x.mu)
lc.join(athletes.set_index('id'))[['firstname','lastname','current', 'sigma', 'mu']].sort_values(by=['mu'],ascending=False)#.to_csv('output_men.csv')

235 processed events and 0 errored events
Iteration =  0 , step =  (15.350534730414807, 3.0519328189930195)
Iteration =  1 , step =  (0.5684863374554876, 0.22558438614535437)
Iteration =  2 , step =  (0.17076307870202356, 0.03613752463474951)
Iteration =  3 , step =  (0.12098092566796748, 0.011063267241428143)
Iteration =  4 , step =  (0.09761741633448562, 0.00946510070885509)
Iteration =  5 , step =  (0.08420519817136662, 0.008303100583678447)
Iteration =  6 , step =  (0.0741393174285514, 0.0074100985539096165)
Iteration =  7 , step =  (0.06624195669363431, 0.006705745628033277)
Iteration =  8 , step =  (0.05997126973368308, 0.0061423196456691365)
Iteration =  9 , step =  (0.05494301261884971, 0.005687238773770531)
Iteration =  10 , step =  (0.05087504558367875, 0.005316565663595174)
Iteration =  11 , step =  (0.04755482505600664, 0.005012082656503658)
Iteration =  12 , step =  (0.04485906021805297, 0.004759689964076497)
Iteration =  13 , step =  (0.04265733912232217, 0.00454838480272

Unnamed: 0,firstname,lastname,current,sigma,mu
13040,Sorato,ANRAKU,"N(mu=4.133, sigma=0.460)",0.459860,4.133474
4665,Chris,SHARMA,"N(mu=4.019, sigma=0.497)",0.496898,4.019307
4470,Alexandre,CHABOT,"N(mu=3.925, sigma=0.197)",0.197362,3.924932
1364,Adam,ONDRA,"N(mu=3.911, sigma=0.201)",0.201131,3.911029
1214,Jakob,SCHUBERT,"N(mu=3.766, sigma=0.185)",0.185416,3.765912
...,...,...,...,...,...
1787,Jordan,ROMIG,"N(mu=-11.361, sigma=3.024)",3.023707,-11.361081
4074,İsmai̇l,ÖZTÜRK,"N(mu=-11.844, sigma=2.938)",2.937753,-11.844418
6459,Elias,GEORGES DIAB,"N(mu=-11.919, sigma=3.015)",3.014717,-11.918756
6448,Bror Morten,RANUM,"N(mu=-12.882, sigma=2.897)",2.896560,-12.882007


In [24]:
# Ratings with athlete names, highest mu first. To save instead of display,
# append .to_csv('output_lead_men_1990-2023.csv') to the expression below.
(
    lc.join(athletes.set_index('id'))
    .loc[:, ['firstname', 'lastname', 'current', 'sigma', 'mu']]
    .sort_values(by='mu', ascending=False)
)

In [36]:
# Note (essential): select boulder and lead events, extract round data into
# objects tagged with event info like id, cid, name, date.
# Below: the full learning-curve table joined with athlete details, highest mu first.
(
    lc.join(athletes.set_index('id'))
    .sort_values(by='mu', ascending=False)
)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,141,current,sigma,mu,firstname,lastname,birthday,gender,country,all_results
13040,"(229, N(mu=4.107, sigma=0.459))","(230, N(mu=4.111, sigma=0.458))","(231, N(mu=4.117, sigma=0.458))","(232, N(mu=4.121, sigma=0.458))","(233, N(mu=4.126, sigma=0.458))","(234, N(mu=4.130, sigma=0.459))","(235, N(mu=4.133, sigma=0.460))",,,,...,,"N(mu=4.133, sigma=0.460)",0.459860,4.133474,Sorato,ANRAKU,2006-11-14,male,JPN,"[{'season': '2023', 'rank': 1, 'discipline': '..."
4665,"(33, N(mu=4.013, sigma=0.497))","(36, N(mu=4.014, sigma=0.497))","(37, N(mu=4.016, sigma=0.496))","(38, N(mu=4.017, sigma=0.496))","(63, N(mu=4.019, sigma=0.497))",,,,,,...,,"N(mu=4.019, sigma=0.497)",0.496898,4.019307,Chris,SHARMA,,male,USA,"[{'season': '2010', 'rank': 18, 'discipline': ..."
4470,"(45, N(mu=4.064, sigma=0.199))","(46, N(mu=4.065, sigma=0.197))","(47, N(mu=4.067, sigma=0.195))","(48, N(mu=4.068, sigma=0.193))","(49, N(mu=4.069, sigma=0.191))","(50, N(mu=4.071, sigma=0.190))","(51, N(mu=4.073, sigma=0.188))","(52, N(mu=4.076, sigma=0.187))","(53, N(mu=4.077, sigma=0.185))","(54, N(mu=4.079, sigma=0.184))",...,,"N(mu=3.925, sigma=0.197)",0.197362,3.924932,Alexandre,CHABOT,,male,FRA,"[{'season': '2007', 'rank': 16, 'discipline': ..."
1364,"(121, N(mu=3.897, sigma=0.198))","(122, N(mu=3.897, sigma=0.196))","(123, N(mu=3.897, sigma=0.194))","(124, N(mu=3.896, sigma=0.192))","(125, N(mu=3.894, sigma=0.190))","(126, N(mu=3.891, sigma=0.189))","(127, N(mu=3.889, sigma=0.187))","(128, N(mu=3.886, sigma=0.185))","(129, N(mu=3.884, sigma=0.184))","(130, N(mu=3.880, sigma=0.183))",...,,"N(mu=3.911, sigma=0.201)",0.201131,3.911029,Adam,ONDRA,1993-02-05,male,CZE,"[{'season': '2023', 'rank': 4, 'discipline': '..."
1214,"(114, N(mu=3.642, sigma=0.182))","(115, N(mu=3.642, sigma=0.179))","(116, N(mu=3.642, sigma=0.177))","(117, N(mu=3.642, sigma=0.175))","(118, N(mu=3.646, sigma=0.173))","(119, N(mu=3.649, sigma=0.171))","(120, N(mu=3.654, sigma=0.169))","(121, N(mu=3.659, sigma=0.167))","(122, N(mu=3.665, sigma=0.166))","(123, N(mu=3.673, sigma=0.164))",...,,"N(mu=3.766, sigma=0.185)",0.185416,3.765912,Jakob,SCHUBERT,1990-12-31,male,AUT,"[{'season': '2023', 'rank': 1, 'discipline': '..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1787,"(195, N(mu=-11.361, sigma=3.024))",,,,,,,,,,...,,"N(mu=-11.361, sigma=3.024)",3.023707,-11.361081,Jordan,ROMIG,1995-04-02,male,USA,"[{'season': '2017', 'rank': 55, 'discipline': ..."
4074,"(202, N(mu=-11.844, sigma=2.938))",,,,,,,,,,...,,"N(mu=-11.844, sigma=2.938)",2.937753,-11.844418,İsmai̇l,ÖZTÜRK,1996-08-27,male,TUR,"[{'season': '2018', 'rank': 95, 'discipline': ..."
6459,"(143, N(mu=-11.919, sigma=3.015))",,,,,,,,,,...,,"N(mu=-11.919, sigma=3.015)",3.014717,-11.918756,Elias,GEORGES DIAB,,male,LIB,"[{'season': '2011', 'rank': 16, 'discipline': ..."
6448,"(179, N(mu=-12.882, sigma=2.897))",,,,,,,,,,...,,"N(mu=-12.882, sigma=2.897)",2.896560,-12.882007,Bror Morten,RANUM,,male,NOR,"[{'season': '2015', 'rank': 56, 'discipline': ..."
