In [None]:
import requests
import json
import os
import sys
import time
import pandas as pd
import pickle
from utils import helper as hp
from utils.config_util import Config

## URLS

In [None]:
# Load the endpoint table used by every fetch in this notebook.
# NOTE(review): Config presumably parses the YAML file and exposes it as
# `.config` — confirm in utils.config_util.
c = Config('configs/url_config.yaml')
URLS = c.config['URLS']
# Echo the loaded URLs so the cell output shows which endpoints are in use.
print(URLS)


In [None]:


# Fetch the NOC payload and export a trimmed copy: only the identifying
# columns are kept, renamed to the noc_* names the other CSVs use.
NOCS_json = hp.fetch_json(URLS["NOCS_URL"])
NOCS_list = NOCS_json['nocs']
noc_df = (
    pd.DataFrame.from_dict(NOCS_list)
    .loc[:, ['id', 'longName', 'continent']]
    .rename(columns={'longName': 'noc_name', 'id': 'noc_code'})
)
noc_df.to_csv('csvs/noc.csv', index=False)



# Disciplines List

In [None]:
# Fetch the disciplines payload and keep only the entries flagged `isSport`.
discipline_json = hp.fetch_json(URLS["DISCIPLINES_URL"])
disciplines = []
for entry in discipline_json['disciplines']:
    if entry['isSport']:
        disciplines.append({'discipline_code': entry['code'], 'discipline': entry['description']})
# Persist as a plain CSV so later steps can reuse it without refetching.
discipline_df = pd.DataFrame(disciplines)
discipline_df.to_csv('csvs/all_disciplines.csv', index=False)

In [None]:
# Download the participants index, cache it, then fetch one bio JSON per
# participant whose file is not already present under data/.
# Failures are logged to error2.txt (one "<code> <reason>" line each) so a
# later re-run of this cell can pick up whatever is still missing.
_data = hp.fetch_json(URLS["PARTICIPANTS_URL"])
if _data:
    # Save only a real payload, so a failed fetch cannot overwrite a good cache.
    hp.save_json(_data, 'jsons/participants.json')
    persons = _data['persons']

    # A fresh checkout has no data/ directory yet; listdir would raise without it.
    os.makedirs('data', exist_ok=True)
    file_list = os.listdir('data')
    file_list = [name.split('.')[0].strip() for name in file_list]
    print(file_list)
    already_downloaded = set(file_list)  # O(1) membership for the loop below

    cum = 0  # number of profiles downloaded during this run
    with open('error2.txt', 'w') as f:
        for person in persons:
            code = person['code']
            if code in already_downloaded:
                continue
            try:
                json_data = hp.fetch_json(f'https://olympics.com/OG2024/data/CIS_Bio_Athlete~comp=OG2024~code={code}~lang=ENG.json')
                if json_data:
                    hp.save_json(json_data, f'data/{code}.json')
                    already_downloaded.add(code)
                    cum += 1
                    # Report progress only when the counter actually moved;
                    # checking on every iteration repeats the same number.
                    if cum % 100 == 0:
                        print(cum)
                else:
                    # An empty response is a miss too; record it instead of dropping it.
                    f.write(f'{code} empty response\n')
            except Exception as e:
                # Best effort: note the failure and continue with the next person.
                f.write(f'{code} {str(e)}\n')
            
    
            

## Make Athletes Profile

In [None]:

# Reload the cached participants index from disk and list the codes that
# already have a bio file under data/ (file name without its extension).
path_to_json = 'jsons/participants.json'
with open(path_to_json, 'r') as participants_file:
    _data = json.load(participants_file)
persons = _data['persons']
file_list = [entry.split('.')[0].strip() for entry in os.listdir('data')]
print(file_list)


In [None]:
# Build one flat record per athlete, plus athlete -> discipline link rows,
# from the cached bio JSON files in data/.
#   l                  : list of athlete record dicts (one per kept athlete)
#   code_2_disciplines : list of {'code', 'discipline_code', 'discipline_desc'}
# A person is kept only if their main function description contains
# "athlete" and the payload has a 'disciplines' key.
downloaded_codes = set(file_list)  # set lookup instead of scanning the list per code
codes = [person['code'] for person in persons if person['code'] in downloaded_codes]

l = []
code_2_disciplines = []
for code in codes:
    with open(f'data/{code}.json', 'r') as file:
        data = json.load(file)

    # `or {}` also covers keys that are present but null, which a plain
    # .get(key, {}) default would pass through as None and crash on.
    person_obj = data.get('person') or {}
    mainfunction = person_obj.get('mainFunction') or {}
    mainfunc_desc = (mainfunction.get('description') or '').lower()
    if 'athlete' not in mainfunc_desc:
        continue
    # Records without a disciplines entry are never kept, so skip before
    # doing any of the work below.
    if 'disciplines' not in person_obj:
        continue

    organisation = person_obj.get('organisation') or {}
    person_gender = (person_obj.get('personGender') or {}).get('description', 'NA')
    given_name = person_obj.get('givenName', '')
    family_name = person_obj.get('familyName', 'NA')

    # Profile slug: lower-cased name parts joined by '-'; a person without a
    # given name gets a leading '-' in the slug.
    full_name = f'{given_name} {family_name}'.lower().strip()
    formed_url = '-'.join(full_name.split())
    formed_url = formed_url if given_name else '-' + formed_url

    # NOTE(review): the three URL values carry literal single quotes inside
    # the string — presumably for a downstream consumer; confirm before changing.
    l.append({
        'mainfunc_desc': mainfunc_desc,
        'code': code,
        'noc': organisation.get('code', 'NA'),
        'noc_full': organisation.get('description', 'NA'),
        'gender': person_gender,
        'dob': person_obj.get('birthDate', 'NA'),
        'height': person_obj.get('height', 0),
        'weight': person_obj.get('weight', 0),
        'given_name': given_name,
        'family_name': family_name,
        'picture_url': f"'https://olympics.com/OG2024/pic/OG2024/00{str(code)[0]}/{str(code)[1:4]}/medium/{code}.png'",
        'detail_url': f"'https://olympics.com/en/paris-2024/athlete/{formed_url}_{code}/'",
        'api_url': f"'https://olympics.com/OG2024/data/CIS_Bio_Athlete~comp=OG2024~code={code}~lang=ENG.json'",
    })
    code_2_disciplines.extend(
        {'code': code, 'discipline_code': discipline['code'], 'discipline_desc': discipline['description']}
        for discipline in (person_obj['disciplines'] or [])
    )

print(len(l))
print(len(code_2_disciplines))
    
        

In [None]:
# Turn the collected records into frames and write them out: one CSV per
# distinct main-function description, plus a single athlete/discipline CSV.
athlete_df = pd.DataFrame(l)
athletes_disciplines_df = pd.DataFrame(code_2_disciplines)

for athlete_type in athlete_df['mainfunc_desc'].unique():
    # Spaces in the description become underscores in the file name.
    # NOTE(review): "comlete" is misspelled, but it is part of the output
    # file name, so it is kept as-is here.
    file_stem = "_".join(athlete_type.split())
    subset = athlete_df.loc[athlete_df['mainfunc_desc'] == athlete_type]
    subset.to_csv(f'csvs/{file_stem}_athletes_comlete.csv', index=False)

athletes_disciplines_df.to_csv('csvs/athletes_disciplines_complete.csv', index=False)

In [None]:
# Bare expression: the athlete frame is shown as this cell's output.
athlete_df

## Olympics Events

In [None]:
json_file_path = 'jsons/all_olympic_events.json'
reload_override = True

# Accumulator declared here; nothing in this cell appends to it.
all_data = []

# NOTE(review): hp.initialize presumably (re)builds json_file_path from the
# base API when reload_override is set — confirm in utils.helper. The file is
# read back immediately afterwards, so it must exist once this call returns.
hp.initialize(URLS['BASE_API_URL'], reload_override, json_file_path)

print('reading page', json_file_path)
with open(json_file_path, 'r') as events_file:
    json_data = json.load(events_file)
print('json_data is ready!')

# Pull out the two top-level collections the payload is expected to carry.
units = json_data['units']
groups = json_data['groups']
print('units and groups are ready!')
    

## Build Medals CSVs

In [None]:
# Fetch the medal standings, cache the raw payload, and flatten each
# organisation's "Total" medal row into csvs/medals.csv with a readable
# country name looked up from csvs/noc.csv.
medals_json = hp.fetch_json(URLS["MEDALS_URL"])
if not medals_json:
    # Fail loudly before a missing payload overwrites the cached file.
    raise RuntimeError('medal standings could not be fetched')
hp.save_json(medals_json, 'jsons/medals.json')

medal_tables = medals_json['medalStandings']['medalsTable']
total_medals = []
_rank = []
for each in medal_tables:
    _rank.append(each['rank'])
    total_obj = next((t for t in each['medalsNumber'] if t['type'] == 'Total'), None)
    if total_obj is None:
        # No aggregate row for this organisation: report it instead of
        # failing with a bare IndexError.
        print(f"no 'Total' medal row for {each['organisation']}, skipping")
        continue
    # Build a new dict so the fetched payload itself is left unmodified.
    total_medals.append({**total_obj, 'organisation': each['organisation'], 'rank': each['rank']})

noc_df = pd.read_csv('csvs/noc.csv')
print(total_medals)
medals_df = pd.DataFrame(total_medals).drop(columns=['type'])

# One lookup table instead of filtering noc_df once per row. Codes missing
# from noc.csv become NaN rather than raising; duplicate codes resolve to
# their first occurrence.
noc_names = noc_df.drop_duplicates('noc_code').set_index('noc_code')['noc_name']
medals_df['country'] = medals_df['organisation'].map(noc_names)
medals_df.to_csv('csvs/medals.csv', index=False)
medals_df
