In [1]:
import requests
import json
import html
import pandas as pd
import time
import re
import unicodedata



In [2]:
# Editions and race codes to process; both are strings because they are
# interpolated straight into the workbook file names.
years = [str(edition) for edition in range(2021, 2025)]
races = ['TOR330', 'TOR450', 'TOR130']

## Grabbing Nationality from TORX Data

In [6]:
# Collect every distinct nationality code found across all race/year workbooks.
unique_nationalities = set()

for race in races:
    for year in years:
        df = pd.read_excel(f'{race} Data/100x100trail/{race}_{year}.xlsx')
        print(f'{race}_{year} {df.shape}')

        # Normalize surrounding whitespace so ' IT' and 'IT' count as one code.
        df['Nationality'] = df['Nationality'].str.strip()

        # .str.strip() leaves missing cells as NaN. Drop them here: a float NaN
        # in the set would make sorted() below raise TypeError (float vs. str).
        nationalities = df['Nationality'].dropna().unique()
        unique_nationalities.update(nationalities)

# Convert the set to a sorted list so the printed output is stable.
unique_nationalities_list = sorted(unique_nationalities)

print(len(unique_nationalities_list))
print(unique_nationalities_list)


TOR330_2021 (712, 8)
TOR330_2022 (960, 8)
TOR330_2023 (1206, 8)
TOR330_2024 (1096, 8)
TOR450_2021 (56, 8)
TOR450_2022 (149, 8)
TOR450_2023 (187, 8)
TOR450_2024 (185, 8)
TOR130_2021 (264, 8)
TOR130_2022 (388, 8)
TOR130_2023 (505, 8)
TOR130_2024 (393, 8)
92
['AD', 'AE', 'AI', 'AQ', 'AR', 'AT', 'AU', 'BE', 'BG', 'BN', 'BO', 'BR', 'CA', 'CH', 'CL', 'CN', 'CO', 'CR', 'CY', 'CZ', 'DE', 'DK', 'EC', 'EE', 'ES', 'FI', 'FR', 'GB', 'GF', 'GG', 'GP', 'GR', 'GT', 'HK', 'HR', 'HU', 'ID', 'IE', 'IL', 'IM', 'IN', 'IR', 'IS', 'IT', 'JE', 'JO', 'JP', 'KR', 'KZ', 'LT', 'LU', 'LV', 'MA', 'MC', 'ME', 'MO', 'MQ', 'MT', 'MU', 'MX', 'MY', 'NC', 'NL', 'NO', 'NZ', 'PE', 'PF', 'PH', 'PL', 'PT', 'QA', 'RE', 'RO', 'RS', 'RU', 'SA', 'SE', 'SG', 'SI', 'SK', 'SM', 'SN', 'SS', 'TH', 'TR', 'TW', 'UA', 'US', 'UY', 'VE', 'VN', 'ZA']


## Mapping two-letter nationality codes to country names

In [7]:
# Lookup from two-letter nationality code to a readable country/territory name.
# Entries are listed in alphabetical order by code; insertion order is kept
# because it determines the row order of any table built from this dict.
country_mapping = {
    # A
    'AD': 'Andorra',
    'AE': 'United Arab Emirates',
    'AI': 'Anguilla',
    'AQ': 'Antarctica',
    'AR': 'Argentina',
    'AT': 'Austria',
    'AU': 'Australia',
    # B
    'BE': 'Belgium',
    'BG': 'Bulgaria',
    'BN': 'Brunei',
    'BO': 'Bolivia',
    'BR': 'Brazil',
    # C
    'CA': 'Canada',
    'CH': 'Switzerland',
    'CL': 'Chile',
    'CN': 'China',
    'CO': 'Colombia',
    'CR': 'Costa Rica',
    'CY': 'Cyprus',
    'CZ': 'Czech Republic',
    # D-F
    'DE': 'Germany',
    'DK': 'Denmark',
    'EC': 'Ecuador',
    'EE': 'Estonia',
    'ES': 'Spain',
    'FI': 'Finland',
    'FR': 'France',
    # G
    'GB': 'United Kingdom',
    'GF': 'French Guiana',
    'GG': 'Guernsey',
    'GP': 'Guadeloupe',
    'GR': 'Greece',
    'GT': 'Guatemala',
    # H-I
    'HK': 'Hong Kong',
    'HR': 'Croatia',
    'HU': 'Hungary',
    'ID': 'Indonesia',
    'IE': 'Ireland',
    'IL': 'Israel',
    'IM': 'Isle of Man',
    'IN': 'India',
    'IR': 'Iran',
    'IS': 'Iceland',
    'IT': 'Italy',
    # J-L
    'JE': 'Jersey',
    'JO': 'Jordan',
    'JP': 'Japan',
    'KR': 'South Korea',
    'KZ': 'Kazakhstan',
    'LT': 'Lithuania',
    'LU': 'Luxembourg',
    'LV': 'Latvia',
    # M
    'MA': 'Morocco',
    'MC': 'Monaco',
    'ME': 'Montenegro',
    'MO': 'Macau',
    'MQ': 'Martinique',
    'MT': 'Malta',
    'MU': 'Mauritius',
    'MX': 'Mexico',
    'MY': 'Malaysia',
    # N-Q
    'NC': 'New Caledonia',
    'NL': 'Netherlands',
    'NO': 'Norway',
    'NZ': 'New Zealand',
    'PE': 'Peru',
    'PF': 'French Polynesia',
    'PH': 'Philippines',
    'PL': 'Poland',
    'PT': 'Portugal',
    'QA': 'Qatar',
    # R-S
    'RE': 'Réunion',
    'RO': 'Romania',
    'RS': 'Serbia',
    'RU': 'Russia',
    'SA': 'Saudi Arabia',
    'SE': 'Sweden',
    'SG': 'Singapore',
    'SI': 'Slovenia',
    'SK': 'Slovakia',
    'SM': 'San Marino',
    'SN': 'Senegal',
    'SS': 'South Sudan',
    # T-Z
    'TH': 'Thailand',
    'TR': 'Turkey',
    'TW': 'Taiwan',
    'UA': 'Ukraine',
    'US': 'United States',
    'UY': 'Uruguay',
    'VE': 'Venezuela',
    'VN': 'Vietnam',
    'ZA': 'South Africa',
}
# Displayed as the cell result: number of codes covered by the mapping.
len(country_mapping)

92

In [8]:
# Turn the code -> name mapping into a two-column lookup table,
# one row per nationality code, in the mapping's insertion order.
country_df = pd.DataFrame({
    'Nationality': list(country_mapping.keys()),
    'Nationality Name': list(country_mapping.values()),
})

print(country_df)

# Save the lookup table as a single-sheet workbook, omitting the row index.
country_df.to_excel(
    'Database Data/TORX_100x100trail_nationality_table.xlsx',
    sheet_name='Nationality Code',
    index=False,
)

   Nationality      Nationality Name
0           AD               Andorra
1           AE  United Arab Emirates
2           AI              Anguilla
3           AQ            Antarctica
4           AR             Argentina
..         ...                   ...
87          US         United States
88          UY               Uruguay
89          VE             Venezuela
90          VN               Vietnam
91          ZA          South Africa

[92 rows x 2 columns]
