In [2]:
import requests
import json
import html
import pandas as pd



In [4]:
years = ['2021', '2022', '2023', '2024']

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30

for year in years:
    # URL of the JSON results file for this edition
    url = f'https://100x100trail.com/json/TOR330{year}.json'

    # Fetch the JSON data; raises requests.exceptions.Timeout if the server stalls
    response = requests.get(url, timeout=REQUEST_TIMEOUT)

    # Only a 200 response is treated as carrying the results payload
    if response.status_code == 200:
        # Parse the JSON data
        data = response.json()

        # Keep a local copy; a later cell reads 'TOR3302024.json' from disk
        with open(f'TOR330{year}.json', 'w') as f:
            json.dump(data, f, indent=4)

        print(f"Data saved to 'TOR330{year}.json'")
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")

Data saved to 'TOR3302021.json'
Data saved to 'TOR3302022.json'
Data saved to 'TOR3302023.json'
Data saved to 'TOR3302024.json'


In [11]:
# Module-level accumulator: parse_data appends one list per athlete to it
rows = []


def parse_data(entries):
    """Append one row per athlete entry to the module-level ``rows`` list.

    Each entry is expected to expose ``entry["data"]["attributes"]`` with the
    keys ``name``, ``team``, ``pettorale``, ``sesso``, ``categoria``,
    ``finisher`` and ``nazionalita``. The name is HTML-unescaped and
    title-cased; every other field is copied unchanged.

    The appended row order is:
    [team, name, pettorale, sesso, categoria, nazionalita, finisher].

    Returns None; the result is the side effect on ``rows``.
    """
    for record in entries:
        attrs = record["data"]["attributes"]

        # The name is the only field that gets cleaned up
        display_name = html.unescape(attrs["name"]).title()

        sponsor = attrs["team"]
        bib_number = attrs["pettorale"]
        sex = attrs["sesso"]
        age_category = attrs["categoria"]
        finished = attrs["finisher"]
        country_code = attrs["nazionalita"]

        rows.append(
            [sponsor, display_name, bib_number, sex, age_category, country_code, finished]
        )

        
        
# Read the 2024 results from the local JSON copy and parse them into `rows`
with open('TOR3302024.json', 'r') as results_file:
    data = json.load(results_file)
parse_data(data)

# Column labels, in the same order as the fields appended by parse_data
athlete_columns = ["Sponsor", "Name", "Bib", "Sex", "Category", 'Nationality', 'Status']
df = pd.DataFrame(rows, columns=athlete_columns)

# Show the resulting table
print(df)

                        Sponsor                   Name   Bib Sex Category  \
0        SALOMON INTERNATIONNAL       D'Haene Francois    10   M      SEN   
1                  LA SPORTIVA     Marmissolle  Beñat     25   M       V1   
2     Kailas International Team         Perrier Martin     7   M      SEN   
3       Trail running coaching           Macchi Andrea    20   M      SEN   
4             The Green Runners            Hall Damian    15   M       V1   
...                         ...                    ...   ...  ..      ...   
1091                    runcard  Marziano  Gianfranco   1413   M       V2   
1092               Brooks Trail      Mcconaughy Joseph    11   M      SEN   
1093                       None          Lehmann Anita   592   F       V1   
1094                        BMS         Locci Philippe   221   M       V2   
1095                   Kikourou         Doutte Antoine   463   M       V1   

     Nationality  Status  
0             FR    True  
1             FR    T

### Nationality - Mapping

In [None]:
# List the distinct values of the 'Nationality' column
nationality_codes = df['Nationality'].unique()
print(nationality_codes)

In [21]:
# Dictionary from two-letter country abbreviations to full country names
country_mapping = {
    'FR': 'France', 'CH': 'Switzerland', 'IT': 'Italy', 'GB': 'United Kingdom',
    'ES': 'Spain', 'US': 'United States', 'DE': 'Germany', 'PL': 'Poland',
    'NZ': 'New Zealand', 'JP': 'Japan', 'DK': 'Denmark', 'CY': 'Cyprus',
    'CN': 'China', 'TR': 'Turkey', 'SM': 'San Marino', 'NL': 'Netherlands',
    'AT': 'Austria', 'GR': 'Greece', 'PT': 'Portugal', 'VN': 'Vietnam',
    'NO': 'Norway', 'SE': 'Sweden', 'CZ': 'Czech Republic', 'CA': 'Canada',
    'BR': 'Brazil', 'TH': 'Thailand', 'IE': 'Ireland', 'FI': 'Finland',
    'PE': 'Peru', 'UA': 'Ukraine', 'BE': 'Belgium', 'BG': 'Bulgaria',
    'MY': 'Malaysia', 'MX': 'Mexico', 'LT': 'Lithuania', 'GT': 'Guatemala',
    'KR': 'South Korea', 'CR': 'Costa Rica', 'AR': 'Argentina', 'AQ': 'Antarctica',
    'SG': 'Singapore', 'CL': 'Chile', 'RO': 'Romania', 'RS': 'Serbia',
    'AE': 'United Arab Emirates', 'HK': 'Hong Kong', 'ID': 'Indonesia',
    'SK': 'Slovakia', 'AU': 'Australia', 'SI': 'Slovenia', 'RE': 'Réunion',
    'AD': 'Andorra', 'EE': 'Estonia', 'TW': 'Taiwan', 'ZA': 'South Africa',
    'MO': 'Macau', 'GP': 'Guadeloupe', 'CO': 'Colombia', 'MA': 'Morocco',
    'HU': 'Hungary', 'MT': 'Malta'
}

# Replace the abbreviations in df['Nationality'] with full names.
# A bare .map() turns every value that is not a key of the mapping into NaN:
# unknown codes would silently vanish, and re-running this cell (when the
# column already holds full names) would wipe the whole column. Falling back
# to the existing value keeps unmapped entries and makes the cell re-runnable.
current_nationality = df['Nationality']
df['Nationality'] = current_nationality.map(country_mapping).fillna(current_nationality)

# Check the updated DataFrame
print(df[['Nationality']])


         Nationality
0             France
1             France
2        Switzerland
3              Italy
4     United Kingdom
...              ...
1091           Italy
1092   United States
1093     Switzerland
1094          France
1095         Réunion

[1096 rows x 1 columns]


### Extracting Times

In [None]:
# Placeholder list; nothing is appended to it in this cell
row_events = []

# Inspect the raw event records of the last 20 entries: print every event
# next to the bib number of the athlete it belongs to.
for record in data[-20:]:
    payload = record["data"]
    runner_info = payload["attributes"]
    checkpoints = payload["relationships"]["events"]["data"]

    # Bib number identifying the athlete
    bib = runner_info["pettorale"]

    for checkpoint in checkpoints:
        print(bib, checkpoint)

# NOTE: collecting (bib, place, time) rows into row_events is not implemented
# here; this cell only prints the events for inspection.


In [31]:
# Build one row per athlete with the times of the first three checkpoints.
#
# Start from a fresh list: the shared `rows` list still holds the 7-field rows
# appended by parse_data, and mixing them with the 9-field rows built here
# would produce duplicated athletes with misaligned columns.
rows = []


def _first_event_time(events, title):
    """Return the 'start_date' of the first event titled `title`, or None if absent."""
    return next((event['start_date'] for event in events if event['title'] == title), None)


# Loop through the data and extract relevant fields
for entry in data:
    attributes = entry["data"]["attributes"]
    events = entry["data"]["relationships"]["events"]["data"]

    # Decode HTML entities and title-case the name, as parse_data does for the
    # same field, and drop surrounding whitespace
    name = html.unescape(attributes["name"]).strip().title()
    bib = attributes["pettorale"]
    category = attributes["categoria"]
    status = attributes["finisher"]
    sponsor = attributes["team"]
    sesso = attributes["sesso"]

    # Extract event start times (some may be missing, in which case None is stored)
    start = _first_event_time(events, "010 - START")
    baite_youlaz = _first_event_time(events, "020 - Baite Youlaz")
    la_thuile = _first_event_time(events, "030 - La Thuile")

    # Append to rows
    rows.append([sponsor, name, bib, sesso, category, status, start, baite_youlaz, la_thuile])

# Create a DataFrame. "Sex" and "Category" use the same labels as the athlete
# table built earlier, so cells that filter on df['Sex'] keep working.
df = pd.DataFrame(rows, columns=["Sponsor", "Name", "Bib", "Sex", "Category", "Status", "Start", "Baite Youlaz", "La Thuile"])

# Display the DataFrame
print(df)


1179 {'title': '060 - Rif. Deffeyes 450', 'row': 1, 'start_date': '2024-09-08T20:47:43+02:00', 'age': 31663, 'event_status_id': 194}
1179 {'title': '030 - La Thuile', 'row': 1, 'start_date': '2024-09-08T17:08:51+02:00', 'age': 18531, 'event_status_id': 194}
1179 {'title': '020 - Baite Youlaz', 'row': 1, 'start_date': '2024-09-08T15:54:14+02:00', 'age': 14054, 'event_status_id': 194}
1179 {'title': '010 - START', 'row': 1, 'start_date': '2024-09-08T11:41:32+02:00', 'age': -1108, 'event_status_id': 194}
1159 {'title': '060 - Rif. Deffeyes 450', 'row': 1, 'start_date': '2024-09-08T20:53:42+02:00', 'age': 32022, 'event_status_id': 194}
1159 {'title': '030 - La Thuile', 'row': 1, 'start_date': '2024-09-08T16:38:29+02:00', 'age': 16709, 'event_status_id': 194}
1159 {'title': '020 - Baite Youlaz', 'row': 1, 'start_date': '2024-09-08T15:36:31+02:00', 'age': 12991, 'event_status_id': 194}
1159 {'title': '010 - START', 'row': 1, 'start_date': '2024-09-08T11:41:19+02:00', 'age': -1121, 'event_sta

In [None]:
# Keep only the rows whose 'Sex' column is 'F'
is_woman = df['Sex'] == 'F'
women_df = df[is_woman]

# Number of women overall, and number of women whose 'Status' is True
total_women = len(women_df)
women_finished_mask = women_df['Status'] == True
finished_women = len(women_df[women_finished_mask])

# Share of finishers as a percentage; 0 when there are no women at all
if total_women > 0:
    percentage_finished = (finished_women / total_women) * 100
else:
    percentage_finished = 0

print(f"Percentage of women who finished: {percentage_finished:.2f}%")


In [23]:
# Assuming df is the DataFrame with athlete data
# (uses the 'Nationality', 'Name' and 'Status' columns)

# Group by nationality
nationality_groups = df.groupby('Nationality')

# Total athletes per nationality (rows with a non-null name)
total_athletes = nationality_groups['Name'].count()

# Finishers per nationality: rows with Status == True and a non-null name.
# Summing a boolean flag per group gives the same counts as the previous
# groupby.apply(lambda ...) without running Python code once per group and
# without the DataFrameGroupBy.apply deprecation warning of recent pandas.
is_finisher = (df['Status'] == True) & df['Name'].notna()
finished_athletes = is_finisher.groupby(df['Nationality']).sum()

# Calculate the percentage of finishers per nationality
percentage_finished = (finished_athletes / total_athletes) * 100

# Create a new DataFrame for clarity
percentage_df = pd.DataFrame({
    'Total Athletes': total_athletes,
    'Finished Athletes': finished_athletes,
    'Percentage Finished (%)': percentage_finished
})

# Top 50 nationalities by finisher percentage (displayed as the cell output)
percentage_df.sort_values(by='Percentage Finished (%)', ascending = False).head(50)


Unnamed: 0_level_0,Total Athletes,Finished Athletes,Percentage Finished (%)
Nationality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
United Arab Emirates,1,1,100.0
Costa Rica,1,1,100.0
Lithuania,2,2,100.0
Antarctica,2,2,100.0
Turkey,3,3,100.0
Mexico,2,2,100.0
Chile,1,1,100.0
Czech Republic,2,2,100.0
Cyprus,1,1,100.0
Portugal,13,11,84.615385


['FR' 'CH' 'IT' 'GB' 'ES' 'US' 'DE' 'PL' 'NZ' 'JP' 'DK' 'CY' 'CN' 'TR'
 'SM' 'NL' 'AT' 'GR' 'PT' 'VN' 'NO' 'SE' 'CZ' 'CA' 'BR' 'TH' 'IE' 'FI'
 'PE' 'UA' 'BE' 'BG' 'MY' 'MX' 'LT' 'GT' 'KR' 'CR' 'AR' 'AQ' 'SG' 'CL'
 'RO' 'RS' 'AE' 'HK' 'ID' 'SK' 'AU' 'SI' 'RE' 'AD' 'EE' 'TW' 'ZA' 'MO'
 'GP' 'CO' 'MA' 'HU' 'MT']
