# Ironman World Championships

This notebook shows how to work with the IRONMAN/70.3 World Championship data.

In [1]:
import sys; sys.path.append('..'); sys.path.append('../..')

import pandas as pd
import numpy as np

import plotly.express as px

from utils.paths import tasks_folder, data_folder

In [2]:
# Load the subevent index, one row per "subevent".
# An event (e.g. the "70.3 World Championship") is run every year, and each yearly
# edition is its own subevent. World championships are additionally split into
# separate M/F results, so each gender gets a subevent too.
df_sub = pd.read_csv(filepath_or_buffer=tasks_folder("im/subevents.csv"))

In [3]:
# Distinct event names that contain the word "World", in order of first appearance.
world_champ_names = [
    event_name
    for event_name in df_sub["name"].unique()
    if 'World' in event_name
]
world_champ_names

['70.3 World Championship', 'IRONMAN World Championship']

In [4]:
# Every world-championship subevent, both distances together.
df_wc = df_sub.loc[
    df_sub["name"].isin(["IRONMAN World Championship", "70.3 World Championship"])
]

# Per-distance subsets. The extra `series` condition is redundant with the name
# filter (as noted by the original author) and is kept as-is.
df_wc_im = df_sub.loc[
    df_sub["name"].isin(["IRONMAN World Championship"])
    & (df_sub["series"] == "IRONMAN")
]
df_wc_703 = df_sub.loc[
    df_sub["name"].isin(["70.3 World Championship"])
    & (df_sub["series"] == "IRONMAN-70.3")
]

# Optional export of the two subsets (disabled):
# df_wc_im.to_csv(tasks_folder("im/IRONMAN World Championship.csv"), index=False)
# df_wc_703.to_csv(tasks_folder("im/70.3 World Championship.csv"), index=False)

In [9]:
# Pick the subevent whose results file should be loaded; swap in one of the
# commented alternatives to inspect a different race.
subevent_id = "B68F937C-D5B4-462A-9DDA-5ED8FB00BD4E" # 70.3 Women 2023
# subevent_id = "0279BEB6-4DAF-4909-BD37-CD4933BCE10A" # 70.3 Men 2023
# subevent_id = "203D5F2F-9E1A-4355-94B4-09ECF8B5BB84" # 70.3 Combined 2021

# Results for a subevent live in a CSV file named after its id.
df = pd.read_csv(filepath_or_buffer=data_folder(f"im/csv/{subevent_id}.csv"))

# Number of result rows per recorded gender value.
df["ContactGender"].value_counts()

ContactGender
F    2217
M       2
Name: count, dtype: int64

In [19]:
# List the distinct values of the EventStatus column for the loaded results.
# A bare `df.iloc[:3]` used to sit at the top of this cell. Only the last
# expression of a cell is displayed, so that preview was computed and thrown
# away; it has been removed. To preview rows, put `df.iloc[:3]` in its own cell.
print("These are the statuses of a result:")
df.EventStatus.unique()

These are the statuses of a result:


array(['Finish', 'DNS', 'DNF', 'DQ', 'NC'], dtype=object)

In [22]:
# Choose the subevent to plot; swap in one of the commented alternatives to
# look at a different race.
subevent_id = "B68F937C-D5B4-462A-9DDA-5ED8FB00BD4E" # 70.3 Women 2023
# subevent_id = "0279BEB6-4DAF-4909-BD37-CD4933BCE10A" # 70.3 Men 2023
# subevent_id = "203D5F2F-9E1A-4355-94B4-09ECF8B5BB84" # 70.3 Combined 2021

# Re-read the results file and keep only rows whose status is "Finish".
df = (
    pd.read_csv(data_folder(f"im/csv/{subevent_id}.csv"))
    .loc[lambda results: results["EventStatus"] == "Finish"]  # only finishers
)

# Histogram of the FinishTime column over the finishers, using 50 bins.
fig = px.histogram(data_frame=df, x="FinishTime", nbins=50)
fig.show()