# Australia in Olympics

In [47]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import plotly.express as px
import numpy as np
import hashlib as hl

directory_data = "../data"                                     # Keep pathway flexible (Adams version)

athletes = pd.read_csv(f"{directory_data}/athlete_events.csv")
regions = pd.read_csv(f"{directory_data}/noc_regions.csv")

# Anonymize names:
athletes["Name"] = athletes["Name"].astype(str)                #"Name" to str
athletes["Hash"] = athletes["Name"].apply(lambda x: hl.sha256(x.encode()).hexdigest())  #Hash values of names
athletes.drop(columns=["Name"], inplace=True)                  # Anonymize

merged = pd.merge(athletes, regions, on="NOC", how="outer")    # merge on NOC (olympc standard landcodes)
                                                               #"outer" to have correct data for when exploring. 



## Australian Olympic Athlete Participation

Australia competed together with New Zealand before WW1 under the regional name “Australasia.”
An interesting sidenote: after WW1, New Zealand began competing independently. This shift coincides with a growing sense of national identity among New Zealanders. An identity shaped by the country’s heavy losses during the war, its increasingly distinct sense of cultural identity, and a gradual move away from being seen as merely part of the British Commonwealth.

In [48]:

australia = merged[merged["NOC"].isin(["AUS", "ANZ"])]   #sort on both "AUS" and "ANZ"

In [49]:
#merge "AUS" and "ANS" to get both NOC:S
australia = merged[merged["NOC"].isin(["AUS", "ANZ"])]   




participation_historical_aus = australia.groupby("Year")["ID"].nunique() #. groupby() gör om year till index
                                            # groupby(): skapa lådan (x-axel), .nunique() innehåll [y-axel]

df_aus_partition = participation_historical_aus.reset_index() #.reset_index Gör om year från index till kolumn.
df_aus_partition.columns = ["Year", "ID"]

fig = px.line(
    df_aus_partition,
    x= "Year", 
    y= "ID",
    title= "Australia participation 1896–2016"
)

fig.show()





### Sort on season för better stats

The Summer Olympics data shows spikes and drops that align with major events such as the 1956 Olympics in Melbourne (where all equestrian events were relocated to Stockholm due to Australia’s strict horse quarantine rules) and the 2000 Olympics in Sydney.

There is an interesting spike in Olympic participation in 1984.
This year marked the 200-year commemoration of the arrival of the British on Australian soil. The event is historically significant, but also deeply contested and far from universally celebratory, especially among Indigenous Australians.
It was also the year when the first direct flight between Australia and the United States was launched.

Even though neither a single-year commemoration nor a new flight route directly explains the increase in Olympic participation, the 1980s in Australia were characterised by a broader sense of national optimism and identity-building, which may help explain the rise in athlete numbers during this period.

In [50]:
# Split by Summer/Winter Olympics
australia_summer = australia[australia["Season"] == "Summer"]
australia_winter = australia[australia["Season"] == "Winter"]

# Count unique athletes per year (Summer)
aus_participation_summer = australia_summer.groupby("Year")["ID"].nunique()

# Reset index for Plotly 
df_aus_participation_summer = aus_participation_summer.reset_index()
df_aus_participation_summer.columns = ["Year", "ID"]

# Plot Summer Olympics participation
fig = px.line(
    df_aus_participation_summer,
    x="Year",
    y="ID",
    title="Summer Olympics Participation (Australia)"
)

fig.show()

An interesting observation in the Winter Olympics data is the noticeable spike in 1960.
This occurred because Australia qualified for ice hockey that year. The first and only time in history, which led to a significant increase in participating athletes.

The upward trend after 2002 marks the beginning of Australia’s rise in winter sports.
It all started with a seemingly unlikely gold medalist in speed skating: he won simply by being the only one who didn’t fall in a spectacular mass crash.
What began as an unexpected victory has since sparked a growing interest in winter sports in Australia. And additional gold medals have followed up until today.



In [51]:
# Count unique athletes per year (Winter)
aus_participation_winter = australia_winter.groupby("Year")["ID"].nunique()

# Reset index for Plotly
df_aus_participation_winter = aus_participation_winter.reset_index()
df_aus_participation_winter.columns = ["Year", "ID"]

# Plot Winter Olympics participation
fig = px.line(
    df_aus_participation_winter,
    x="Year",
    y="ID",
    title="Winter Olympics Participation (Australia)"
)

fig.show()

#make it a summer/winter plot instead.

In [52]:
#merge "AUS" and "ANS" to get both NOC:S
australia = merged[merged["NOC"].isin(["AUS", "ANZ"])]   



[]
participation_historical_aus = australia.groupby(["Year", "Season"])["ID"].nunique() #. groupby() gör om year till index
                                            # groupby(): skapa lådan (x-axel), .nunique() innehåll [y-axel]

df_aus_partition = participation_historical_aus.reset_index() #.reset_index Gör om year från index till kolumn.
df_aus_partition.columns = ["Year", "Season", "ID"]

fig = px.line(
    df_aus_partition,
    x= "Year", 
    y= "ID",
    color="Season",
    markers=True,
    title= "Australia participation 1896–2016"
)

fig.show()



log-skala.  fig-axeln logaritmisk

In [53]:
#merge "AUS" and "ANS" to get both NOC:S
australia = merged[merged["NOC"].isin(["AUS", "ANZ"])]   


participation_historical_aus = australia.groupby(["Year", "Season"])["ID"].nunique().reset_index(name="Participants") #. groupby() turns year to index
                                                                                                                      #  .nunique() content in y-axle

fig = px.line(
    participation_historical_aus,
    x= "Year", 
    y= "Participants",
    color="Season",
    markers=True,
    title= "Australia participation 1896–2016"
)

fig.update_yaxes(
    type="log",
    title_text="Participants (log scale)")                   #Clearer curve. Shows growth instead of absolute counts.


fig.show()

# Equestrianism

In [54]:
#sort on sports to find how equestrian sports is defined in the column-name
merged_sport = merged["Sport"].unique()
sport_equestrianism = merged[merged["Sport"] == "Equestrianism"]                       #sort out equestrianism
sport_equestrianism_medals = sport_equestrianism[sport_equestrianism["Medal"].notna()]

noc_region = merged[["NOC", "region"]].drop_duplicates().reset_index()

#Gold
sport_equestrianism_gold = sport_equestrianism[sport_equestrianism["Medal"] == "Gold"]

df_equestrian_gold = sport_equestrianism_gold["NOC"].value_counts().reset_index()    
df_equestrian_gold = df_equestrian_gold.rename(columns={"count": "Gold"})   

df_equestrian_noc_region_gold = df_equestrian_gold.merge(noc_region, on="NOC", how="left")

gold_equestrianism_top10 = df_equestrian_noc_region_gold.sort_values(by="Gold", ascending=False).head(10)  # top 10 med störst först.

country_colors = {
    "GER": "#000000",   
    "SWE": "#0057B8",   
    "FRA": "#0055A4",   
    "USA": "#3C3B6E",   
    "FRG": "#555555",  
    "GBR": "#C8102E",   
    "AUS": "#007A33",   
    "NED": "#FF7F00",  
    "URS": "#D52B1E",   
    "ITA": "#008C45"    
}

fig = px.bar(gold_equestrianism_top10,
            x="NOC",
            y="Gold", 
            title='Equestrianism - Countries top 10 gold medals',
            labels={'Gold':'Gold Medals'},
            color='NOC',                                                         # change från region to NOC
            color_discrete_map=country_colors,                                      # country colors 
            category_orders={"NOC": gold_equestrianism_top10["NOC"].tolist()}    #Does not follow the order from ascending=False
            )                                                                    #asked chatGPT for help
fig.update_layout(
    plot_bgcolor="#9F8F5E",     # backgruond
    paper_bgcolor="white"         #Frame
)


fig.show()
gold_equestrianism_top10["NOC"].tolist()



['GER', 'SWE', 'FRA', 'USA', 'FRG', 'GBR', 'AUS', 'NED', 'URS', 'ITA']

In [55]:
all_years = sorted(merged["Year"].unique())
equestrian_years = sorted(sport_equestrianism["Year"].unique())
missing_years = [year for year in all_years if year not in equestrian_years]
missing_years


[np.float64(1896.0),
 np.float64(1904.0),
 np.float64(1906.0),
 np.float64(1908.0),
 np.float64(1994.0),
 np.float64(1998.0),
 np.float64(2002.0),
 np.float64(2006.0),
 np.float64(2010.0),
 np.float64(2014.0),
 np.float64(nan)]

## Equestrianism through the years

Focusing only on gold medals does not give a representative picture of the sport. There are too few variables.

In [56]:
gold_year_noc = (sport_equestrianism_gold.groupby(["Year", "NOC"], as_index=False).size().rename(columns={"size": "Gold"})
)
#.groupby(["Year", "NOC"] -> grupperar guldmedaljer och noc - as_index=False -> behåller i kolumner 

sorted_historical_equestrianism = gold_year_noc.sort_values(["Year", "Gold"], ascending=[True, False])


top10_historical_equestrianism = sorted_historical_equestrianism.groupby("Year").head(20)


top10_historical_equestrianism["Year"].value_counts().sort_index()


country_colors_gold_years = {
    "BEL": "#FAE042",  # Belgium - yellow
    "FRA": "#0055A4",  # France - blue
    "ITA": "#008C45",  # Italy - green
    "SWE": "#0057B8",  # Sweden - blue
    "NED": "#FF7F00",  # Netherlands - orange
    "SUI": "#FF0000",  # Switzerland - red
    "GER": "#000000",  # Germany - black
    "ESP": "#AA151B",  # Spain - red
    "TCH": "#11457E",  # Czechoslovakia - blue
    "USA": "#3C3B6E",  # USA - navy
    "JPN": "#BC002D",  # Japan - red
    "MEX": "#006341",  # Mexico - green
    "GBR": "#C8102E",  # Great Britain - red
    "AUS": "#007A33",  # Australia - green
    "URS": "#D52B1E",  # Soviet Union - red
    "CAN": "#B22234",  # Canada - red
    "FRG": "#6B6B6B",  # West Germany - gray
    "AUT": "#ED2939",  # Austria - red
    "POL": "#D22630",  # Poland - red
    "NZL": "#00247D",  # New Zealand - blue
    "BRA": "#009C3B",  # Brazil - green
}

fig = px.bar(
    top10_historical_equestrianism,
    x="NOC",
    y="Gold",
    color="NOC",  # eller "NOC" om du hellre vill det
    animation_frame="Year",   # gör så du kan "bläddra" år för år
    title="Equestrianism – Top 10 guldmedalj-länder per år",
    labels={"Gold": "Guldmedalljer", "NOC": "Land (NOC)"}
)
fig.update_layout(
    plot_bgcolor="#9F8F5E",     # backgruond
    paper_bgcolor="white"         #Frame
)
fig.show()
top10_historical_equestrianism["NOC"].unique().tolist()

['BEL',
 'FRA',
 'ITA',
 'SWE',
 'NED',
 'SUI',
 'GER',
 'ESP',
 'TCH',
 'USA',
 'JPN',
 'MEX',
 'GBR',
 'AUS',
 'URS',
 'CAN',
 'FRG',
 'AUT',
 'POL',
 'NZL',
 'BRA']

## Equestrianism top 10 medals

In [58]:

sport_equestrianism_medals = sport_equestrianism[sport_equestrianism["Medal"].notna()]
medals_sorted_per_year = (sport_equestrianism_medals.groupby(["Year", "NOC", "Medal"])
                          .size()
                          .reset_index(name="Count"))                                 #convert "Counts" into a column


# Separate columns per medal type
medals_all_years = medals_sorted_per_year.pivot_table(
    values="Count",
    index=["Year", "NOC"],
    columns="Medal",
    fill_value=0
).reset_index()

medals_all_years.columns.name = None



medals_all_years["Total"] = (
    medals_all_years["Gold"]
    + medals_all_years["Silver"]
    + medals_all_years["Bronze"]
    )

medals_all_years["Year"] = medals_all_years["Year"].astype(int)

medals_all_sorted = medals_all_years.sort_values(
    ["Year", "Total"],
    ascending=[True, False]
)


top10_per_year = medals_all_sorted.groupby("Year").head(10)

# convert from wide to long format
sorted_medals_melt = medals_sorted_per_year(
    id_vars= ["Year", "NOC"],
    var_name="Medaltype",
    value_vars=["Gold", "Silver", "Bronze"],
    value_name="Amount"
    )

fig = px.bar(
    sorted_medals_melt,
    x= "NOC",
    y=["Gold","Silver", "Bronze"],
    color_discrete_map={
        "Gold":"#9F8F5E",
        "Silver": "#969696",
        "Bronze": "#996B4F"
        },
    title="TOP 10 Olympic Equestranism countries/year",
    labels={"value": "Medals total", "NOC":"Region", "variable":"Medals sort"},
    barmode="group"
)
# kod nedan utvecklad med hjälp av Claude (Anthropic, 2025). Konversation: 16 november 2025:
#frågan var hur jag kunde få tydligare graf.

fig.update_layout(                               
    xaxis_tickangle=-45,                         #vrider NOC text så den är lättare att läsa
    height=600,                                  # höjd på  grafen
    xaxis={'categoryorder': 'total descending'}  # Sorterar inom staplarna inom varje år från mest till minst
)

fig.show()
sorted_medals_melt.head()


TypeError: 'DataFrame' object is not callable

In [None]:
medals_sorted_per_year = (sport_equestrianism_medals.groupby(["Year", "NOC", "Medal"])
                          .size()
                          .reset_index(name="Count"))                                 #convert "Counts" into a column


# Separate columns per medal type
medals_all_years = medals_sorted_per_year.pivot_table(
    values="Count",
    index=["Year", "NOC"],
    columns="Medal",
    fill_value=0
).reset_index()

medals_all_years.columns.name = None

# convert from wide to long format
sorted_medals_melt = medals_sorted_per_year(
    id_vars= ["Year", "NOC"],
    var_name="Medaltype",
    var_name=["Gold", "Silver", "Bronze"],
    value_name="Amount"
    )

medals_all_years["Total"] = (
    medals_all_years["Gold"]
    + medals_all_years["Silver"]
    + medals_all_years["Bronze"]
    )

medals_all_years["Year"] = medals_all_years["Year"].astype(int)

medals_all_sorted = medals_all_years.sort_values(
    ["Year", "Total"],
    ascending=[True, False]
)


top10_per_year = medals_all_sorted.groupby("Year").head(10)


fig = px.bar(
    sorted_medals_melt,
    x= "NOC",
    y=["Gold","Silver", "Bronze"],
    color_discrete_map={
        "Gold":"#9F8F5E",
        "Silver": "#969696",
        "Bronze": "#996B4F"
        },
    title="TOP 10 Olympic Equestranism countries/year",
    labels={"value": "Medals total", "NOC":"Region", "variable":"Medals sort"},
    barmode="group"
)
# kod nedan utvecklad med hjälp av Claude (Anthropic, 2025). Konversation: 16 november 2025:
#frågan var hur jag kunde få tydligare graf.

fig.update_layout(                               
    xaxis_tickangle=-45,                         #vrider NOC text så den är lättare att läsa
    height=600,                                  # höjd på  grafen
    xaxis={'categoryorder': 'total descending'}  # Sorterar inom staplarna inom varje år från mest till minst
)

fig.show()



NameError: name 'sport_equestrianism_medals' is not defined

3 stapeldiagram och dropdown!
https://dash-example-index.herokuapp.com/cultural-dimensions

In [None]:
# Import necessary libraries
import plotly.express as px
import pandas as pd

# Create the DataFrame
data = {'Quarters': ['Q1', 'Q2', 'Q3', 'Q4'],
      'Product A': [200, 150, 100, 180],
      'Product B': [220, 130, 90, 150],
      'Product C': [210, 160, 130, 170]}

df = pd.DataFrame(data)

# Convert the DataFrame from wide to long format
df_melt = df.melt(id_vars='Quarters', var_name='Products', value_name='Sales')

# Create the grouped bar chart
fig = px.bar(df_melt, x='Quarters', y='Sales', color='Products', barmode='group')

# Show the plot
fig.show()