In [1]:
import pandas as pd
import seaborn as sns
import plotly_express as px
import matplotlib.pyplot as plt

directory_data = "../data"

athletes = pd.read_csv(f"{directory_data}/athlete_events.csv")
regions = pd.read_csv(f"{directory_data}/noc_regions.csv")
merged = pd.merge(athletes, regions, on="NOC", how="outer")
df = merged


# Vi kan ej skilja på os-deltagare från OS 1908 eller 1912 då alla är från "region": Australia 
anz = df[df["NOC"]== "ANZ"]
print(anz["region"].value_counts())


region
Australia    86
Name: count, dtype: int64


In [2]:
# Tar bort 1906 års tävlingar som ej räknas som officiella os tävlingar.
df = df[df["Year"]!= 1906]

In [3]:
df.head()
# Personen i fråga fick en honorär utmärkelse som är noterad som guld i datasetet.
aus_1924 = df[(df["Year"] == 1924) & (df["NOC"] == "AUS")]
aus_1924[aus_1924["Season"] == "Winter"]


Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal,region,notes
7323,35388.0,George Ingle Finch,M,35.0,,,Australia,AUS,1924 Winter,1924.0,Winter,Chamonix,Alpinism,Alpinism Mixed Alpinism,Gold,Australia,


In [4]:
# Sorterar ut personen ovanför för att få korrekta grafer nedan.
df = df[df["Sport"] != "Alpinism"]

In [5]:

aus = df[df["NOC"].isin(["AUS", "ANZ"])]
gender_per_year = aus.groupby(["Year", "Season", "Sex"], as_index=False).size().rename(columns={"size":"Count"}
)
gender_per_year["Total"] = gender_per_year.groupby(["Year", "Season"])["Count"].transform("sum")
gender_per_year["Percent"] = gender_per_year["Count"] / gender_per_year["Total"]* 100

fig = px.line(
    gender_per_year, x="Year", y="Percent", color="Sex",facet_row="Season",
    hover_data={
        "Count": True,
        "Total": True,
        "Percent": ":.2f"
    }
    )
    
fig.show()


In [6]:
fig = px.bar(
    gender_per_year, x="Year", y="Percent", color="Sex",
    barmode= "stack",facet_row="Season",
    hover_data={
        "Count": True,
        "Total": True,
        "Percent": ":.2f"
    }
    )
fig.show()

In [23]:
hockey = df[df["Sport"] == "Hockey"]
hockey = hockey[hockey["Medal"].notna()]
medals = (hockey.groupby(["NOC", "Medal"])
        .size().unstack(fill_value=0)) # Räknar medaljer per land

medals["Total"] = medals["Gold"] + medals["Silver"] + medals["Bronze"]
top10 = medals.sort_values(by="Total", ascending=False).head(10).reset_index()

medal_values = ["Bronze", "Silver", "Gold"]


fig = px.bar(
    top10,x=medal_values, y="NOC",
    color_discrete_map={"Gold":"#FFD700", "Silver": "#C0C0C0", "Bronze": "#CD7F32"},
    title="Top 10 countries in fieldhockey (Medals by type)"
)
fig.show()
