In [39]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

directory_data = "../data"

athletes = pd.read_csv(f"{directory_data}/athlete_events.csv")
regions = pd.read_csv(f"{directory_data}/noc_regions.csv")
merged = pd.merge(athletes, regions, on="NOC", how="outer")
df = merged


# Vi kan ej skilja på os-deltagare från OS 1908 eller 1912 då alla är från "region": Australia 
anz = df[df["NOC"]== "ANZ"]
print(anz["region"].value_counts())


region
Australia    86
Name: count, dtype: int64


In [40]:
# Tar bort 1906 års tävlingar som ej räknas som officiella os tävlingar.
df = df[df["Year"]!= 1906]

In [41]:
df.head()
# Personen i fråga fick en honorär utmärkelse som är noterad som guld i datasetet.
aus_1924 = df[(df["Year"] == 1924) & (df["NOC"] == "AUS")]
aus_1924[aus_1924["Season"] == "Winter"]


Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal,region,notes
7323,35388.0,George Ingle Finch,M,35.0,,,Australia,AUS,1924 Winter,1924.0,Winter,Chamonix,Alpinism,Alpinism Mixed Alpinism,Gold,Australia,


In [42]:
# Sorterar ut personen ovanför för att få korrekta grafer nedan.
df = df[df["Sport"] != "Alpinism"]

In [43]:
aus = df[df["NOC"].isin(["AUS", "ANZ"])]
gender_per_year = aus.groupby(["Year", "Season", "Sex"], as_index=False).size().rename(columns={"size":"Count"}
)
gender_per_year["Total"] = gender_per_year.groupby(["Year", "Season"])["Count"].transform("sum")
gender_per_year["Percent"] = gender_per_year["Count"] / gender_per_year["Total"]* 100
gender_per_year_women = gender_per_year[gender_per_year["Sex"] == "F"]
fig = px.scatter(
    gender_per_year_women, x="Year", y="Percent",facet_row="Season",
    hover_data={
        "Count": True,
        "Total": True,
        "Percent": ":.2f"
    }
    )
fig.add_scatter(
    x=gender_per_year_women["Year"],
    y=gender_per_year_women.groupby("Year")["Percent"].mean().values,
    mode="lines",
    name="Trend percent"
)
    
fig.show()


In [44]:
fig = px.bar(
    gender_per_year, x="Year", y="Percent", color="Sex",
    barmode= "stack",facet_row="Season",
    width=900,
    height=500,
    hover_data={
        "Count": True,
        "Total": True,
        "Percent": ":.2f"
    }
    )
fig.show()

In [20]:
per_country_year = df.groupby(["Year","Season", "Team"])["Sex"].value_counts().unstack()
per_country_year["Percent_women_global"] = per_country_year["F"] / (per_country_year["F"] + per_country_year["M"]) * 100
global_avg_per_year = (per_country_year.groupby(["Year", "Season"])["Percent_women_global"].mean())

df_aus = df[df["Team"] == "Australia"]
per_year_aus = df_aus.groupby(["Year", "Season"])["Sex"].value_counts().unstack(fill_value=0)
per_year_aus["Percent_women_aus"] = per_year_aus["F"] / (per_year_aus["F"] + per_year_aus["M"]) *100

compare = pd.concat([global_avg_per_year, per_year_aus["Percent_women_aus"]], axis=1)
compare_reset = compare.reset_index()
compare_reset = compare_reset.rename(columns={
    "Percent_women_global": "Globalt",
    "Percent_women_aus": "Australien"
})
fig = px.bar(
    compare_reset, x="Year", y=["Globalt", "Australien"],
    barmode="group",
    facet_row="Season",
    labels={
        "value": "Andel kvinnor %",
        "variable": "Kategori",
        "Year": "År",
        "Season": "Säsong",

    },
    title="Andel kvinnliga deltagare - Globalt vs Australien (stapeldiagram)"
)
fig.show()

fig=px.line(
    compare_reset, x="Year", y=["Globalt", "Australien"],
    facet_row="Season",
    labels={
        "value": "Andel kvinnor %",
        "variable": "Kategori",
        "Year": "År",
        "Season": "Säsong",
    },
    title="Andel kvinnliga deltagare - Globalt vs Australien (linjediagram)"
)
fig.show()

In [46]:
canada = df[df["NOC"] == "CAN"]
gender_per_year_can = canada.groupby(["Year", "Sex", "Season"], as_index=False).size().rename(columns={"size":"Count"})
gender_per_year_can["Total"] = gender_per_year_can.groupby(["Year", "Season"])["Count"].transform("sum")
gender_per_year_can["Percent"] = gender_per_year_can["Count"] / gender_per_year_can["Total"]* 100

fig = px.bar(
    gender_per_year_can, x="Year", y="Percent", color="Sex", barmode="stack", facet_row="Season",
    width=900,
    height=500,
    hover_data={
        "Count": True,
        "Total": True,
        "Percent": ":.2f"
    }
    )
    
fig.show()


In [19]:
countries = ["CAN", "SWE", "GBR"]


def gender_compare(df, country):
    country_df = df[df["NOC"] == country]
    gender = country_df.groupby(["Year", "Sex"], as_index=False).size().rename(columns={"size":"Count"})
    gender["Total"] = gender.groupby(["Year"])["Count"].transform("sum")
    gender["Percent"] = gender["Count"] / gender["Total"]* 100
    gender["NOC"] = country

    return gender

gender_per_year["NOC"] = "AUS"

df_gender = pd.concat([gender_compare(df, c) for c in countries], ignore_index=True)
df_gender = pd.concat([df_gender, gender_per_year],axis=0)



fig = px.bar(
        df_gender, x="Year", y="Percent", color="Sex", barmode="stack", facet_row="NOC",
        width=900,
        height=500,
        hover_data={
            "Count": True,
            "Total": True,
            "Percent": ":.2f"
        }
        )
    
fig.show()


In [48]:
df_female = df_gender[df_gender["Sex"] == "F"]
fig = px.line(
    df_female, x="Year", y="Percent", color="NOC", width=900,
        height=500
)
fig.show()

In [49]:
df_sorted = df_gender.sort_values(["NOC", "Year", "Sex"])

fig = px.area(
    df_sorted, x="Year", y="Percent", color="Sex",
    facet_col="NOC", line_group="Sex",
)
fig.show()

In [None]:
hockey = df[df["Sport"] == "Hockey"]
hockey = hockey[hockey["Medal"].notna()]
medals_hockey = (hockey.groupby(["NOC", "Medal"])
        .size().unstack(fill_value=0)) # Räknar de olika medaljerna och fyller med 0 där det saknas


medals_hockey["Total"] = (medals_hockey["Gold"] + medals_hockey["Silver"] + medals_hockey["Bronze"]) # Räknar ihop totala antal medaljer
top10_hockey = medals_hockey.sort_values(by="Total", ascending=False).head(10).reset_index()

medal_values = ["Bronze", "Silver", "Gold"]

fig = px.bar(
    top10_hockey, x=medal_values, y="NOC",
    color_discrete_map={"Gold":"#FFD700", "Silver": "#C0C0C0", "Bronze": "#CD7F32"},
    title="Top 10 länder i landhockey"
)


fig.show()
