In [None]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from plotly.subplots import make_subplots

In [None]:
# Load the three input tables: Summer medals, Winter medals, and the
# country dictionary used further down to resolve country codes to names.
datasetSummer = pd.read_csv(filepath_or_buffer="../input/olympic-games/summer.csv")
datasetWinter = pd.read_csv(filepath_or_buffer="../input/olympic-games/winter.csv")
country = pd.read_csv(filepath_or_buffer="../input/olympic-games/dictionary.csv")

In [None]:
# Count the missing values in each column of the Summer table.
datasetSummer.isna().sum()

In [None]:
# Count the missing values in each column of the Winter table.
datasetWinter.isna().sum()

In [None]:
# Preview the first rows of the raw Summer table.
datasetSummer.head()

In [None]:
# Preview the first rows of the raw Winter table.
datasetWinter.head()

In [None]:
# Preview the country dictionary (its "Country" and "Code" columns are used in the merge below).
country.head()

In [None]:
# Attach the dictionary's country name to every medal row. The medal tables'
# "Country" column is matched against the dictionary's "Code" column.
# Both sides have a "Country" column, so pandas suffixes them as
# "Country_x" (from the medal table) and "Country_y" (from the dictionary).
# The default join is an inner join, so medal rows without a matching
# dictionary code are dropped.
countryLookup = country[["Country", "Code"]]
datasetSummer = datasetSummer.merge(countryLookup, left_on="Country", right_on="Code")
datasetWinter = datasetWinter.merge(countryLookup, left_on="Country", right_on="Code")

In [None]:
# Preview the merged Summer table (still carrying the Country_x / Country_y pair from the merge).
datasetSummer.head()

In [None]:
# Drop the original country column ("Country_x") and promote the name that
# came from the dictionary ("Country_y") to be the single "Country" column.
datasetSummer = (
    datasetSummer
    .drop(columns=["Country_x"])
    .rename(columns={"Country_y": "Country"})
)
datasetWinter = (
    datasetWinter
    .drop(columns=["Country_x"])
    .rename(columns={"Country_y": "Country"})
)

In [None]:
# Preview the Summer table after the country columns have been resolved.
datasetSummer.head()

# Summer Olympics Analysis

## Medal Distribution (Choropleth)

In [None]:
# World map coloured by the number of Summer medal rows per country.
# The per-country counts are computed once and reused for both the
# locations and the colour values.
summerCountsByCountry = datasetSummer.groupby("Country").count()
world1 = px.choropleth(
    locations=summerCountsByCountry.index,
    color=summerCountsByCountry["Event"],
    locationmode="country names",
    labels={"locations": "Country", "color": "Medals"},
    width=800,
    title="Medal Distribution",
)
world1.show()

## Athletes With the Most Medals (per Medal Type)

In [None]:
# For each medal type, show the athlete with the largest number of medals:
# count rows per (Athlete, Medal), rank by the "Event" count, then keep the
# first (highest) row for each medal type.
summerAthleteCounts = datasetSummer.groupby(["Athlete", "Medal"]).count()
summerAthleteRanking = summerAthleteCounts.sort_values(by="Event", ascending=False).reset_index()
summerAthleteRanking.drop_duplicates("Medal")[["Athlete", "Medal", "Event"]]

## Summer Olympics Medal Distribution

### References

https://stackoverflow.com/questions/19202093/how-to-select-columns-from-groupby-object-in-pandas

In [None]:
# Bronze medals per country (Summer); the "Year" count column is relabelled "Count".
isBronze = datasetSummer["Medal"] == "Bronze"
bronzeDistribution = (
    datasetSummer[isBronze]
    .groupby(["Country"])
    .count()[["Year"]]
    .rename(columns={"Year": "Count"})
)
bronzeDistribution.head()

In [None]:
# Silver medals per country (Summer); the "Year" count column is relabelled "Count".
isSilver = datasetSummer["Medal"] == "Silver"
silverDistribution = (
    datasetSummer[isSilver]
    .groupby(["Country"])
    .count()[["Year"]]
    .rename(columns={"Year": "Count"})
)
silverDistribution.head()

In [None]:
# Gold medals per country (Summer); the "Year" count column is relabelled "Count".
isGold = datasetSummer["Medal"] == "Gold"
goldDistribution = (
    datasetSummer[isGold]
    .groupby(["Country"])
    .count()[["Year"]]
    .rename(columns={"Year": "Count"})
)
goldDistribution.head()

In [None]:
# Stacked bar chart of Summer medals per country, one trace per medal type.
# Fix: the bronze trace was labelled "Brozne" in the legend.
fig1 = make_subplots()
for medalName, distribution, barColor in (
    ("Bronze", bronzeDistribution, '#cd7f32'),
    ("Silver", silverDistribution, '#c0c0c0'),
    ("Gold", goldDistribution, '#daa520'),
):
    fig1.add_trace(
        go.Bar(
            x=distribution.index.get_level_values('Country'),
            y=distribution["Count"],
            name=medalName,
            marker_color=barColor,
        )
    )
fig1.update_layout(
    width=1200,
    title="Medal Distribution",
    xaxis_title="Country",
    yaxis_title="Count",
    barmode="stack",
)
fig1.show()

## Summer Olympics Top 10 Countries

In [None]:
# The ten countries with the most Summer medal rows, with the count in a "Count" column.
top10Countries = (
    datasetSummer
    .groupby("Country")
    .count()[["Year"]]
    .sort_values(by="Year", ascending=False)
    .rename(columns={"Year": "Count"})
    .head(10)
)
top10Countries

In [None]:
# Bronze medal rows restricted to the top-10 countries (inner merge), then
# counted per country and ranked by that count.
bronzeRows = datasetSummer[datasetSummer["Medal"] == "Bronze"]
bronzeOfTop10 = pd.merge(
    bronzeRows,
    top10Countries,
    left_on="Country",
    right_on="Country",
    how="inner",
)
bronzeOfTop10.groupby(by=["Country"]).count().sort_values(by="Count", ascending=False)

In [None]:
# Bar chart of gold / silver / bronze counts for the top-10 Summer countries.
# Each medal's ranked table is built once and supplies both x (countries)
# and y (counts) for its trace.
fig2 = make_subplots()
for medalName, barColor in (
    ("Gold", '#daa520'),
    ("Silver", '#c0c0c0'),
    ("Bronze", '#cd7f32'),
):
    medalRows = datasetSummer[datasetSummer["Medal"] == medalName]
    rankedCounts = (
        pd.merge(medalRows, top10Countries, left_on="Country", right_on="Country", how="inner")
        .groupby(by=["Country"])
        .count()
        .sort_values(by="Count", ascending=False)
    )
    fig2.add_trace(
        go.Bar(
            x=rankedCounts.index,
            y=rankedCounts["Count"],
            name=medalName,
            marker_color=barColor,
        )
    )
fig2.update_layout(
    title="Summer Olympics Top 10 Countries",
    xaxis_title="Countries",
    yaxis_title="Medal Count",
    width=1200,
)
fig2.show()

## Country Comparison

In [None]:
# Country Comparison (Summer): two bar traces, each with its own dropdown
# that swaps the country shown by that trace.
#
# Fixes relative to the previous version:
#   * Every y vector is aligned to one fixed medal order. Previously a
#     country lacking a medal type produced a shorter y vector that was
#     drawn against the unchanged x categories, so counts appeared under
#     the wrong medal.
#   * Each dropdown's `active` button now matches the country initially
#     plotted, instead of the first/second entry of the country list.
#   * Button args carry plain Python lists instead of pandas Series.

# Same (alphabetical) category order the original x axis had.
medalOrder = ["Bronze", "Gold", "Silver"]
initialCountries = ["Hungary", "Austria"]

# Country x Medal table of counts; missing combinations become 0.
summerMedalTable = (
    datasetSummer
    .groupby(["Country", "Medal"])["Code"]
    .count()
    .unstack(fill_value=0)
    .reindex(columns=medalOrder, fill_value=0)
)
# Countries in order of first appearance (this is the dropdown order).
summerCountryNames = datasetSummer["Country"].dropna().unique().tolist()


def summerMedalCounts(countryName):
    """Return the medal counts of `countryName`, ordered like `medalOrder`.

    Countries absent from the table yield all zeros.
    """
    if countryName not in summerMedalTable.index:
        return [0] * len(medalOrder)
    return summerMedalTable.loc[countryName].tolist()


def summerCountryButtons(traceIndex):
    """Build one restyle button per country, targeting trace `traceIndex`."""
    return [
        dict(
            label=countryName,
            args=[{"y": [summerMedalCounts(countryName)], "name": countryName}, [traceIndex]],
            method="restyle",
        )
        for countryName in summerCountryNames
    ]


def summerActiveIndex(countryName):
    """Return the dropdown index of `countryName`, or -1 (none active) if absent."""
    if countryName in summerCountryNames:
        return summerCountryNames.index(countryName)
    return -1


fig3 = make_subplots()
for countryName in initialCountries:
    fig3.add_trace(go.Bar(x=medalOrder, y=summerMedalCounts(countryName), name=countryName))

buttons1 = summerCountryButtons(0)
buttons2 = summerCountryButtons(1)

fig3.update_layout(
    title="Country Comparison",
    updatemenus=[
        dict(
            buttons=buttons1,
            active=summerActiveIndex(initialCountries[0]),
            direction="down",
            showactive=True,
            xanchor="left",
            yanchor="top",
            x=0.41,
            y=1.13
        ),
        dict(
            buttons=buttons2,
            active=summerActiveIndex(initialCountries[1]),
            direction="down",
            showactive=True,
            xanchor="left",
            yanchor="top",
            x=0.71,
            y=1.13
        )
    ],
    height=500,
    width=800
)
fig3.show()

## Gender vs. Medal Distribution

In [None]:
# Histogram of Summer medal types, coloured by the "Gender" column.
fig4 = px.histogram(
    data_frame=datasetSummer,
    x="Medal",
    color="Gender",
)
fig4.update_layout(title_text="Gender vs. Medal Distribution")
fig4.show()

## Yearly Distribution of Medals

In [None]:
# Line chart of the number of Summer medals of each type per year.
#
# Fix: each medal's yearly counts are reindexed onto the full, sorted `Year`
# axis (missing years become 0). Previously x=Year was paired positionally
# with per-medal group counts, which mismatches whenever a medal type is
# absent in some year.
Year = np.sort(datasetSummer["Year"].unique())
medalColors = {"Bronze": "#cd7f32", "Silver": "#c0c0c0", "Gold": "#daa520"}

# Year x Medal table of counts, aligned to every year and every medal type.
summerYearlyCounts = (
    datasetSummer
    .groupby(["Year", "Medal"])["Code"]
    .count()
    .unstack(fill_value=0)
    .reindex(index=Year, columns=list(medalColors), fill_value=0)
)

fig5 = make_subplots()
for medalName, lineColor in medalColors.items():
    fig5.add_trace(
        go.Scatter(
            x=Year,
            y=summerYearlyCounts[medalName],
            marker_color=lineColor,
            name=medalName,
        )
    )
fig5.update_layout(title="Yearly Distribution of Medals", xaxis_title="Year", yaxis_title="Count")
fig5.show()

## Top 10 Countries of each year and their Medal Distribution

In [None]:
# One subplot per Summer Games year, showing the medal breakdown of that
# year's ten countries with the most medals. Relies on `Year` (sorted array
# of years) from the yearly-distribution cell.
#
# Fixes relative to the previous version:
#   * The Silver and Gold traces used x=countries (ordered by total medals)
#     with y taken from a groupby ordered alphabetically by country, so bars
#     were labelled with the wrong countries (and lengths could differ when a
#     country had no medal of that type). All traces are now reindexed onto
#     the same `countries` order, with 0 for missing combinations.
#   * The number of subplot rows is derived from the number of years instead
#     of being hard-coded.
numCols = 2
numRows = max(1, (len(Year) + numCols - 1) // numCols)
medalColors = {"Bronze": "#cd7f32", "Silver": "#c0c0c0", "Gold": "#daa520"}

fig6 = make_subplots(rows=numRows, cols=numCols, subplot_titles=[str(y) for y in Year])
for position, i in enumerate(Year):
    yearRows = datasetSummer[datasetSummer["Year"] == i]
    # Top ten countries of this year by total medal rows.
    countries = (
        yearRows
        .groupby(["Country"])
        .count()
        .sort_values(by="Code", ascending=False)
        .head(10)
        .index
    )
    # Country x Medal counts for this year, aligned to `countries`.
    yearMedalTable = (
        yearRows
        .groupby(["Country", "Medal"])["Code"]
        .count()
        .unstack(fill_value=0)
        .reindex(index=countries, columns=list(medalColors), fill_value=0)
    )

    # Fill the grid left-to-right, top-to-bottom (1-based for plotly).
    rows, cols = position // numCols + 1, position % numCols + 1
    for medalName, barColor in medalColors.items():
        fig6.add_trace(
            go.Bar(x=countries, y=yearMedalTable[medalName], name=medalName, marker_color=barColor),
            row=rows,
            col=cols,
        )
fig6.update_layout(height=5000, width=1200, showlegend=False, title="Top 10 Countries of each year and their Medal Distribution")
fig6.show()

## Performance of Men vs Women

In [None]:
# Medal counts per year and medal type, one facet row per "Gender" value (Summer).
summerGenderYearMedal = (
    datasetSummer
    .groupby(["Gender", "Year", "Medal"])
    .count()
    .reset_index()
    .rename(columns={"Event": "Medal Count"})
)
fig7 = px.line(
    summerGenderYearMedal,
    x="Year",
    y="Medal Count",
    color="Medal",
    facet_row="Gender",
    color_discrete_map={"Bronze": '#cd7f32', "Silver": '#c0c0c0', "Gold": '#daa520'},
)
fig7.update_layout(title="Performance of Men vs Women", height=600, width=1200)
fig7.show()

# Winter Olympics Analysis

In [None]:
# World map coloured by the number of Winter medal rows per country.
# The per-country counts are computed once and reused for both the
# locations and the colour values.
winterCountsByCountry = datasetWinter.groupby("Country").count()
world2 = px.choropleth(
    locations=winterCountsByCountry.index,
    color=winterCountsByCountry["Event"],
    locationmode="country names",
    labels={"locations": "Country", "color": "Medals"},
    width=800,
    title="Medal Distribution",
)
world2.show()

## Athletes with the Most Medals (per Medal Type)

In [None]:
# For each medal type, show the athlete with the largest number of medals:
# count rows per (Athlete, Medal), rank by the "Event" count, then keep the
# first (highest) row for each medal type.
winterAthleteCounts = datasetWinter.groupby(["Athlete", "Medal"]).count()
winterAthleteRanking = winterAthleteCounts.sort_values(by="Event", ascending=False).reset_index()
winterAthleteRanking.drop_duplicates("Medal")[["Athlete", "Medal", "Event"]]

## Athletes with More Medals

In [None]:
# NOTE(review): this cell computes exactly the same table as the preceding
# "Athletes with Higher Medals" cell — confirm whether a different ranking
# (e.g. total medals per athlete) was intended here.
(
    datasetWinter
    .groupby(["Athlete", "Medal"])
    .count()
    .sort_values(by="Event", ascending=False)
    .reset_index()
    .drop_duplicates("Medal")
    [["Athlete", "Medal", "Event"]]
)

## Winter Olympics Medal Distribution

In [None]:
# Per-country counts of each medal type for the Winter Games. The "Year"
# count column is relabelled "Count". These reuse the variable names of the
# Summer section.
winterByMedal = {
    medalName: (
        datasetWinter[datasetWinter["Medal"] == medalName]
        .groupby(["Country"])
        .count()[["Year"]]
        .rename(columns={"Year": "Count"})
    )
    for medalName in ("Bronze", "Silver", "Gold")
}

bronzeDistribution = winterByMedal["Bronze"]

silverDistribution = winterByMedal["Silver"]

goldDistribution = winterByMedal["Gold"]

In [None]:
# Stacked bar chart of Winter medals per country, one trace per medal type.
# Fix: the bronze trace was labelled "Brozne" in the legend.
fig1 = make_subplots()
for medalName, distribution, barColor in (
    ("Bronze", bronzeDistribution, '#cd7f32'),
    ("Silver", silverDistribution, '#c0c0c0'),
    ("Gold", goldDistribution, '#daa520'),
):
    fig1.add_trace(
        go.Bar(
            x=distribution.index.get_level_values('Country'),
            y=distribution["Count"],
            name=medalName,
            marker_color=barColor,
        )
    )
fig1.update_layout(
    width=1200,
    title="Medal Distribution",
    xaxis_title="Country",
    yaxis_title="Count",
    barmode="stack",
)
fig1.show()

## Winter Olympics Top 10 Countries

In [None]:
# The ten countries with the most Winter medal rows, with the count in a "Count" column.
top10Countries = (
    datasetWinter
    .groupby("Country")
    .count()[["Year"]]
    .sort_values(by="Year", ascending=False)
    .rename(columns={"Year": "Count"})
    .head(10)
)
top10Countries

In [None]:
# Bar chart of gold / silver / bronze counts for the top-10 Winter countries.
# Each medal's ranked table is built once and supplies both x (countries)
# and y (counts) for its trace.
fig2 = make_subplots()
for medalName, barColor in (
    ("Gold", '#daa520'),
    ("Silver", '#c0c0c0'),
    ("Bronze", '#cd7f32'),
):
    medalRows = datasetWinter[datasetWinter["Medal"] == medalName]
    rankedCounts = (
        pd.merge(medalRows, top10Countries, left_on="Country", right_on="Country", how="inner")
        .groupby(by=["Country"])
        .count()
        .sort_values(by="Count", ascending=False)
    )
    fig2.add_trace(
        go.Bar(
            x=rankedCounts.index,
            y=rankedCounts["Count"],
            name=medalName,
            marker_color=barColor,
        )
    )
fig2.update_layout(
    title="Winter Olympics Top 10 Countries",
    xaxis_title="Countries",
    yaxis_title="Medal Count",
    width=1200,
)
fig2.show()

## Country Comparison

In [None]:
# Country Comparison (Winter): two bar traces, each with its own dropdown
# that swaps the country shown by that trace.
#
# Fixes relative to the previous version:
#   * Every y vector is aligned to one fixed medal order. Previously a
#     country lacking a medal type produced a shorter y vector that was
#     drawn against the unchanged x categories, so counts appeared under
#     the wrong medal.
#   * Each dropdown's `active` button now matches the country initially
#     plotted, instead of the first/second entry of the country list.
#   * Button args carry plain Python lists instead of pandas Series.

# Same (alphabetical) category order the original x axis had.
medalOrder = ["Bronze", "Gold", "Silver"]
initialCountries = ["France", "Switzerland"]

# Country x Medal table of counts; missing combinations become 0.
winterMedalTable = (
    datasetWinter
    .groupby(["Country", "Medal"])["Code"]
    .count()
    .unstack(fill_value=0)
    .reindex(columns=medalOrder, fill_value=0)
)
# Countries in order of first appearance (this is the dropdown order).
winterCountryNames = datasetWinter["Country"].dropna().unique().tolist()


def winterMedalCounts(countryName):
    """Return the medal counts of `countryName`, ordered like `medalOrder`.

    Countries absent from the table yield all zeros.
    """
    if countryName not in winterMedalTable.index:
        return [0] * len(medalOrder)
    return winterMedalTable.loc[countryName].tolist()


def winterCountryButtons(traceIndex):
    """Build one restyle button per country, targeting trace `traceIndex`."""
    return [
        dict(
            label=countryName,
            args=[{"y": [winterMedalCounts(countryName)], "name": countryName}, [traceIndex]],
            method="restyle",
        )
        for countryName in winterCountryNames
    ]


def winterActiveIndex(countryName):
    """Return the dropdown index of `countryName`, or -1 (none active) if absent."""
    if countryName in winterCountryNames:
        return winterCountryNames.index(countryName)
    return -1


fig3 = make_subplots()
for countryName in initialCountries:
    fig3.add_trace(go.Bar(x=medalOrder, y=winterMedalCounts(countryName), name=countryName))

buttons1 = winterCountryButtons(0)
buttons2 = winterCountryButtons(1)

fig3.update_layout(
    title="Country Comparison",
    updatemenus=[
        dict(
            buttons=buttons1,
            active=winterActiveIndex(initialCountries[0]),
            direction="down",
            showactive=True,
            xanchor="left",
            yanchor="top",
            x=0.53,
            y=1.13
        ),
        dict(
            buttons=buttons2,
            active=winterActiveIndex(initialCountries[1]),
            direction="down",
            showactive=True,
            xanchor="left",
            yanchor="top",
            x=0.77,
            y=1.13
        )
    ],
    height=500,
    width=800
)
fig3.show()

## Gender vs. Medal Distribution

In [None]:
# Histogram of Winter medal types, coloured by the "Gender" column.
fig4 = px.histogram(
    data_frame=datasetWinter,
    x="Medal",
    color="Gender",
)
fig4.update_layout(title_text="Gender vs. Medal Distribution")
fig4.show()

In [None]:
# Line chart of the number of Winter medals of each type per year.
#
# Fix: each medal's yearly counts are reindexed onto the full, sorted `Year`
# axis (missing years become 0). Previously x=Year was paired positionally
# with per-medal group counts, which mismatches whenever a medal type is
# absent in some year.
Year = np.sort(datasetWinter["Year"].unique())
medalColors = {"Bronze": "#cd7f32", "Silver": "#c0c0c0", "Gold": "#daa520"}

# Year x Medal table of counts, aligned to every year and every medal type.
winterYearlyCounts = (
    datasetWinter
    .groupby(["Year", "Medal"])["Code"]
    .count()
    .unstack(fill_value=0)
    .reindex(index=Year, columns=list(medalColors), fill_value=0)
)

fig5 = make_subplots()
for medalName, lineColor in medalColors.items():
    fig5.add_trace(
        go.Scatter(
            x=Year,
            y=winterYearlyCounts[medalName],
            marker_color=lineColor,
            name=medalName,
        )
    )
fig5.update_layout(title="Yearly Distribution of Medals", xaxis_title="Year", yaxis_title="Count")
fig5.show()

## Top 10 Countries of each year and their Medal Distribution

In [None]:
# One subplot per Winter Games year, showing the medal breakdown of that
# year's ten countries with the most medals. Relies on `Year` (sorted array
# of years) from the yearly-distribution cell.
#
# Fixes relative to the previous version:
#   * The Silver and Gold traces used x=countries (ordered by total medals)
#     with y taken from a groupby ordered alphabetically by country, so bars
#     were labelled with the wrong countries (and lengths could differ when a
#     country had no medal of that type). All traces are now reindexed onto
#     the same `countries` order, with 0 for missing combinations.
#   * The number of subplot rows is derived from the number of years instead
#     of being hard-coded.
numCols = 2
numRows = max(1, (len(Year) + numCols - 1) // numCols)
medalColors = {"Bronze": "#cd7f32", "Silver": "#c0c0c0", "Gold": "#daa520"}

fig6 = make_subplots(rows=numRows, cols=numCols, subplot_titles=[str(y) for y in Year])
for position, i in enumerate(Year):
    yearRows = datasetWinter[datasetWinter["Year"] == i]
    # Top ten countries of this year by total medal rows.
    countries = (
        yearRows
        .groupby(["Country"])
        .count()
        .sort_values(by="Code", ascending=False)
        .head(10)
        .index
    )
    # Country x Medal counts for this year, aligned to `countries`.
    yearMedalTable = (
        yearRows
        .groupby(["Country", "Medal"])["Code"]
        .count()
        .unstack(fill_value=0)
        .reindex(index=countries, columns=list(medalColors), fill_value=0)
    )

    # Fill the grid left-to-right, top-to-bottom (1-based for plotly).
    rows, cols = position // numCols + 1, position % numCols + 1
    for medalName, barColor in medalColors.items():
        fig6.add_trace(
            go.Bar(x=countries, y=yearMedalTable[medalName], name=medalName, marker_color=barColor),
            row=rows,
            col=cols,
        )
fig6.update_layout(height=5000, width=1200, showlegend=False, title="Top 10 Countries of each year and their Medal Distribution")
fig6.show()

## Performance of Men vs Women

In [None]:
# Medal counts per year and medal type, one facet row per "Gender" value (Winter).
winterGenderYearMedal = (
    datasetWinter
    .groupby(["Gender", "Year", "Medal"])
    .count()
    .reset_index()
    .rename(columns={"Event": "Medal Count"})
)
fig7 = px.line(
    winterGenderYearMedal,
    x="Year",
    y="Medal Count",
    color="Medal",
    facet_row="Gender",
    color_discrete_map={"Bronze": '#cd7f32', "Silver": '#c0c0c0', "Gold": '#daa520'},
)
fig7.update_layout(title="Performance of Men vs Women", height=600, width=1200)
fig7.show()