![Callysto.ca Banner](https://github.com/callysto/curriculum-notebooks/blob/master/callysto-notebook-banner-top.jpg?raw=true)

# Data Science Example - Olympics

Some visualizations of [statistics from the modern Olympic Games](https://www.kaggle.com/heesoo37/120-years-of-olympic-history-athletes-and-results/data#athlete_events.csv).

In [None]:
import pandas as pd
import plotly.io as pio
import plotly.express as px

In [None]:
# Load the Olympics table from the CSV file shipped alongside this notebook.
olympics_csv_path = 'data/olympics.csv'
olympics = pd.read_csv(olympics_csv_path)


In [None]:
# Count the rows recorded for each value of "Year".
# NOTE(review): size() counts rows, so an athlete who appears on several rows
# in the same year would be counted once per row -- confirm that this is the
# intended meaning of "Number of Athletes".
athletes_by_year = olympics.groupby(["Year"]).size()

# Line chart: year on the x-axis, row count on the y-axis.
fig = px.line(
    x=athletes_by_year.index,
    y=athletes_by_year.values,
    labels={"x": "Year", "y": "Number of Athletes"},
    title='Number of Athletes per Year (Olympics)',
)
fig.show()

# Optional: uncomment to export the chart as a standalone HTML file
#pio.write_html(fig,"olympics_Number_of_Athletes_per_Year.html", auto_open=True)

In [None]:
# Keep only the rows whose Season is 'Winter', then count rows per year.
is_winter = olympics['Season'] == 'Winter'
winter_athletes_by_year = olympics[is_winter].groupby(["Year"]).size()

# Same line chart as for all Games, restricted to the Winter rows.
fig = px.line(
    x=winter_athletes_by_year.index,
    y=winter_athletes_by_year.values,
    title='Number of Athletes per Year (Winter Olympics)',
    labels={"x": "Year", "y": "Number of Athletes"},
)
fig.show()

# Optional: uncomment to export the chart as a standalone HTML file
#pio.write_html(fig,"olympics_(WINTER)_Number_of_Athletes_per_Year.html", auto_open=True)

In [None]:
# Rows with a value in the "Medal" column; reused by the cells below.
medals = olympics.dropna(subset=["Medal"])

# Narrow the medal rows to the Winter season.
winter_only = medals["Season"] == "Winter"
medals_winter = medals[winter_only]

# Count rows per region, rank by the Medal count (largest first), keep ten.
region_counts = medals_winter.groupby('region').count()
ranked_regions = region_counts.sort_values('Medal', ascending=False)
top_ten = ranked_regions['Medal'].head(10)

# Bar chart: one bar per region, bar height = medal count.
fig = px.bar(
    x=top_ten.index,
    y=top_ten.values,
    labels={"x": "Country", 'y': 'Number of Medals'},
    title='Medals per Country (Winter Olympics, Top 10 Countries)',
)
fig.show()

# Optional: uncomment to export the chart as a standalone HTML file
#pio.write_html(fig,"olympics_top_10_medals.html", auto_open=True)

In [None]:
# Medal rows whose region is Canada; reused by the table cell below.
canada_medals = medals[medals['region']=='Canada']

# Count medal rows per sport, largest count first.
medals_per_sport = canada_medals.groupby('Sport').count().sort_values('Medal',ascending=False)['Medal']

# Bar chart: one bar per sport. The x-axis holds the "Sport" group keys, so it
# is labelled "Sport" (it was previously mislabelled "Country").
fig = px.bar(x=medals_per_sport.index,y=medals_per_sport.values,
             title='Canadian Olympic Medals per Sport',
             labels={"x":"Sport",'y':'Number of Medals'})

fig.show()

# Save as html
#pio.write_html(fig,"olympics_canadian_medals_per_sport.html", auto_open=True)

In [None]:
# Tabular view of the Canadian medal counts per sport, largest count first.
sport_counts = canada_medals.groupby('Sport').count()
ranked_sports = sport_counts.sort_values('Medal', ascending=False)

# One-column frame with the count column renamed from 'Medal' to 'Medals'.
df_canada_medals = pd.DataFrame(ranked_sports['Medal']).rename(columns={'Medal': 'Medals'})

# Last expression of the cell, so the notebook renders the table.
df_canada_medals

In [None]:
# Count medal rows per athlete name and keep the 20 largest counts.
medals_per_athlete = pd.DataFrame(medals.groupby('Name').count().sort_values('Medal',ascending=False).head(20)['Medal'])

# Attach one region per name. drop_duplicates('Name') keeps the first row seen
# for each name, so every athlete matches exactly one region row.
# 'Name' is the index level of medals_per_athlete and a column of the lookup
# table; merge(on=...) accepts both.
name_to_region = olympics[['Name','region']].drop_duplicates('Name')
medals_per_athlete_no_dp = medals_per_athlete.merge(name_to_region,on='Name')

# Horizontal bars: athlete names on the y-axis, medal counts on the x-axis,
# with each bar annotated with the athlete's region.
fig = px.bar(y=medals_per_athlete_no_dp.Name,x=medals_per_athlete_no_dp.Medal,
             title='Number of Olympic Medals per Athlete (Top 20)',
            labels={"x":"Number of Medals",'y':'Name'},
            text=medals_per_athlete_no_dp["region"])

fig.show()

# Save as html
# Left commented out like the export lines in the other cells, so running the
# notebook does not write a file or try to launch a browser. Uncomment to export.
#pio.write_html(fig,"olympics_top20_athletes.html", auto_open=True)


[![Callysto.ca License](https://github.com/callysto/curriculum-notebooks/blob/master/callysto-notebook-banner-bottom.jpg?raw=true)](https://github.com/callysto/curriculum-notebooks/blob/master/LICENSE.md)