In [1]:
%%javascript
// Override the output-area hook so it always answers "do not scroll",
// regardless of how many lines an output has.
// Front-ends that do not define the `IPython` global (e.g. JupyterLab) would
// raise a ReferenceError on the bare assignment, so the override is only
// installed when that object is actually available.
if (typeof IPython !== 'undefined' && IPython.OutputArea) {
    IPython.OutputArea.prototype._should_scroll = function(lines) {
        return false;
    };
}

<IPython.core.display.Javascript object>

## Read and Prepare Data

In [2]:
import pandas as pd

# Load the medal records (one row per medal awarded).
# A forward-slash relative path is used because it resolves on Windows, macOS
# and Linux alike; a backslash is only a path separator on Windows.
df = pd.read_csv('data/olympics.csv')
df.head()

Unnamed: 0,Year,City,Sport,Discipline,Athlete,Country Code,Country,Gender,Event,Medal
0,1924,Chamonix,Biathlon,Biathlon,"BERTHET, G.",FRA,France,Men,Military Patrol,Bronze
1,1924,Chamonix,Biathlon,Biathlon,"MANDRILLON, C.",FRA,France,Men,Military Patrol,Bronze
2,1924,Chamonix,Biathlon,Biathlon,"MANDRILLON, Maurice",FRA,France,Men,Military Patrol,Bronze
3,1924,Chamonix,Biathlon,Biathlon,"VANDELLE, André",FRA,France,Men,Military Patrol,Bronze
4,1924,Chamonix,Biathlon,Biathlon,"AUFDENBLATTEN, Adolf",SUI,Switzerland,Men,Military Patrol,Gold


In [3]:
# Medal counts for every (Year, Gender, Sport) combination present in the data.
S = df.groupby(['Year', 'Gender', 'Sport']).size().to_frame('count').reset_index()

# Every Year x Gender x Sport combination, including those with no medals.
# Each level is sorted so that the row order is identical on every run
# (iterating over a set of strings does not guarantee a stable order between
# Python processes).
full_grid = pd.MultiIndex.from_product(
    [
        sorted(S['Year'].unique()),
        sorted(S['Gender'].unique()),
        sorted(S['Sport'].unique()),
    ],
    names=['Year', 'Gender', 'Sport'],
)

# Align the observed counts to the full grid. Combinations that never occur
# are filled with 0 directly, so the counts stay integers instead of turning
# into floats through an intermediate NaN.
df_sport = (
    S.set_index(['Year', 'Gender', 'Sport'])['count']
    .reindex(full_grid, fill_value=0)
    .rename('count_medals')
    .reset_index()
)

df_sport.head()

Unnamed: 0,Year,Gender,Sport,count_medals
0,1924,Men,Biathlon,12.0
1,1924,Men,Bobsleigh,13.0
2,1924,Men,Curling,22.0
3,1924,Men,Ice Hockey,31.0
4,1924,Men,Skating,22.0


In [4]:
# Women-to-men medal ratio per Olympic year.
# Medal rows are counted per Year separately for each gender; the division
# then aligns the two Series on their Year index.
medals_by_gender = {
    gender: df.loc[df['Gender'] == gender].groupby(['Year']).size()
    for gender in ('Men', 'Women')
}
men_medals = medals_by_gender['Men']
women_medals = medals_by_gender['Women']

# Turn the resulting Series into a two-column frame: Year, ratio.
df_ratio = women_medals.div(men_medals).reset_index(name='ratio')

df_ratio.head()

Unnamed: 0,Year,ratio
0,1924,0.053571
1,1928,0.072289
2,1932,0.054545
3,1936,0.090909
4,1948,0.12


## Generate Plot

In [17]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# Build one figure with two y-axes: stacked bars of medal counts per sport on
# the primary axis and the women-to-men medal ratio as a dotted line on the
# secondary axis.
fig = make_subplots(
    specs=[[{"secondary_y": True}]]
)

fig.update_layout(
    barmode="stack"
)

# Diverging palette used for the sport bars, and the colour shared by the
# ratio line and the secondary axis.
colors = ['#762a83', '#af8dc3', '#e7d4e8', '#f7f7f7', '#d9f0d3', '#7fbf7b', '#1b7837']
color_secondary_axis = 'Black'

# One bar trace per sport. The palette is indexed modulo its length so that a
# sport is never silently left out when there are more sports than colours.
for i, s in enumerate(df.Sport.unique()):
    c = colors[i % len(colors)]

    # Sort on Gender as well as Year so the (Year, Gender) categories always
    # reach the multi-category x-axis in the same order, independent of the
    # row order of df_sport.
    plot_df = df_sport[df_sport.Sport == s]
    plot_df = plot_df.sort_values(by=['Year', 'Gender'])

    fig.add_trace(
        go.Bar(x=[plot_df.Year, plot_df.Gender],
               y=plot_df.count_medals,
               name=s,
               marker_color=c,
               hovertemplate=
                    '<b>Medals</b>: %{y}'+
                    '<br><b>Year</b>: %{x[0]}'+
                    '<br><b>Gender</b>: %{x[1]}'+
                    '<br><b>Sport</b>: %{text}'+
                    '<extra></extra>',
               text=plot_df.Sport
        ), secondary_y=False
    )

# Add a border line to the stacked bars. This runs before the ratio line is
# added, so only the bar traces exist in the figure at this point.
fig.update_traces(marker_line_color='rgb(8,48,107)', marker_line_width=1)

# Ratio line on the secondary axis; each point is placed on the 'Women'
# category of its year.
fig.add_trace(
    go.Scatter(
        x=[df_ratio.Year, ['Women'] * len(df_ratio)],
        y=df_ratio.ratio,
        name='Medals Ratio',
        mode='lines+markers',
        marker=dict(size=5),
        hovertemplate=
                '<b>Medals Ratio</b>: %{y:.2f}'+
                '<br><b>Year</b>: %{x[0]}'+
                '<extra></extra>',
        showlegend=True,
        line=dict(color=color_secondary_axis, width=2, dash='dot')
    ), secondary_y=True)

# Title, legend placement, figure size and hover label styling.
fig.update_layout(
    title={
        'text': "<b>Winter Olympics Participation Over The Years Per Gender</b>",
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01,
        font_size=14),
    width=1300,
    height=700,
    hoverlabel=dict(
        bgcolor="white",
        font_size=14),
    legend_title=dict(
        font=dict(size=14),
        text='<b>Sport Categories</b>'
    )
)

fig.update_xaxes(title_text="<b>Gender and Year</b>")

fig.update_yaxes(title_text="<b>Number of Medals</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Women to Men Medals Ratio</b>", color=color_secondary_axis,
                 linecolor=color_secondary_axis, secondary_y=True, linewidth=3)

fig.show()

# Also save a standalone interactive copy next to the notebook.
fig.write_html("final_plot.html")