In [1]:
import pandas as pd
import plotly.express as px
import numpy as np

In [2]:
# Toggling the season here updates all data below
season = 'Summer'

In [4]:
athlete_df = pd.read_csv('Olympic_Athlete_Event_Results.csv', usecols=['edition', 'athlete'])
athlete_df[['year', 'season']] = athlete_df['edition'].str.split(' ', expand=True, n=1)

athlete_df['year'] = athlete_df['year'].astype(int)

# Remove "Olympics" from "Summer/Winter Olympics"
athlete_df['season'] = athlete_df['season'].str.split(' ', n=1).str[0]

# rename the column to be uniform with the DataFrame we will merge with later
# not necessary, but helpful to simplify our data
athlete_df = athlete_df.rename(columns={'athlete': 'name'})
athlete_df = athlete_df[athlete_df['season'] == season]

athlete_df.head

<bound method NDFrame.head of                      edition                 name  year  season
0       1908 Summer Olympics      Ernest Hutcheon  1908  Summer
1       1908 Summer Olympics         Henry Murray  1908  Summer
2       1908 Summer Olympics        Harvey Sutton  1908  Summer
3       1908 Summer Olympics          Guy Haskins  1908  Summer
4       1908 Summer Olympics         Joseph Lynch  1908  Summer
...                      ...                  ...   ...     ...
314491  2020 Summer Olympics   Jacarra Winchester  2020  Summer
314492  2020 Summer Olympics       Helen Maroulis  2020  Summer
314493  2020 Summer Olympics        Kayla Miracle  2020  Summer
314494  2020 Summer Olympics  Tamyra Mensah-Stock  2020  Summer
314495  2020 Summer Olympics         Adeline Gray  2020  Summer

[252859 rows x 4 columns]>

In [7]:
sex_df = pd.read_csv('Olympic_Athlete_Bio.csv', usecols=['name', 'sex', 'country'])
sex_df = sex_df[sex_df['sex'] == 'Female']
sex_df

Unnamed: 0,name,sex,country
2,Nathalie Wunderlich,Female,Switzerland
8,Taeko Kubo,Female,Japan
13,Dannette Leininger,Female,United States
14,Nanna Skodborg Merrald,Female,Denmark
16,Hannah Afriyie,Female,Ghana
...,...,...,...
155015,Catarina Lindqvist,Female,Sweden
155016,Yevheniya Filanenko,Female,Ukraine
155018,Frances Schroth,Female,United States
155023,Miyu Nagaoka,Female,Japan


In [9]:
df = athlete_df.merge(sex_df, on='name').sort_values('year')
len(df.index)

67178

In [10]:
appearances = df.groupby(['year', 'country']).size().reset_index(name='country_appearances')
df = df.merge(appearances, on=['year', 'country'])
df

Unnamed: 0,edition,name,year,season,sex,country,country_appearances
0,1896 Summer Olympics,Stamata Revithi,1896,Summer,Female,Greece,2
1,1896 Summer Olympics,Melpomene,1896,Summer,Female,Greece,2
2,1900 Summer Olympics,Mary Abbott,1900,Summer,Female,United States,15
3,1900 Summer Olympics,Margaret Abbott,1900,Summer,Female,United States,15
4,1900 Summer Olympics,Marion Jones,1900,Summer,Female,United States,15
...,...,...,...,...,...,...,...
67173,2020 Summer Olympics,Jeanne Boutbien,2020,Summer,Female,Senegal,3
67174,2020 Summer Olympics,Ndeye Binta Diongue,2020,Summer,Female,Senegal,3
67175,2020 Summer Olympics,Chiara Costa,2020,Summer,Female,Senegal,3
67176,2020 Summer Olympics,Mary Lifu,2020,Summer,Female,Solomon Islands,2


In [11]:
df.to_csv("women-summer.csv")

In [12]:
def powspace(start, stop, power, num):
    '''
    start: first endpoint of resulting array
    stop: last endpoint of resulting array
    power: power to use when spacing out points in array
    num: number of points in resulting array
    '''
    start = np.power(start, 1/float(power))
    stop = np.power(stop, 1/float(power))
    return np.power(np.linspace(start, stop, num=num), power)

In [13]:
colorbar_range = df['country_appearances'].min(), df['country_appearances'].max()

# Pick some thematic color scheme
colors = px.colors.sequential.Redor if season == 'Summer' else px.colors.sequential.OrRd

colormap_vals = powspace(start=0, stop=1, power=3, num=len(colors) - 1)
colormap_vals = [(0, colors[0]), *[(colormap_vals[i], colors[i + 1]) for i in range(len(colormap_vals))]]

In [19]:
fig = px.choropleth(
    df,
    locations="country",
    locationmode='country names',
    color='country_appearances',
    projection='natural earth',
    animation_frame='year',
    title=f'Women {season} Olympics Participation timelapse',
    color_continuous_scale=colormap_vals,
    range_color=colorbar_range)

fig.show() 

In [15]:
import seaborn as sns