In [1]:
import pandas as pd
import plotly.express as px

In [2]:
athlete_data = pd.read_csv("./dataset/athlete_events.csv")
noc_data = pd.read_csv("./dataset/noc_regions.csv")
country_continent_df = pd.read_csv("./dataset/country_continent.csv")

In [3]:
athlete_data.head()

Unnamed: 0,ID,Name,Sex,Age,Height,Weight,Team,NOC,Games,Year,Season,City,Sport,Event,Medal
0,1,A Dijiang,M,24.0,180.0,80.0,China,CHN,1992 Summer,1992,Summer,Barcelona,Basketball,Basketball Men's Basketball,
1,2,A Lamusi,M,23.0,170.0,60.0,China,CHN,2012 Summer,2012,Summer,London,Judo,Judo Men's Extra-Lightweight,
2,3,Gunnar Nielsen Aaby,M,24.0,,,Denmark,DEN,1920 Summer,1920,Summer,Antwerpen,Football,Football Men's Football,
3,4,Edgar Lindenau Aabye,M,34.0,,,Denmark/Sweden,DEN,1900 Summer,1900,Summer,Paris,Tug-Of-War,Tug-Of-War Men's Tug-Of-War,Gold
4,5,Christine Jacoba Aaftink,F,21.0,185.0,82.0,Netherlands,NED,1988 Winter,1988,Winter,Calgary,Speed Skating,Speed Skating Women's 500 metres,


In [4]:
 noc_data.head()

Unnamed: 0,NOC,region,notes
0,AFG,Afghanistan,
1,AHO,Curacao,Netherlands Antilles
2,ALB,Albania,
3,ALG,Algeria,
4,AND,Andorra,


In [5]:
country_continent_df.head()

Unnamed: 0,name,alpha-2,alpha-3,country-code,iso_3166-2,region,sub-region,intermediate-region,region-code,sub-region-code,intermediate-region-code
0,Afghanistan,AF,AFG,4,ISO 3166-2:AF,Asia,Southern Asia,,142.0,34.0,
1,Åland Islands,AX,ALA,248,ISO 3166-2:AX,Europe,Northern Europe,,150.0,154.0,
2,Albania,AL,ALB,8,ISO 3166-2:AL,Europe,Southern Europe,,150.0,39.0,
3,Algeria,DZ,DZA,12,ISO 3166-2:DZ,Africa,Northern Africa,,2.0,15.0,
4,American Samoa,AS,ASM,16,ISO 3166-2:AS,Oceania,Polynesia,,9.0,61.0,


In [6]:
def set_country_data(row):
    find_map = country_continent_df.loc[
        (country_continent_df['name'] == row['region']) |
        (country_continent_df['alpha-3'] == row['NOC'])
    ]
    
    if not find_map.empty:
        row['continent'] = find_map.iloc[0]['region']
        row['country_iso'] = find_map.iloc[0]['alpha-3']
        
    return row

noc_data = noc_data.apply(lambda row: set_country_data(row), axis=1)
noc_data.head()

Unnamed: 0,NOC,continent,country_iso,notes,region
0,AFG,Asia,AFG,,Afghanistan
1,AHO,,,Netherlands Antilles,Curacao
2,ALB,Europe,ALB,,Albania
3,ALG,Africa,DZA,,Algeria
4,AND,Europe,AND,,Andorra


In [7]:
country_medal_count_df = pd.DataFrame(columns=['NOC', 'Year', 'Medal count', 'country_iso', 'region', 'continent'])

for index, noc_row in noc_data.iterrows():
    country_data = athlete_data[(athlete_data['NOC'] == noc_row['NOC']) & athlete_data['Medal'].notnull()]
    agg_count = country_data.groupby(['Year']).agg(['count'])
    agg_count.reset_index()
    for index, row in agg_count.iterrows():
        new_row = pd.DataFrame({
            'Year': index,
            'NOC': noc_row['NOC'],
            'country_iso': noc_row['country_iso'],
            'region': noc_row['region'],
            'Medal count': row['Medal'],
            'continent': noc_row['continent']
        })
        
        country_medal_count_df = country_medal_count_df.append(new_row, ignore_index=True)
        
print("DONE!")


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





DONE!


In [11]:
country_medal_count_df = country_medal_count_df.astype({'Medal count': 'int32', 'Year': 'int32'}).sort_values(by='Year')
country_medal_count_df.head()

array(['Europe', 'Americas', 'Oceania', nan, 'Asia', 'Africa'],
      dtype=object)

In [30]:
map_plot_df = country_medal_count_df[country_medal_count_df['country_iso'].notnull()]
map_plot_df = map_plot_df[map_plot_df['continent'].notnull()]

fig = px.scatter_geo(
    map_plot_df,
    locationmode="ISO-3",
    locations="country_iso",
    hover_name="region",
    size="Medal count",
    animation_frame="Year",
    color="continent",
    projection='natural earth'
)

fig.update_layout(
    title_text = 'Olympics over the years',
    showlegend = True,
)

fig.show()

In [100]:
import plotly.graph_objects as go

colors = {
    "Europe": "royalblue",
    "Americas": "crimson",
    "Oceania": "lightseagreen",
    "Asia": "orange",
    "Africa": "lightgrey"
}

sliders_dict = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 20},
        "prefix": "Year:",
        "visible": True,
        "xanchor": "right"
    },
    "transition": {"duration": 300, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 50},
    "len": 0.9,
    "x": 0.1,
    "y": 0,
    "steps": []
}

fig_dict = {
    "data": [],
    "layout": {},
    "frames": []
}

map_plot_df = country_medal_count_df[country_medal_count_df['country_iso'].notnull()]
map_plot_df = map_plot_df[map_plot_df['continent'].notnull()]

In [101]:
# Get start data
year = map_plot_df['Year'].unique()[0]
for continent in map_plot_df['continent'].unique():
    dataset_by_year = map_plot_df[map_plot_df["Year"] == year]
    dataset_by_year_and_cont = dataset_by_year[dataset_by_year["continent"] == continent]
    
#     if dataset_by_year_and_cont.empty:
#         continue
    
    data_dict = go.Scattergeo(
                    locationmode="ISO-3",
                    locations=dataset_by_year_and_cont["country_iso"],
                    text=dataset_by_year_and_cont["region"],
                    marker=dict(
                        size=dataset_by_year_and_cont["Medal count"],
                        sizemode='area'
                    ),
                    name=continent
                )
    
    fig_dict['data'].append(data_dict)

In [102]:

def click_callback(trace, points, selector):
    c = list(trace.marker.color)
    s = list(trace.marker.size)
    for i in points.point_inds:
        c[i] = '#bae2be'
        s[i] = 20
        with f.batch_update():
            scatter.marker.color = c
            scatter.marker.size = s


# Get frames
for year in map_plot_df['Year'].unique():
    year_df = map_plot_df[map_plot_df['Year'] == year]
    
    frame = {"data": [], "name": str(year)}
    
    for continent in map_plot_df['continent'].unique():
        continent_df = year_df[year_df['continent'] == continent]
        
        if continent_df.empty:
            continue
        
        data_dict = go.Scattergeo(
            locationmode="ISO-3",
            locations=continent_df["country_iso"],
            text=continent_df["region"],
            marker=dict(
                size=continent_df["Medal count"],
                sizemode='area',
                color=colors[continent]
            ),
            name=continent
        )
        
        data_dict.on_click(click_callback)
        
        frame["data"].append(data_dict)
    
    fig_dict['frames'].append(frame)
    
    slider_step = {
        "args": [
            [year],
            { 
                "frame": {"duration": 300, "redraw": False},
                "mode": "immediate",
                "transition": { "duration": 300 }
            }
        ],
        "label": str(year),
        "method": "animate"
    }
    
    sliders_dict["steps"].append(slider_step)

In [103]:
fig = go.Figure(
    data=fig_dict['data'],
    frames=fig_dict['frames'],
    layout= go.Layout(
        title_text = 'Olympic Medals over the Years<br>(Click legend to toggle traces)',
        showlegend = True,
        
        geo = dict(
            landcolor = 'rgb(217, 217, 217)',
        ),
        
        sliders=[sliders_dict],
        
        updatemenus=[
            {
                "buttons": [
                    {
                        "args": [
                            None, 
                            { 
                                "frame": { "duration": 500, "redraw": True },
                                "fromcurrent": True, 
                                "transition": {"duration": 300, "easing": "quadratic-in-out"}
                            }
                        ],
                        "label": "Play",
                        "method": "animate"
                    },
                    {
                        "args": [
                            [ None ], 
                            {
                                "frame": {"duration": 0, "redraw": True},
                                "mode": "immediate",
                                "transition": {"duration": 0}
                            }
                        ],
                    "label": "Pause",
                    "method": "animate"
                }
            ],
            "direction": "left",
            "pad": {"r": 10, "t": 87},
            "showactive": False,
            "type": "buttons",
            "x": 0.1,
            "xanchor": "right",
            "y": 0,
            "yanchor": "top"
        }
    ])
)

fig.show()