In [1]:
import os
from glob import glob

import numpy as np
import pandas as pd

import altair as alt
from ipywidgets import interact, fixed

from tqdm import tqdm, trange

# tell plotting library not to try and hold everything in memory
alt.data_transformers.enable('json')
# and let it render good
alt.renderers.enable('notebook')

RendererRegistry.enable('notebook')

In [2]:

def load(type, melt=False):
    if type not in ['sports', 'events']:
        print('type can be either \'sports\' or \'events\'')
        return

    sport_fn = glob('json/{}/*.json'.format(type))
    sport_name = [s.split(os.sep)[-1].split('.')[0].capitalize() for s in sport_fn]

    data = {}
    for fn, sport in tqdm(zip(sport_fn, sport_name), total=len(sport_fn), desc="Loading {}".format(type)):
        if melt:
            temp = pd.read_json(fn)
            #columns = [c for c in temp.columns if e not in ['name', 'country', 'event', 'note','phase','season','sport','unit']]
            #print(columns)
            id_vars = [var for var in ['name', 'country', 'event', 'note','phase','season','sport','unit','year']
                   if var in temp.columns]
            data[sport] = temp.melt(id_vars=id_vars)
            data[sport]['value'] = pd.to_numeric(data[sport]['value'], errors='coerce')
        else:
            data[sport] = pd.read_json(fn)

    return data

In [3]:
events = load('events', melt=True)

Loading events: 100%|██████████| 686/686 [01:13<00:00,  8.69it/s]


In [55]:
events[events.keys()[10]].head()
#events[events.keys()[0]]['variable'].unique()

Unnamed: 0,name,country,event,note,phase,season,sport,unit,year,variable,value
0,albert rowland,australasia,men's 10 mile walk,,round one,summer,athletics,heat one,1908,age,22.0
1,arne hãjme,denmark,men's 10 mile walk,dnf,round one,summer,athletics,heat one,1908,age,23.0
2,piet ruimers,netherlands,men's 10 mile walk,,round one,summer,athletics,heat two,1908,age,23.0
3,piet soudyn,netherlands,men's 10 mile walk,dnf,round one,summer,athletics,heat one,1908,age,28.0
4,ralph harrison,great britain,men's 10 mile walk,qu,round one,summer,athletics,heat two,1908,age,


['value']

In [92]:
def event_viewer(data, event):
    data = data[event]
    
    # w.r.t. jakevdp @ https://github.com/altair-viz/altair/issues/965#issuecomment-399773773
    select_box = alt.binding_select(options=list(data['variable'].unique()))
    
    selection = alt.selection_single(name="y_axis", fields=['variable'], bind=select_box)
    
    country_select = alt.selection_multi(fields=['country'])
    color = alt.condition(country_select,
                      alt.Color('country:N', legend=None),
                      alt.value('lightgray'))
    
    
    legend = alt.Chart(data).mark_point().encode(
        y=alt.Y('country:N', axis=alt.Axis(orient='right')),
        color=color
    ).add_selection(
        country_select
    )
    
    scales = alt.selection_interval(bind='scales')
    
    #slider = alt.binding_range(min=1900, max=2014, step=2)
    #select_year = alt.selection_single(name="year", fields=['year'], bind=slider)
        
    chart = alt.Chart(data).mark_circle().encode(
        x=alt.X('year:Q',scale=alt.Scale(domain=[1900,2014])),
        y='value:Q',
        color=color,
        tooltip='name:N'
    ).add_selection(
        selection
    ).add_selection(
        scales
    ).transform_filter(
        selection
    ).properties(
        width = 700,
        height = 500,)
    return chart | legend

In [93]:
interact(event_viewer, event = sorted(events.keys()), data=fixed(events));

aW50ZXJhY3RpdmUoY2hpbGRyZW49KERyb3Bkb3duKGRlc2NyaXB0aW9uPXUnZXZlbnQnLCBvcHRpb25zPSgiQWxwaW5lIHNraWluZy1tZW4ncyBkb3duaGlsbCIsICJBbHBpbmUgc2tpaW5nLW3igKY=
