# Final Visualisation

In [20]:
import numpy as np
import pandas as pd
import altair as alt

In [21]:
# Select Altair's 'notebook' renderer for displaying charts in this session.
# NOTE(review): presumably this targets the classic Jupyter Notebook front end
# and may not be available in newer Altair releases — confirm against the
# installed version.
alt.renderers.enable('notebook')

RendererRegistry.enable('notebook')

In [22]:
# Turn off Altair's maximum-row check so the full dataset can be passed to the
# charts below.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [23]:
# Raw GitHub URLs of the two input CSV files (athlete events and NOC regions).
# NOTE(review): both URLs embed a `token=` query parameter. Access tokens should
# not be committed inside a notebook; presumably these are short-lived raw-file
# tokens that may already have expired — confirm, and prefer a token-free URL
# or a configurable local data path.
urld = "https://raw.githubusercontent.com/t0nyj/AbnormalDistribution/master/input/athlete_events.csv?token=ACVHZKYZGRLR5JWHEI4MJGK5LBHXM"
urlr = "https://raw.githubusercontent.com/t0nyj/AbnormalDistribution/master/input/noc_regions.csv?token=ACVHZK55UTLJIICN62S5REK5LBHY4"

In [24]:
# Load both CSV files, in order: the athlete events table, then the NOC
# regions table.
data, regions = (pd.read_csv(csv_url) for csv_url in (urld, urlr))

In [25]:
# Left-join the region table onto every event row via the shared 'NOC' column;
# event rows without a matching NOC are kept, with missing region fields.
merged = data.merge(regions, how='left', on='NOC')

In [26]:
# Keep only rows where the medal is 'Gold' and the season is 'Summer'.
gold = merged.loc[merged['Medal'].eq('Gold') & merged['Season'].eq('Summer')]

In [27]:
# Keep only rows whose Age, Height and Weight are all finite numbers
# (this drops NaN as well as +/-inf).
gold = gold[np.isfinite(gold[['Age', 'Height', 'Weight']]).all(axis=1)]

In [28]:
# Discard rows that have no region value.
gold = gold[gold['region'].notna()]

In [29]:
# Sport highlight selection: driven by mouseover, keyed on the 'Sport' field.
# With empty='none', nothing is treated as selected until a sport is hovered.
selection = alt.selection_multi(
    on='mouseover',
    empty='none',
    fields=['Sport'],
)
# Marks in the selection are coloured by sport (legend suppressed); all other
# marks are drawn in light gray.
color = alt.condition(
    selection,
    alt.Color('Sport:N', legend=None),
    alt.value('lightgray'),
)

In [30]:
# Number of regions offered in the dropdown.
n = 25
# The n regions with the most rows in `gold`; value_counts() orders them from
# most to least frequent, so the first n entries are the top n.
countrylist = gold['region'].value_counts().head(n).index.tolist()
# Dropdown widget listing those regions, bound to a single-value selection on
# the 'region' field.
country_dropdown = alt.binding_select(name='Country: ', options=countrylist)
country_select = alt.selection_single(bind=country_dropdown, fields=['region'])

In [31]:
# Bar chart: count of gold-medal rows per sport, restricted to the region picked
# in the dropdown. Hosts both the sport-hover and the country selections.
plot1 = (
    alt.Chart(gold)
    .mark_bar()
    .encode(
        x=alt.X('Sport:O'),
        y=alt.Y('count()', title="No of golds"),
        color=color,
    )
    .properties(
        title='Gold Medal distribution by Sport',
        width=400,
        height=400,
    )
    .add_selection(selection, country_select)
    .transform_filter(country_select)
)

In [32]:
# Age histogram: count of gold-medal rows per age, filtered first by the chosen
# region and then by the hovered sport. The y axis is fixed to 0-500 and clamped.
plot2 = (
    alt.Chart(gold)
    .mark_bar(color='teal')
    .encode(
        x=alt.X('Age:Q'),
        y=alt.Y(
            'count()',
            scale=alt.Scale(domain=(0, 500), clamp=True),
            title="No of golds",
        ),
    )
    .properties(
        title='Age Distribution of Gold medals',
        width=300,
        height=100,
    )
    .add_selection(selection, country_select)
    .transform_filter(country_select)
    .transform_filter(selection)
)

In [33]:
# Scatter plot of height against weight for female gold medalists (Sex == 'F'),
# restricted to the region picked in the dropdown; points are coloured through
# the shared sport-highlight condition.
plot3 = (
    alt.Chart(gold[gold['Sex'] == 'F'])
    .mark_point()
    .encode(
        x=alt.X('Height:Q', scale=alt.Scale(domain=(130, 230)), title="Height(cm)"),
        y=alt.Y('Weight:Q', scale=alt.Scale(domain=(20, 180)), title="Weight(kg)"),
        tooltip=['Name:O', 'Year:O', 'Height:Q', 'Weight:Q', 'Event:O', 'Sport:O'],
        color=color,
    )
    .properties(title='Height vs Weight for Female Gold medalists')
    .add_selection(country_select)
    .transform_filter(country_select)
)

In [35]:
# Scatter plot of height against weight for male gold medalists (Sex == 'M'),
# restricted to the region picked in the dropdown; points are coloured through
# the shared sport-highlight condition.
plot4 = (
    alt.Chart(gold[gold['Sex'] == 'M'])
    .mark_point()
    .encode(
        x=alt.X('Height:Q', scale=alt.Scale(domain=(130, 230)), title="Height(cm)"),
        y=alt.Y('Weight:Q', scale=alt.Scale(domain=(20, 180)), title="Weight(kg)"),
        tooltip=['Name:O', 'Year:O', 'Height:Q', 'Weight:Q', 'Event:O', 'Sport:O'],
        color=color,
    )
    .properties(title='Height vs Weight for Male Gold medalists')
    .add_selection(country_select)
    .transform_filter(country_select)
)

In [36]:
# Final layout: two columns side by side. Left column stacks plot1 over plot2,
# right column stacks plot3 over plot4. The parentheses spell out the grouping
# Python already applies (`&` binds tighter than `|`).
chart = (plot1 & plot2) | (plot3 & plot4)

In [37]:
# Write the composed chart to 'chart.html', passing the SVG renderer as an
# embed option.
chart.save(
    'chart.html',
    embed_options={'renderer': 'svg'},
)

In [None]:
# preview: leave the composed chart as the cell's last expression so the
# notebook displays it
chart