In [4]:
import numpy as np
import pandas as pd

import bokeh
from bokeh.core.properties import field
from bokeh.io import curdoc, output_notebook, show
from bokeh.layouts import layout
from bokeh.models import (
    ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label,
    CategoricalColorMapper
)
from bokeh.palettes import Spectral6 as palette #@UnresolvedImport
from bokeh.plotting import figure
from bokeh.sampledata.gapminder import regions
output_notebook()

In [5]:
# -*- coding: utf-8 -*-

def process_data(df, r1, r2):
    """Return the columns of ``df`` whose year label lies in ``[r1, r2]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with one column per year. Column labels may be ints,
        floats or numeric strings such as ``'1991'``.
    r1, r2 : int
        First and last year to keep, both inclusive.

    Returns
    -------
    pandas.DataFrame
        A new frame with the rows of ``df`` untouched and only the selected
        year columns, in their original order.
    """
    # Compare numeric values so string labels work too; labels that are not
    # numbers become NaN, fail both comparisons and are therefore dropped.
    year_values = pd.to_numeric(pd.Series(list(df.columns), dtype=object), errors='coerce')
    in_range = ((year_values >= r1) & (year_values <= r2)).values
    # Slice the columns directly: transposing through unstack().unstack()
    # raises on duplicated row labels and forces one common dtype.
    return df.loc[:, in_range]


# Inclusive year window kept from every indicator table.
first_year, last_year = 1991, 2011

# Each workbook is read with its first column as the row index.
# `encoding` is not a read_excel option (workbooks carry their own encoding)
# and recent pandas releases reject it as an unexpected keyword, so it is
# not passed.
employment_data = pd.read_excel('indicator_t above 15 employ.xlsx', index_col=0)
hiv_data = pd.read_excel('indicator hiv estimated prevalence% 15-49.xlsx', index_col=0)
life_expectancy_data = pd.read_excel('indicator life_expectancy_at_birth.xlsx', index_col=0)
population = pd.read_excel('indicator gapminder population.xlsx', index_col=0)

# Trim every table to the same year window.
employment_data = process_data(employment_data, first_year, last_year)
hiv_data = process_data(hiv_data, first_year, last_year)
life_expectancy_data = process_data(life_expectancy_data, first_year, last_year)
population = process_data(population, first_year, last_year)


# Keep only the row labels present in the life-expectancy, employment and
# HIV tables.
common_countries = (
    life_expectancy_data.index
    .intersection(employment_data.index)
    .intersection(hiv_data.index)
)
employment_data = employment_data.loc[common_countries]
# population is not part of the intersection above, so some labels may be
# absent from it. reindex yields NaN rows for those, whereas .loc with
# missing labels raises KeyError on pandas >= 1.0.
population = population.reindex(common_countries)
# NOTE(review): the workbook name contains "prevalence%", which suggests the
# values are already percentages - confirm the x100 scaling is intended.
hiv_data = 100 * hiv_data.loc[common_countries]
life_expectancy_data = life_expectancy_data.loc[common_countries]

# Make the column names ints not strings for handling
columns = list(employment_data.columns)
# Convert every label, including the last one: a half-open
# range(first, last) would stop one short and leave the final year out of
# `years`, and therefore out of the rename and the slider range.
years = [int(column) for column in columns]
rename_dict = dict(zip(columns, years))
employment_data = employment_data.rename(columns=rename_dict)
hiv_data = hiv_data.rename(columns=rename_dict)
population = population.rename(columns=rename_dict)
life_expectancy_data = life_expectancy_data.rename(columns=rename_dict)
# NOTE(review): `regions` is only accessed through its `Group` column in this
# file, so this rename presumably just produces a renamed copy - confirm.
regions = regions.rename(columns=rename_dict)
regions_list = list(regions.Group.unique())

# Preprocess population data
# The 'population' column is used as the glyph size, so convert each value to
# the radius of a circle whose area equals the population, scale it down, and
# floor it at min_size (missing values also become min_size).
scale_factor = 200
min_size = 3
population = pd.DataFrame(np.sqrt(population / np.pi) / scale_factor)
population = population.where(population >= min_size, min_size)

#print(employment_data.head())
#print(hiv_data.head())

# One wide table per indicator, keyed by the column name it gets in the plot
# data. A plain dict is used because pd.Panel was removed in pandas 0.25.
p = {
    'employed': employment_data,
    'hiv': hiv_data,
    'population': population,
    'life_expectancy': life_expectancy_data,
}

data = {}
region_name = regions.Group
region_name.name = 'region'
for year in years:
    # One column per indicator for this year, aligned on the row labels.
    indicators = pd.DataFrame({name: table[year] for name, table in p.items()})
    df = pd.concat([indicators, region_name], axis=1)
    # Expose the row labels as an 'index' column: the hover tooltip is
    # "@index", and a dict of Series carries no index column of its own.
    df = df.rename_axis('index').reset_index()
    data[year] = df.to_dict('series')

# The plot starts on the first year; slider_update swaps in other years.
source = ColumnDataSource(data=data[years[0]])

plot = figure(x_range=(1, 100), y_range=(1, 100), title='Gapminder Data', plot_height=300)
plot.xaxis.ticker = SingleIntervalTicker(interval=20)
plot.xaxis.axis_label = "Employed"
plot.yaxis.ticker = SingleIntervalTicker(interval=20)
# Axis label spelling corrected ("expentancy" -> "expectancy").
plot.yaxis.axis_label = "Life expectancy (in years)"

# Large, pale year label in the plot area; its text is updated by slider_update.
label = Label(x=10, y=30, text=str(years[0]), text_font_size='70pt', text_color='#eeeeee')
plot.add_layout(label)

# One circle per row: x = employment, y = life expectancy, size = scaled
# population, fill colour chosen by region.
color_mapper = CategoricalColorMapper(palette=palette, factors=regions_list)
plot.circle(
    x='employed',
    y='life_expectancy',
    size='population',
    source=source,
    fill_color={'field': 'region', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend=field('region'),
)
plot.add_tools(HoverTool(tooltips="@index", show_arrow=False, point_policy='follow_mouse'))


def animate_update():
    """Advance the slider by one year, wrapping to the first year after the last."""
    next_year = slider.value + 1
    slider.value = years[0] if next_year > years[-1] else next_year


def slider_update(attrname, old, new):
    """Show the slider's current year in the label and the plot data.

    The three arguments follow the ``on_change`` callback signature and are
    not used; the year is read from ``slider.value``.
    """
    current_year = slider.value
    label.text = str(current_year)
    source.data = data[current_year]


# Year selector covering years[0]..years[-1]; value changes are routed to
# slider_update.
slider = Slider(
    title="Year",
    start=years[0],
    end=years[-1],
    value=years[0],
    step=1,
)
slider.on_change('value', slider_update)


# Handle returned by add_periodic_callback while the animation is running;
# None while it is paused.
_animation_callback = None


def animate():
    """Toggle the animation when the Play/Pause button is clicked.

    While the button reads '► Play', clicking it schedules animate_update
    every 200 ms and flips the label to '❚❚ Pause'. Otherwise the scheduled
    callback is removed and the label flips back.
    """
    global _animation_callback
    if button.label == '► Play':
        button.label = '❚❚ Pause'
        # Keep the handle: remove_periodic_callback expects the value returned
        # here, not the callback function itself.
        _animation_callback = curdoc().add_periodic_callback(animate_update, 200)
    else:
        button.label = '► Play'
        if _animation_callback is not None:
            curdoc().remove_periodic_callback(_animation_callback)
            _animation_callback = None


button = Button(label='► Play', width=60)
button.on_click(animate)

# Bind the result to a new name: assigning it to `layout` would shadow the
# imported bokeh.layouts.layout function and break a re-run of this cell.
app_layout = layout([
    [plot],
    [slider, button],
], sizing_mode='scale_width')

curdoc().add_root(app_layout)
curdoc().title = "Gapminder Analysis"
# NOTE(review): only the figure is displayed here; the slider and button use
# Python callbacks, which presumably need a Bokeh server session - confirm
# how this notebook is meant to be run.
show(plot)