In [19]:
# -*- coding: utf-8 -*-
from bokeh.io import curdoc, output_notebook, output_file, show, save
import numpy as np
import pandas as pd
from collections import OrderedDict
import bokeh
from bokeh.core.properties import field
from bokeh.layouts import layout, column
from bokeh.models import (
    ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label,
    CategoricalColorMapper
)
from bokeh.plotting import figure
from bokeh.palettes import Spectral6 as palette #@UnresolvedImport
from bokeh.sampledata.gapminder import regions
output_notebook()
output_file("plot.html")


def process_data(df, r1, r2):
    df = df.unstack().unstack()
    df = df[(df.index >= r1) & (df.index <= r2)]
    df = df.unstack().unstack()
    return df


def animate_update():
    year = slider.value + 1
    if year > years[-1]:
        year = years[0]
    slider.value = year


def slider_update(attrname, old, new):
    year = slider.value
    label.text = str(year)
    source.data = data[year]


def animate():
    if button.label == '► Play':
        button.label = '❚❚ Pause'
        curdoc().add_periodic_callback(animate_update, 200)
    else:
        button.label = '► Play'
        curdoc().remove_periodic_callback(animate_update)


employment_data = pd.read_excel('indicator_t above 15 employ.xlsx', encoding='utf8', index_col=0)
hiv_data = pd.read_excel('indicator hiv estimated prevalence% 15-49.xlsx', encoding='utf8', index_col=0)
life_expectancy_data = pd.read_excel('indicator life_expectancy_at_birth.xlsx', encoding='utf8', index_col=0)
population = pd.read_excel('indicator gapminder population.xlsx', encoding='utf8', index_col=0)
#per_capita_data = pd.read_excel('indicator gapminder gdp_per_capita_ppp.xlsx', encoding='utf8', index_col=0)

employment_data = process_data(employment_data, 1991, 2015)
hiv_data = process_data(hiv_data, 1991, 2015)
life_expectancy_data = process_data(life_expectancy_data, 1991, 2015)
population = process_data(population, 1991, 2015)
#per_capita_data = process_data(per_capita_data, 1991, 2015)

# have common countries across all data
common_countries = (life_expectancy_data.index.intersection(employment_data.index)).intersection(hiv_data.index)
employment_data = employment_data.loc[common_countries]
population = population.loc[common_countries]
hiv_data = hiv_data.loc[common_countries]
life_expectancy_data = life_expectancy_data.loc[common_countries]
#per_capita_data = per_capita_data.loc[common_countries]

# find minimum maximum value of each dataset
min_employment = np.min(employment_data.values.flatten())
max_employment = np.max(employment_data.values.flatten())
min_hiv_data = np.min(hiv_data.values.flatten())
max_hiv_data = np.max(hiv_data.values.flatten())
min_life_expectancy_data = np.min(life_expectancy_data.values.flatten())
max_life_expectancy_data = np.max(life_expectancy_data.values.flatten())
print(min_hiv_data)
print(max_hiv_data)
print(min_life_expectancy_data)
print(max_life_expectancy_data)

# Make the column names ints not strings for handling
columns = list(employment_data.columns)
years = list(range(int(columns[0]), int(columns[-1])))
rename_dict = dict(zip(columns, years))
employment_data = employment_data.rename(columns=rename_dict)
hiv_data = hiv_data.rename(columns=rename_dict)
population = population.rename(columns=rename_dict)
life_expectancy_data = life_expectancy_data.rename(columns=rename_dict)
#per_capita_data = per_capita_data.rename(columns=rename_dict)
regions = regions.rename(columns=rename_dict)
regions_list = list(regions.Group.unique())

# Preprocess population data
scale_factor = 200
population = np.sqrt(population / np.pi) / scale_factor
min_size = 3
population = pd.DataFrame(population)
population = population.where(population >= min_size).fillna(min_size)

print(employment_data.head())
print(hiv_data.head())

p = pd.Panel(
    {'employed': employment_data, 'hiv': hiv_data, 'population': population, 'life_expectancy': life_expectancy_data})

data = {}
region_name = regions.Group
region_name.name = 'region'
for year in years:
    df = pd.concat([p.loc[:, :, year], region_name], axis=1)
    data[year] = df.to_dict('series')

TOOLS = "box_select,lasso_select,help"

# plot 1
source = ColumnDataSource(data=data[years[-1]])
p1 = figure(tools=TOOLS, x_range=(1, 100),
            y_range=(1, 100), title='Employed Data', plot_height=300)
p1.xaxis.ticker = SingleIntervalTicker(interval=20)
p1.xaxis.axis_label = "Employed"
p1.yaxis.ticker = SingleIntervalTicker(interval=20)
p1.yaxis.axis_label = "Life expectancy(in years)"
label = Label(x=10, y=30, text=str(years[-1]), text_font_size='70pt', text_color='#eeeeee')
p1.add_layout(label)
color_mapper = CategoricalColorMapper(palette=palette, factors=regions_list)
p1.circle(
    x='employed',
    y='life_expectancy',
    size='population',
    source=source,
    fill_color={'field': 'region', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend=field('region'),
)
p1.add_tools(HoverTool(tooltips=OrderedDict([('x', '@employed'), ('y', '@life_expectancy'), ('region', '@region')]),
                       show_arrow=False, point_policy='follow_mouse'))

# Plot 2
p2 = figure(tools=TOOLS, x_range=(1, 100), y_range=(0, 100),
            title='Scatter Data', plot_height=300)
p2.xaxis.ticker = SingleIntervalTicker(interval=20)
p2.xaxis.axis_label = "Employed"
p2.yaxis.ticker = SingleIntervalTicker(interval=20)
p2.yaxis.axis_label = "HIV prevalence"
label = Label(x=10, y=30, text=str(years[-1]), text_font_size='70pt', text_color='#eeeeee')
p2.add_layout(label)
color_mapper = CategoricalColorMapper(palette=palette, factors=regions_list)
p2.circle(
    x='employed',
    y='hiv',
    size='population',
    source=source,
    fill_color={'field': 'region', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend=field('region'),
)
p2.add_tools(HoverTool(tooltips=OrderedDict([('x', '@employed'), ('y', '@hiv'), ('region', '@region')]),
                       show_arrow=False, point_policy='follow_mouse'))

slider = Slider(start=years[0], end=years[-1], value=years[-1], step=1, title="Year")
slider.on_change('value', slider_update)

button = Button(label='► Play', width=60)
button.on_click(animate)

layout = layout([column([p1, p2], sizing_mode='scale_width'), [slider, button],], sizing_mode='scale_width')

#curdoc().add_root(layout)
#curdoc().title = "Gapminder Analysis"
show(layout, browser="firefox", notebook_url="*")

nan
nan
nan
nan
                  1991       1992       1993       1994       1995       1996  \
Afghanistan  56.700001  56.500000  56.599998  56.200001  56.200001  56.099998   
Albania      52.700001  52.299999  52.400002  52.700001  52.799999  52.599998   
Algeria      39.400002  38.900002  39.400002  39.400002  38.099998  38.900002   
Angola       75.800003  75.800003  75.500000  75.900002  75.800003  75.900002   
Argentina    53.599998  53.799999  53.700001  53.799999  53.500000  54.400002   

                  1997       1998       1999       2000       2001       2002  \
Afghanistan  56.200001  56.200001  56.099998  56.099998  56.500000  56.400002   
Albania      52.400002  52.099998  52.099998  51.900002  51.799999  51.799999   
Algeria      39.700001  39.500000  39.400002  38.599998  40.400002  41.500000   
Angola       75.699997  75.599998  75.599998  75.500000  75.500000  75.599998   
Argentina    54.900002  55.000000  54.900002  55.500000  55.599998  55.400002   

          

AttributeError: 'Document' object has no attribute 'references'