# In [8]:
# -*- coding: utf-8 -*-
from bokeh.io import output_notebook, show, output_file, save
from bokeh.application.handlers.function import FunctionHandler
from bokeh.application.application import Application

# Direct Bokeh's file output to "plots.html".
# NOTE(review): output_notebook is imported above but never called, while the
# end of this file calls show(app) on an Application -- confirm that file
# output is the intended mode for displaying the app.
output_file("plots.html")

def make_doc(doc):
    """Populate ``doc`` with the interactive Gapminder dashboard.

    Reads five Gapminder indicator workbooks (employment, HIV prevalence,
    life expectancy, population, GDP per capita) from paths relative to the
    current working directory, keeps the 1991-2015 columns and the countries
    shared by the employment, HIV and life-expectancy tables, and builds:

    * a line chart of GDP per capita over time for the five countries with
      the largest population in the last year, and
    * three bubble charts (employment vs. life expectancy, HIV prevalence
      and GDP per capita) that all read from one shared data source.

    A year slider swaps the contents of the shared data source, and a
    Play/Pause button advances the slider every 200 ms through a periodic
    document callback.

    Args:
        doc: The Bokeh document that receives the layout, title and theme.
            It must provide ``add_root``, ``add_periodic_callback`` and
            ``remove_periodic_callback``.
    """
    import yaml
    import numpy as np
    import pandas as pd
    from collections import OrderedDict
    from bokeh.themes import Theme
    from bokeh.core.properties import field
    from bokeh.layouts import layout, column
    from bokeh.models import (
        ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label,
        CategoricalColorMapper, Plot, DataRange1d
    )
    from bokeh.sampledata.gapminder import regions
    from bokeh.plotting import figure
    from bokeh.models.glyphs import MultiLine
    from bokeh.palettes import Viridis7 as palette1 #@UnresolvedImport
    from bokeh.palettes import Spectral7 as palette2 #@UnresolvedImport

    def process_data(df, r1, r2):
        """Return ``df`` restricted to the columns labelled within [r1, r2].

        Each ``unstack().unstack()`` pair effectively transposes the frame,
        so the column labels can be filtered as an index and the result is
        then flipped back to its original orientation.
        """
        df = df.unstack().unstack()
        df = df[(df.index >= r1) & (df.index <= r2)]
        df = df.unstack().unstack()
        return df

    def animate_update():
        """Advance the slider by one year, wrapping to the first year."""
        year = slider.value + 1
        if year > years[-1]:
            year = years[0]
        slider.value = year

    def slider_update(attrname, old, new):
        """Show the slider's year in every bubble chart."""
        year = slider.value
        for year_label in year_labels:
            year_label.text = str(year)
        source.data = data[year]

    def animate():
        """Toggle the periodic animation and the button caption."""
        if button.label == '► Play':
            button.label = '❚❚ Pause'
            doc.add_periodic_callback(animate_update, 200)
        else:
            button.label = '► Play'
            # NOTE(review): newer Bokeh releases expect the object returned
            # by add_periodic_callback here rather than the function --
            # confirm against the Bokeh version this runs on.
            doc.remove_periodic_callback(animate_update)

    def make_bubble_plot(y_field, y_range, y_tick_interval, y_axis_label, title):
        """Build one employment-vs-indicator bubble chart.

        Returns the figure together with its large background year label so
        the caller can update the label when the slider moves.
        """
        fig = figure(tools=TOOLS, x_range=(1, 100), y_range=y_range,
                     title=title, plot_height=300)
        fig.xaxis.ticker = SingleIntervalTicker(interval=20)
        fig.xaxis.axis_label = "Employed (in %)"
        fig.yaxis.ticker = SingleIntervalTicker(interval=y_tick_interval)
        fig.yaxis.axis_label = y_axis_label
        year_label = Label(x=10, y=30, text=str(years[-1]),
                           text_font_size='70pt', text_color='#eeeeee')
        fig.add_layout(year_label)
        fig.circle(
            x='employed',
            y=y_field,
            size='population',
            source=source,
            fill_color={'field': 'region', 'transform': color_mapper},
            fill_alpha=0.8,
            line_color='#7c7e71',
            line_width=0.5,
            line_alpha=0.5,
            legend=field('region'),
        )
        tooltips = OrderedDict([('x', '@employed'), ('y', '@' + y_field), ('region', '@region')])
        fig.add_tools(HoverTool(tooltips=tooltips,
                                show_arrow=False, point_policy='follow_mouse'))
        return fig, year_label

    # ---- Load the raw indicator tables (index column = country) ----
    employment_data = pd.read_excel('indicator_t above 15 employ.xlsx', encoding='utf8', index_col=0)
    hiv_data = pd.read_excel('indicator hiv estimated prevalence% 15-49.xlsx', encoding='utf8', index_col=0)
    life_expectancy_data = pd.read_excel('indicator life_expectancy_at_birth.xlsx', encoding='utf8', index_col=0)
    population = pd.read_excel('indicator gapminder population.xlsx', encoding='utf8', index_col=0)
    per_capita_data = pd.read_excel('indicator gapminder gdp_per_capita_ppp.xlsx', encoding='utf8', index_col=0)
    # Drop repeated country rows, keeping the last occurrence of each.
    per_capita_data = per_capita_data.reset_index().drop_duplicates(subset='GDP per capita', keep='last').set_index('GDP per capita')

    # ---- Restrict every table to the 1991-2015 columns ----
    employment_data = process_data(employment_data, 1991, 2015)
    hiv_data = process_data(hiv_data, 1991, 2015)
    life_expectancy_data = process_data(life_expectancy_data, 1991, 2015)
    population = process_data(population, 1991, 2015)
    per_capita_data = process_data(per_capita_data, 1991, 2015)

    # ---- Align all tables on the countries shared by three of them ----
    # NOTE(review): population and per-capita tables are not part of the
    # intersection; countries missing from them are looked up anyway.
    common_countries = (life_expectancy_data.index.intersection(employment_data.index)).intersection(hiv_data.index)
    employment_data = employment_data.loc[common_countries]
    population = population.loc[common_countries]
    hiv_data = hiv_data.loc[common_countries]
    life_expectancy_data = life_expectancy_data.loc[common_countries]
    per_capita_data = per_capita_data.loc[common_countries]

    # Make the column names ints not strings for handling.
    # range() excludes its stop value, so add 1: otherwise the final year is
    # neither renamed to an int nor reachable from the slider/animation.
    columns = list(employment_data.columns)
    years = list(range(int(columns[0]), int(columns[-1]) + 1))
    rename_dict = dict(zip(columns, years))
    employment_data = employment_data.rename(columns=rename_dict)
    hiv_data = hiv_data.rename(columns=rename_dict)
    population = population.rename(columns=rename_dict)
    life_expectancy_data = life_expectancy_data.rename(columns=rename_dict)
    per_capita_data = per_capita_data.rename(columns=rename_dict)
    regions = regions.rename(columns=rename_dict)
    regions_list = list(regions.Group.unique())

    # Preprocess population data: convert to a bubble size and enforce a
    # minimum size so small countries stay visible.
    scale_factor = 200
    population = np.sqrt(population / np.pi) / scale_factor
    min_size = 3
    population = pd.DataFrame(population)
    population = population.where(population >= min_size).fillna(min_size)

    # NOTE(review): pd.Panel only exists in older pandas releases; this code
    # path needs a pandas version that still ships it.
    p = pd.Panel(
        {'employed': employment_data, 'hiv': hiv_data, 'population': population, 'per_capita': per_capita_data, 'life_expectancy': life_expectancy_data})

    # One column-dict per year; the slider swaps these into the data source.
    data = {}
    region_name = regions.Group
    region_name.name = 'region'
    for year in years:
        df = pd.concat([p.loc[:, :, year], region_name], axis=1)
        data[year] = df.to_dict('series')

    TOOLS = "pan,wheel_zoom,box_zoom,reset,box_select,lasso_select"
    color_mapper = CategoricalColorMapper(palette=palette2, factors=regions_list)
    source = ColumnDataSource(data=data[years[-1]])

    # plot 0
    xdr = DataRange1d()
    ydr = DataRange1d()
    plot_title = 'Year vs Per-capita (in $ PPP adjusted of 5 most populated countries)'
    p0 = figure(tools=TOOLS, x_range=xdr,
                y_range=ydr, title=plot_title, plot_height=300)
    p0.xaxis.axis_label = "Year"
    p0.yaxis.axis_label = "Per capita (in $)"
    highly_populated_countries = population.sort_values(years[-1], ascending=False).head(5)
    # zip() stops at the shorter input, so fewer than five surviving
    # countries no longer triggers an IndexError.
    for country, line_color in zip(highly_populated_countries.index, palette2):
        p0.line(
            highly_populated_countries.columns,
            per_capita_data.loc[country, highly_populated_countries.columns].values,
            line_color=line_color,
            line_width=2,
            legend=country,
        )

    # plot 1
    p1, label1 = make_bubble_plot(
        'life_expectancy', (1, 100), 20, "Life expectancy(in years)",
        'Employed vs Life Expectancy (Bubble size: population)')

    # Plot 2
    p2, label2 = make_bubble_plot(
        'hiv', (0, 100), 20, "HIV prevalence (in %)",
        'Employed vs HIV prevalence (Bubble size: population)')

    # Plot 3
    p3, label3 = make_bubble_plot(
        'per_capita', (1, 70000), 5000, "Per capita PPP adjusted (in $)",
        'Employed vs Per capita PPP adjusted (Bubble size: population)')

    # Labels refreshed by slider_update whenever the year changes.
    year_labels = [label1, label2, label3]

    # Single slider instance; it is the one both callbacks read and write.
    slider = Slider(start=years[0], end=years[-1], value=years[-1], step=1, title="Year")
    slider.on_change('value', slider_update)

    button = Button(label='► Play', width=60)
    button.on_click(animate)

    # Named `root` so the imported `layout` function is not rebound.
    root = layout([column([p0, p1, p2, p3], sizing_mode='scale_width'), [slider, button],], sizing_mode='scale_width')

    doc.add_root(root)
    doc.title = "Gapminder Analysis"
    # safe_load: the theme is a plain mapping, and yaml.load() without an
    # explicit Loader is deprecated/rejected by current PyYAML releases.
    doc.theme = Theme(json=yaml.safe_load("""
        attrs:
          Plot:
            outline_line_color: null
            toolbar_location: null
            min_border: 20

          Axis:
            minor_tick_in: null
            minor_tick_out: null
            major_tick_in: null
            major_label_text_font_size: '10pt'
            major_label_text_font_style: 'normal'
            axis_label_text_font_size: '10pt'

            axis_line_color: '#AAAAAA'
            major_tick_line_color: '#AAAAAA'
            major_label_text_color: '#666666'

            major_tick_line_cap: 'round'
            axis_line_cap: 'round'
            axis_line_width: 1
            major_tick_line_width: 1

          Grid:
            grid_line_alpha: 0.4

    """))

# Wrap make_doc in a FunctionHandler-backed Bokeh Application.
app = Application(FunctionHandler(make_doc))
# Create a document from the application.
# NOTE(review): `doc` is not referenced by any later line in this file;
# presumably this call runs make_doc once as an eager check -- confirm it is
# still needed.
doc = app.create_document()
# Hand the application itself (not the document created above) to show().
show(app)