In [4]:
from IPython.display import display, HTML

import pandas as pd
import numpy as np

from jinja2 import Template

from bokeh.models import (
    ColumnDataSource, Plot, Circle, Range1d, 
    LinearAxis, HoverTool, Text,
    SingleIntervalTicker, Slider, CustomJS
)
from bokeh.palettes import Spectral6
from bokeh.resources import JSResources
from bokeh.embed import file_html


### Setting up the data
The plot animates with the slider showing the data over time from 1964 to 2013. We can think of each year as a seperate static plot, and when the slider moves, we use the Callback to change the data source that is driving the plot.

We could use bokeh-server to drive this change, but as the data is not too big we can also pass all the datasets to the javascript at once and switch between them on the client side.

This means that we need to build one data source for each year that we have data for and are going to switch between using the slider. We build them and add them to a dictionary `sources` that holds them under a key that is the name of the year preficed with a `_`.

In [26]:
N=20

df = pd.DataFrame({
   'A': pd.date_range(start='2016-01-01',periods=N,freq='D'),
   'x': np.linspace(0,stop=N-1,num=N),
   'y': np.random.rand(N)*10,
   'C': np.random.choice(['Low','Medium','High'],N).tolist(),
   'D': np.random.normal(100, 10, size=(N)).tolist()
})

print(df)

            A     x         y       C           D
0  2016-01-01   0.0  4.704095     Low  106.542170
1  2016-01-02   1.0  0.401594  Medium   90.153512
2  2016-01-03   2.0  5.589665  Medium  100.282395
3  2016-01-04   3.0  2.365791  Medium   88.355333
4  2016-01-05   4.0  3.730107  Medium  101.345031
5  2016-01-06   5.0  8.829996  Medium   97.694567
6  2016-01-07   6.0  0.102004    High   98.353313
7  2016-01-08   7.0  9.799471  Medium  115.070255
8  2016-01-09   8.0  8.716373     Low   82.715638
9  2016-01-10   9.0  6.136447    High  107.131664
10 2016-01-11  10.0  0.881107     Low   95.771823
11 2016-01-12  11.0  3.807441  Medium  105.629708
12 2016-01-13  12.0  8.981860    High   99.230754
13 2016-01-14  13.0  2.839425    High  100.090949
14 2016-01-15  14.0  1.378741    High   88.065075
15 2016-01-16  15.0  6.487007  Medium   98.364495
16 2016-01-17  16.0  2.793814     Low  101.208814
17 2016-01-18  17.0  9.626190    High   99.895669
18 2016-01-19  18.0  9.428074     Low  116.198111


In [44]:
data = np.random.rand(5,5)
colheaders = ['col1','col2','col3','col4','col5']
pdf = pd.DataFrame(data, columns=colheaders)
print(pdf)

def mult(ele1,ele2,ele3):
   return ele1*(ele2+ele3)
pdf = pdf.pipe(mult,10,0)
print(pdf)

for row_index,row in pdf.iterrows():
   print (row_index,'\n',row)

desc = pdf.describe()
print(desc)



       col1      col2      col3      col4      col5
0  0.714586  0.598491  0.121225  0.098260  0.885227
1  0.329456  0.814211  0.142805  0.280778  0.965179
2  0.034782  0.494071  0.491013  0.489423  0.926528
3  0.250434  0.002202  0.450860  0.674963  0.134178
4  0.160681  0.856028  0.993905  0.481675  0.192064
       col1      col2      col3      col4      col5
0  7.145862  5.984910  1.212251  0.982603  8.852270
1  3.294561  8.142106  1.428048  2.807784  9.651794
2  0.347824  4.940711  4.910133  4.894230  9.265282
3  2.504338  0.022016  4.508600  6.749629  1.341780
4  1.606808  8.560277  9.939054  4.816747  1.920636
0 
 col1    7.145862
col2    5.984910
col3    1.212251
col4    0.982603
col5    8.852270
Name: 0, dtype: float64
1 
 col1    3.294561
col2    8.142106
col3    1.428048
col4    2.807784
col5    9.651794
Name: 1, dtype: float64
2 
 col1    0.347824
col2    4.940711
col3    4.910133
col4    4.894230
col5    9.265282
Name: 2, dtype: float64
3 
 col1    2.504338
col2    0.022016

In [37]:
# data = pd.read_csv('assets/gapminder.csv', index_col="Year", thousands=",")
dfRuleTaskModels = pd.read_csv('assets/rule_task_models.csv')
print(dfRuleTaskModels.nunique())
dfRuleTaskModels.head(2)



id              540
domain_id         0
org_id            0
created_at      540
updated_at      540
deleted_at        0
task_guid         3
alias_ref         4
purpose           4
description       4
task_state        3
rule_id         135
rule_root_id    135
inputs           87
outputs         205
dtype: int64


Unnamed: 0,id,domain_id,org_id,created_at,updated_at,deleted_at,task_guid,alias_ref,purpose,description,task_state,rule_id,rule_root_id,inputs,outputs
0,646ecf68-7ed2-4010-8d4a-d001391a4f0d,,,2018-09-21 21:26:59.883982+00,2018-09-21 21:27:08.740501+00,,8fe8ece7-c997-d3f4-fc01-000000000000,t2,Generate Odoo Accounting Users Data,Fetch the list of Accounting users from the Odoo,3,0ccbf5a4-4580-4c3e-ab4f-148815c64ca9,0ccbf5a4-4580-4c3e-ab4f-148815c64ca9,"{""BucketName"":""demo"",""InputFileType"":""AccountU...","{""error"":""open AccountUsers-1537565227.csv: no..."
1,105e3d7c-5774-42cc-a2ae-9479d2fbaa62,,,2018-09-18 00:08:56.091309+00,2018-09-18 00:09:05.30732+00,,8fe8ece7-c997-d3f4-fc01-000000000000,t2,Generate Odoo Accounting Users Data,Fetch the list of Accounting users from the Odoo,3,cc22e544-2e2d-43b3-b7f4-80b4589a104d,cc22e544-2e2d-43b3-b7f4-80b4589a104d,"{""BucketName"":""demo"",""InputFileType"":""AccountU...","{""BucketName"":""demo"",""FileName"":""AccountUsers-..."


In [31]:
fertility_df, life_expectancy_df, population_df_size, regions_df, years, regions = process_data()

sources = {}

region_color = regions_df['region_color']
region_color.name = 'region_color'

for year in years:
    fertility = fertility_df[year]
    fertility.name = 'fertility'
    life = life_expectancy_df[year]
    life.name = 'life' 
    population = population_df_size[year]
    population.name = 'population' 
    new_df = pd.concat([fertility, life, population, region_color], axis=1)
    sources['_' + str(year)] = ColumnDataSource(new_df)

NameError: name 'process_data' is not defined

sources looks like this

```
{'_1964': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d165cc0>,
 '_1965': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d165b00>,
 '_1966': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d1656a0>,
 '_1967': <bokeh.models.sources.ColumnDataSource at 0x7f7e7d165ef0>,
 '_1968': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9dac18>,
 '_1969': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9da9b0>,
 '_1970': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9da668>,
 '_1971': <bokeh.models.sources.ColumnDataSource at 0x7f7e7e9da0f0>...

```
 
We will pass this dictionary to the Callback. In doing so, we will find that in our javascript we have an object called, for example _1964 that refers to our ColumnDataSource. Note that we needed the prefixing _ as JS objects cannot begin with a number.

Finally we construct a string that we can insert into our javascript code to define an object.

The string looks like this: `{1962: _1962, 1963: _1963, ....}`

Note the keys of this object are integers and the values are the references to our ColumnDataSources from above. So that now, in our JS code, we have an object that's storing all of our ColumnDataSources and we can look them up. 

In [3]:
dictionary_of_sources = dict(zip([x for x in years], ['_%s' % x for x in years]))
js_source_array = str(dictionary_of_sources).replace("'", "")

### Build the plot

In [4]:
# Set up the plot
xdr = Range1d(1, 9)
ydr = Range1d(20, 100)
plot = Plot(
    x_range=xdr,
    y_range=ydr,
    title="",
    plot_width=800,
    plot_height=400,
    outline_line_color=None,
    toolbar_location=None,    
)
AXIS_FORMATS = dict(
    minor_tick_in=None,
    minor_tick_out=None,
    major_tick_in=None,
    major_label_text_font_size="10pt",
    major_label_text_font_style="normal",
    axis_label_text_font_size="10pt",

    axis_line_color='#AAAAAA',
    major_tick_line_color='#AAAAAA',
    major_label_text_color='#666666',

    major_tick_line_cap="round",
    axis_line_cap="round",
    axis_line_width=1,
    major_tick_line_width=1,
)

xaxis = LinearAxis(SingleIntervalTicker(interval=1), axis_label="Children per woman (total fertility)", **AXIS_FORMATS)
yaxis = LinearAxis(SingleIntervalTicker(interval=20), axis_label="Life expectancy at birth (years)", **AXIS_FORMATS)   
plot.add_layout(xaxis, 'below')
plot.add_layout(yaxis, 'left')

### Add the background year text
We add this first so it is below all the other glyphs

In [5]:
# Add the year in background (add before circle)
text_source = ColumnDataSource({'year': ['%s' % years[0]]})
text = Text(x=2, y=35, text='year', text_font_size='150pt', text_color='#EEEEEE')
plot.add_glyph(text_source, text)

<bokeh.models.renderers.GlyphRenderer at 0x7fd2ff1cae48>

### Add the bubbles and hover
We add the bubbles using the Circle glyph. We start from the first year of data and that is our source that drives the circles (the other sources will be used later).

plot.add_glyph returns the renderer, and we pass this to the HoverTool so that hover only happens for the bubbles on the page and not other glyph elements.

In [6]:
# Add the circle
renderer_source = sources['_%s' % years[0]]
circle_glyph = Circle(
    x='fertility', y='life', size='population',
    fill_color='region_color', fill_alpha=0.8, 
    line_color='#7c7e71', line_width=0.5, line_alpha=0.5)
circle_renderer = plot.add_glyph(renderer_source, circle_glyph)

# Add the hover (only against the circle and not other plot elements)
tooltips = "@index"
plot.add_tools(HoverTool(tooltips=tooltips, renderers=[circle_renderer]))

### Add the legend

Finally we manually build the legend by adding circles and texts to the upper-right portion of the plot.

In [7]:
text_x = 7
text_y = 95
for i, region in enumerate(regions):
    plot.add_glyph(Text(x=text_x, y=text_y, text=[region], text_font_size='10pt', text_color='#666666'))
    plot.add_glyph(Circle(x=text_x - 0.1, y=text_y + 2, fill_color=Spectral6[i], size=10, line_color=None, fill_alpha=0.8))
    text_y = text_y - 5 

### Add the slider and callback
Last, but not least, we add the slider widget and the JS callback code which changes the data of the renderer_source (powering the bubbles / circles) and the data of the text_source (powering background text). After we've set() the `data` we need to trigger() a `change`. slider, renderer_source, text_source are all available because we add them as args to Callback.

It is the combination of `sources = %s % (js_source_array)` in the JS and `Callback(args=sources...)` that provides the ability to look-up, by year, the JS version of our python-made ColumnDataSource.

In [8]:
# Add the slider
code = """
    var year = slider.get('value'),
        sources = %s,
        new_source_data = sources[year].get('data');
    renderer_source.set('data', new_source_data);
    text_source.set('data', {'year': [String(year)]});
""" % js_source_array

callback = CustomJS(args=sources, code=code)
slider = Slider(start=years[0], end=years[-1], value=1, step=1, title="Year", callback=callback, name='testy')
callback.args["renderer_source"] = renderer_source
callback.args["slider"] = slider
callback.args["text_source"] = text_source

## Embed in a template and render
Last but not least, we use vplot to stick togethre the chart and the slider. And we embed that in a template we write using the script, div output from components.

We display it in IPython and save it as an html file.

In [1]:
# Stick the plot and the slider together
layout = vplot(plot, slider)

# Open our custom template
with open('gapminder_template.jinja', 'r') as f:
    template = Template(f.read())

# Use inline resources
js_resources = JSResources(mode='inline')    
html = file_html(layout, None, "Bokeh - Gapminder Bubble Plot", template=template, js_resources=js_resources)

display(HTML(html))

NameError: name 'vplot' is not defined