In [544]:
import os
import numpy as np
import pandas as pd

from bokeh.io import show
from bokeh.io import output_notebook


from bokeh.models import Plot
from bokeh.models import Range1d
from bokeh.models import LinearAxis
from bokeh.models import SingleIntervalTicker
from bokeh.models import ColumnDataSource

from bokeh.palettes import Spectral6
from bokeh.charts import BoxPlot

from bokeh.layouts import layout
from bokeh.layouts import widgetbox

from bokeh.embed import file_html


from bokeh.models import Text
from bokeh.models import Slider
from bokeh.models import Circle
from bokeh.models import CustomJS
from bokeh.models import HoverTool
from bokeh.models import LinearAxis


In [545]:
output_notebook()

In [177]:
#import bokeh.sampledata
#bokeh.sampledata.download()

In [546]:
regions = pd.read_csv(os.path.abspath('bokeh_region1.txt'), sep='\t')
rtype = pd.read_csv(os.path.abspath('bokeh_rtype.txt'), sep='\t')
vclass = pd.read_csv(os.path.abspath('bokeh_vclass.txt'), sep='\t')
totalv = pd.read_csv(os.path.abspath('bokeh_totalv.txt'), sep='\t')

In [547]:
rtype.fillna(value=0, inplace=True)
vclass.fillna(value=0, inplace=True)
totalv.fillna(value=0, inplace=True)
regions.set_index('Country', inplace=True )
vclass.set_index('Country', inplace=True )
rtype.set_index('Country', inplace=True )
totalv.set_index('Country', inplace=True )
regions.head()

Unnamed: 0_level_0,Group,ID
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
AE,Africa,AE
AG,Africa,AG
AN,Africa,AN
AR,South America,AR
BB,Africa,BB


In [550]:
def process_data():
    #from bokeh.sampledata.gapminder import fertility, life_expectancy, population, regions
    # Make the column names ints not strings for handling    
    columns = list(vclass.columns)
    years = list(range(int(columns[0]), int(columns[-1])))
    rename_dict = dict(zip(columns, years))
    
    bvclass = vclass.rename(columns=rename_dict)
    brtype = rtype.rename(columns=rename_dict)
    bregions = regions.rename(columns=rename_dict)
    btotalv = totalv.rename(columns=rename_dict)

    # Turn total vessel into bubble sizes. Use min_size and factor to tweak.
    scale_factor = 2
    btotalv_size = np.sqrt((btotalv*1000) / np.pi) / scale_factor
    min_size = .1
    btotalv_size = btotalv_size.where(btotalv_size >= min_size).fillna(min_size)
    
    # Use pandas categories and categorize & color the regions
    regions.Group = regions.Group.astype('category')
    regions_list = list(regions.Group.cat.categories)

    def get_color(r):
        return Spectral6[regions_list.index(r.Group)]
    regions['region_color'] = regions.apply(get_color, axis=1)
    
    return brtype, bvclass, years, bregions, regions_list, btotalv_size


In [551]:
rtype_df, vclass_df, years, bregions, regions_list, btotalv_size = process_data()
#print(bregions)

sources = {}

region_color = bregions['region_color']
region_color.name = 'region_color'

for year in years:
    sclass = vclass_df[year]
    sclass.name = 'vesselclass'
    stype = rtype_df[year]
    stype.name = 'routetype' 
    tvessel = btotalv_size[year]
    tvessel.name = 'totalvessel' 
    new_df = pd.concat([sclass, stype, tvessel, region_color], axis=1)
    sources['_' + str(year)] = ColumnDataSource(new_df)   


In [569]:
print(sources)

{'_2012': <bokeh.models.sources.ColumnDataSource object at 0x00000000099BFDD8>, '_2000': <bokeh.models.sources.ColumnDataSource object at 0x000000000AB24710>, '_2004': <bokeh.models.sources.ColumnDataSource object at 0x00000000099BFCF8>, '_2002': <bokeh.models.sources.ColumnDataSource object at 0x000000000AB3EC88>, '_2001': <bokeh.models.sources.ColumnDataSource object at 0x000000000AAFD0F0>, '_2007': <bokeh.models.sources.ColumnDataSource object at 0x000000000AB85320>, '_2003': <bokeh.models.sources.ColumnDataSource object at 0x000000000AB2D1D0>, '_2009': <bokeh.models.sources.ColumnDataSource object at 0x000000000AB857F0>, '_2010': <bokeh.models.sources.ColumnDataSource object at 0x000000000AB85978>, '_2008': <bokeh.models.sources.ColumnDataSource object at 0x000000000AAAF4E0>, '_2014': <bokeh.models.sources.ColumnDataSource object at 0x000000000AB854A8>, '_2013': <bokeh.models.sources.ColumnDataSource object at 0x000000000AB3E8D0>, '_2005': <bokeh.models.sources.ColumnDataSource obj

### Build the plot

First we need to create a `Plot` object. We'll start with a basic frame, only specifying things like plot height, width, and ranges for the axes.

In [552]:
dictionary_of_sources = dict(zip([x for x in years], ['_%s' % x for x in years]))

In [553]:
js_source_array = str(dictionary_of_sources).replace("'", "")
js_source_array

'{2000: _2000, 2001: _2001, 2002: _2002, 2003: _2003, 2004: _2004, 2005: _2005, 2006: _2006, 2007: _2007, 2008: _2008, 2009: _2009, 2010: _2010, 2011: _2011, 2012: _2012, 2013: _2013, 2014: _2014}'

In [554]:
xdr = Range1d(1, 20)
ydr = Range1d(1, 20)
plot = Plot(
    x_range=xdr,
    y_range=ydr,
    plot_width=800,
    plot_height=400,
    outline_line_color=None,
    toolbar_location=None, 
    min_border=20,
)

In [555]:
show(plot)

### Build the axes

Next we can make some stylistic modifications to the plot axes (e.g. by specifying the text font, size, and color, and by adding labels), to make the plot look more like the one in Hans Rosling's video.

In [556]:
AXIS_FORMATS = dict(
    minor_tick_in=None,
    minor_tick_out=None,
    major_tick_in=None,
    major_label_text_font_size="10pt",
    major_label_text_font_style="normal",
    axis_label_text_font_size="10pt",

    axis_line_color='#AAAAAA',
    major_tick_line_color='#AAAAAA',
    major_label_text_color='#666666',

    major_tick_line_cap="round",
    axis_line_cap="round",
    axis_line_width=1,
    major_tick_line_width=1,
)

xaxis = LinearAxis(ticker=SingleIntervalTicker(interval=2), axis_label="Vessel Class", **AXIS_FORMATS)
yaxis = LinearAxis(ticker=SingleIntervalTicker(interval=2), axis_label="Route Type", **AXIS_FORMATS)   
plot.add_layout(xaxis, 'below')
plot.add_layout(yaxis, 'left')

In [557]:
show(plot)

In [558]:
text_source = ColumnDataSource({'year': ['%s' % years[0]]})
text = Text(x=4, y=5, text='year', text_font_size='150pt', text_color='#EEEEEE')
plot.add_glyph(text_source, text)

<bokeh.models.renderers.GlyphRenderer at 0xab95320>

In [559]:
show(plot)

### Add the bubbles and hover
Next we will add the bubbles using Bokeh's [`Circle`](http://bokeh.pydata.org/en/latest/docs/reference/plotting.html#bokeh.plotting.figure.Figure.circle) glyph. We start from the first year of data, which is our source that drives the circles (the other sources will be used later). 

In [560]:
# Add the circle
renderer_source = sources['_%s' % years[0]]
circle_glyph = Circle(
    x='vesselclass', y='routetype', size='totalvessel',
    fill_color='region_color', fill_alpha=0.8, 
    line_color='#7c7e71', line_width=0.5, line_alpha=0.5)

circle_renderer = plot.add_glyph(renderer_source, circle_glyph)

In [561]:
# Add the hover (only against the circle and not other plot elements)
tooltips = "@index"
plot.add_tools(HoverTool(tooltips=tooltips, renderers=[circle_renderer]))

In [562]:
 show(plot)

### Add the legend

Next we will manually build a legend for our plot by adding circles and texts to the upper-righthand portion:

In [563]:
#print(bregions)
text_x = 2
text_y = 18
for i, region in enumerate(regions_list):
    plot.add_glyph(Text(x=text_x, y=text_y, text=[region], text_font_size='10pt', text_color='#666666'))
    plot.add_glyph(Circle(x=text_x -.2, y=text_y+.5, fill_color=Spectral6[i], size=10, line_color=None, fill_alpha=0.8))
    text_y = text_y - 1.2

In [564]:
show(plot)

In [565]:
# Add the slider
code = """
    var year = slider.get('value'),
        sources = %s,
        new_source_data = sources[year].get('data');
    renderer_source.set('data', new_source_data);
    text_source.set('data', {'year': [String(year)]});
""" % js_source_array

callback = CustomJS(args=sources, code=code)
slider = Slider(start=years[0], end=years[-1], value=1, step=1, title="Year", callback=callback)
callback.args["renderer_source"] = renderer_source
callback.args["slider"] = slider
callback.args["text_source"] = text_source

In [566]:
 show(widgetbox(slider))

### Putting all the pieces together

Last but not least, we put the chart and the slider together in a layout and display it inline in the notebook.

In [567]:
show(layout([[plot], [slider]], sizing_mode='scale_width'))