# Exploring with Bokeh charts

In [None]:
import pandas as pd

from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook

Use the `WHO.csv` dataset in the `data` folder to build an interactive chart with Bokeh.

In [None]:
# read the WHO data from the data folder one level up (relative path)
who_df = pd.read_csv('../data/WHO.csv')

# examine dataset: head() is the cell's last expression, so its rows are displayed
who_df.head()

In [None]:
# how many rows, columns? shape is a (rows, columns) tuple
who_df.shape

In [None]:
# number of countries per region (counts the rows for each Region value)
who_df['Region'].value_counts()

In [None]:
# get column names for easy copy/pasting into later cells
who_df.columns

I think I would like to make a scatter plot of child mortality against life expectancy. Let's investigate those two columns.

In [None]:
# summary statistics (count, mean, min, max, ...) for the ChildMortality column
who_df['ChildMortality'].describe()

In [None]:
# summary statistics (count, mean, min, max, ...) for the LifeExpectancy column
who_df['LifeExpectancy'].describe()

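It seems there are numeric values for all countries, since the min and max are numbers for both of the columns I just examined. To be certain, compare the `count` in each summary with the number of rows from `who_df.shape`, because `describe()` skips missing values. No cleaning or deleting of rows will be required.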

Let's start with a Bokeh chart. 

See the URLs in the comments in the next cell for how I figured things out.

In [None]:
# Scatter chart: ChildMortality on the x-axis, LifeExpectancy on the y-axis.

# Render Bokeh output inline in the notebook. Without this call the chart
# opens in a new browser tab instead.
output_notebook()

# Toolbar tools - see https://bokeh.pydata.org/en/latest/docs/user_guide/tools.html
TOOLS = "zoom_in,zoom_out,hover,pan,crosshair,reset"

# Hover tooltips (documented on the same page). Each "@Name" refers to the
# column of that name in the data source defined below.
TOOLTIPS = [("country", "@Country"),
            ("mortality", "@ChildMortality"),
            ("life exp", "@LifeExpectancy")]

# The whole data frame serves as the data source for the glyphs and tooltips.
source = ColumnDataSource(who_df)

# Figure setup - see https://bokeh.pydata.org/en/latest/docs/reference/plotting.html
p = figure(
    title="WHO Data: Child Mortality and Life Expectancy",
    x_axis_label="Child Mortality per 1,000 Live Births",
    y_axis_label="Life Expectancy in Years",
    plot_width=900,
    tools=TOOLS,
    tooltips=TOOLTIPS,
)

# One marker per row - see https://bokeh.pydata.org/en/latest/docs/gallery/color_scatter.html
p.scatter(
    'ChildMortality',
    'LifeExpectancy',
    source=source,
    size=16,
    fill_color="#ee6666",
    fill_alpha=0.6,
    line_color="#6666ee",
)

# Display the chart built above as "p".
show(p)


In [None]:
# Export the chart as a stand-alone HTML + JS page.
# https://bokeh.pydata.org/en/latest/docs/user_guide/embed.html 
from bokeh.resources import CDN
from bokeh.embed import file_html

# Create a complete HTML document as a string (p is the figure built in the
# chart cell above; BokehJS is referenced from the CDN rather than inlined).
html = file_html(p, CDN, "bokeh_WHO_data")

# Write the page explicitly as UTF-8. The chart data includes country names,
# which may contain non-ASCII characters; the platform default encoding could
# fail on them or garble them. The with-block also guarantees the file is
# closed even if the write raises.
with open('bokeh_WHO_data.html', 'w', encoding='utf-8') as html_file:
    html_file.write(html)


I would like to color the dots by region, but that will wait for another day. 