# Interactive Dashboard using Bokeh: Gapminder

# Importing Packages

In [1]:
import pandas as pd
import numpy as np
# Importing Bokeh Packages
from bokeh.io import output_file, show, curdoc
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper, Slider, Select
from bokeh.palettes import Spectral6
from bokeh.layouts import widgetbox, row

# Loading Data

In [2]:
# Read the tidy Gapminder CSV straight from its URL, using the 'Year' column as the row index
gapminder_url = 'https://assets.datacamp.com/production/course_1392/datasets/gapminder_tidy.csv'
data = pd.read_csv(gapminder_url, index_col='Year')

In [3]:
# Preview the first rows to confirm the columns and the Year index loaded as expected
data.head()

Unnamed: 0_level_0,Country,fertility,life,population,child_mortality,gdp,region
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1964,Afghanistan,7.671,33.639,10474903.0,339.7,1182.0,South Asia
1965,Afghanistan,7.671,34.152,10697983.0,334.1,1182.0,South Asia
1966,Afghanistan,7.671,34.662,10927724.0,328.7,1168.0,South Asia
1967,Afghanistan,7.671,35.17,11163656.0,323.3,1173.0,South Asia
1968,Afghanistan,7.671,35.674,11411022.0,318.1,1187.0,South Asia


In [4]:
# (rows, columns) of the loaded frame; the Year index is not counted as a column
data.shape

(10111, 7)

In [5]:
# Column dtypes and non-null counts -- a first look at where values are missing
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10111 entries, 1964 to 2006
Data columns (total 7 columns):
Country            10111 non-null object
fertility          10100 non-null float64
life               10111 non-null float64
population         10108 non-null float64
child_mortality    9210 non-null float64
gdp                9000 non-null float64
region             10111 non-null object
dtypes: float64(5), object(2)
memory usage: 552.9+ KB


In [6]:
# Summary statistics for the numeric columns
data.describe()

Unnamed: 0,fertility,life,population,child_mortality,gdp
count,10100.0,10111.0,10108.0,9210.0,9000.0
mean,4.028719,64.0786,25604480.0,80.834505,12746.916667
std,2.013968,11.122779,103238300.0,79.220942,17797.809953
min,0.836,6.0,1170.0,2.0,142.0
25%,2.17575,56.2825,887406.2,18.9,2263.0
50%,3.6325,67.157,4765781.0,50.7,6438.5
75%,5.90525,72.484,14769950.0,121.775,15887.25
max,9.223,83.58,1359368000.0,435.8,182668.0


# Checking Data Quality

In [7]:
# Count rows whose column values repeat an earlier row.
# NOTE: DataFrame.duplicated() compares column values only, so the Year index is not part of the comparison.
data.duplicated().sum()

0

In [8]:
# Total number of missing cells across the whole frame
data.isnull().sum().sum()

2026

In [9]:
# Missing cells broken down per column
data.isnull().sum()

Country               0
fertility            11
life                  0
population            3
child_mortality     901
gdp                1111
region                0
dtype: int64

# Designing UI for Gapminder Dashboard using Bokeh

In [10]:
# Build the ColumnDataSource that feeds the plot, seeded with the rows for 1970
rows_1970 = data.loc[1970]
source = ColumnDataSource(
    data={
        'x': rows_1970.fertility,
        'y': rows_1970.life,
        'country': rows_1970.Country,
        # Population rescaled (divide by 20 million, offset by 2)
        'pop': (rows_1970.population / 20000000) + 2,
        'region': rows_1970.region,
    }
)

In [11]:
# Save the minimum and maximum values of the fertility column: xmin, xmax
# Series.min()/max() skip NaN. The builtin min()/max() compare element by
# element, so their result depends on row order and becomes NaN whenever the
# first row is missing (fertility contains missing values).
xmin, xmax = float(data.fertility.min()), float(data.fertility.max())

# Save the minimum and maximum values of the life expectancy column: ymin, ymax
ymin, ymax = float(data.life.min()), float(data.life.max())

In [12]:
# Create the figure with fixed axis ranges so the view stays stable as data changes
# NOTE(review): newer Bokeh releases use `height`/`width` instead of
# `plot_height`/`plot_width` -- confirm against the installed version.
plot = figure(
    title='Gapminder Data for 1970',
    plot_height=400,
    plot_width=700,
    x_range=(xmin, xmax),
    y_range=(ymin, ymax),
)

In [13]:
# Label both axes to match the columns plotted initially (fertility vs. life)
plot.xaxis.axis_label = 'Fertility (children per woman)'
plot.yaxis.axis_label = 'Life Expectancy (years)'

In [14]:
# Distinct region names, in order of first appearance, as a plain Python list
regions_list = data['region'].unique().tolist()

In [15]:
# Map each region name to one colour of the six-colour Spectral palette
color_mapper = CategoricalColorMapper(
    factors=regions_list,
    palette=Spectral6,
)

In [16]:
# Draw one circle per row of `source`, coloured by region through the colour mapper.
# No `size` argument is passed, so this glyph does not read the 'pop' column.
# NOTE(review): newer Bokeh releases replace `legend=` with `legend_field=` /
# `legend_label=` -- confirm against the installed version.
plot.circle(
    x='x',
    y='y',
    source=source,
    fill_alpha=0.8,
    color=dict(field='region', transform=color_mapper),
    legend='region',
)

In [17]:
# Set the legend.location attribute of the plot to 'top_right'
# (the legend exists because the circle glyph was added with a legend argument)
plot.legend.location = 'top_right'

In [18]:
# Hover tool that shows the 'country' column of the data source for the hovered point
hover = HoverTool(
    tooltips=[
        ('Country', '@country'),
    ]
)

# Register the tool with the figure
plot.add_tools(hover)

In [19]:
# Define the callback function
def callback(attr, old, new):
    """Refresh the plot after the year slider or an axis dropdown changes.

    Bokeh passes ``attr``, ``old`` and ``new`` to every ``on_change`` handler.
    They are not used here: the current widget values are read directly so a
    single handler can serve all three widgets.

    Side effects: updates the axis labels, ``source.data``, both axis ranges
    and the title of the module-level ``plot``.
    """
    # Read the current value off the slider and 2 dropdowns: yr, x, y
    yr = slider.value
    x = x_select.value
    y = y_select.value

    # Label axes of plot with the selected column names
    plot.xaxis.axis_label = x
    plot.yaxis.axis_label = y

    # Slice the selected year once instead of once per column
    year_rows = data.loc[yr]
    source.data = {
        'x': year_rows[x],
        'y': year_rows[y],
        'country': year_rows.Country,
        'pop': (year_rows.population / 20000000) + 2,
        'region': year_rows.region,
    }

    # Set the range of all axes from the full data set (all years) so the view
    # does not jump when only the year changes. Series.min()/max() skip NaN;
    # the builtin min()/max() give an order-dependent result and return NaN
    # when the first row of the column is missing.
    x_values = data[x]
    y_values = data[y]
    plot.x_range.start = float(x_values.min())
    plot.x_range.end = float(x_values.max())
    plot.y_range.start = float(y_values.min())
    plot.y_range.end = float(y_values.max())

    # Add updating title to plot
    plot.title.text = 'Gapminder data for %d' % yr

In [20]:
# Create widgets: a year slider and one column picker per axis
slider = Slider(title='Year', start=1970, end=2010, step=1, value=1970)

# Both dropdowns offer the same numeric columns; each gets its own list copy
axis_options = ['fertility', 'life', 'child_mortality', 'gdp']

x_select = Select(
    title='x-axis data',
    options=list(axis_options),
    value='fertility',
)

y_select = Select(
    title='y-axis data',
    options=list(axis_options),
    value='life',
)

In [21]:
# Route every widget's 'value' change through the same callback
for widget in (slider, x_select, y_select):
    widget.on_change('value', callback)

In [22]:
# Put the controls to the left of the plot and register the layout with the current document
# NOTE(review): newer Bokeh releases drop `widgetbox` in favour of `column` --
# confirm against the installed version.
controls = widgetbox(slider, x_select, y_select)
layout = row(controls, plot)

document = curdoc()
document.add_root(layout)
document.title = 'Gapminder'