# Wealth of Nations.ipynb 
Adapted from the bqplot example [Wealth of Nations.ipynb](https://github.com/bqplot/bqplot/blob/master/examples/Applications/Wealth%20of%20Nations.ipynb).

In [1]:
import pandas as pd
import numpy as np
import os

from bqplot import (
    LogScale, LinearScale, OrdinalColorScale, ColorAxis,
    Axis, Scatter, Lines, CATEGORY10, Label, Figure, Tooltip
)

from ipywidgets import HBox, VBox, IntSlider, Play, jslink

In [3]:
# Year shown when the figure is first built; also the slider's starting value.
initial_year = 1800

#### Cleaning and Formatting JSON Data


In [7]:
# Load the nations dataset. The relative path is resolved against the current
# working directory, so the notebook must be run from the folder containing `data/`.
data = pd.read_json(os.path.abspath('data/data_files_nations.json'))

In [8]:
# Summarise the column dtypes and non-null counts of the freshly loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 5 columns):
name              180 non-null object
region            180 non-null object
income            180 non-null object
population        180 non-null object
lifeExpectancy    180 non-null object
dtypes: object(5)
memory usage: 7.2+ KB


In [9]:
# Show the raw frame: income, population and lifeExpectancy each hold a list of
# [year, value] pairs per country.
data

Unnamed: 0,name,region,income,population,lifeExpectancy
0,Angola,Sub-Saharan Africa,"[[1800, 359.93], [1820, 359.93], [1913, 556.12...","[[1800, 1567028], [1820, 1567028], [1940, 3738...","[[1800, 26.98], [1940, 26.98], [1950, 29.22], ..."
1,Benin,Sub-Saharan Africa,"[[1800, 553.72], [1820, 553.72], [1913, 855.53...","[[1800, 636559], [1820, 636559], [1950, 167266...","[[1800, 31], [1944, 31], [1950, 36.53], [1951,..."
2,Botswana,Sub-Saharan Africa,"[[1800, 407.36], [1820, 407.36], [1913, 629.4]...","[[1800, 121000], [1904, 121000], [1911, 125000...","[[1800, 33.6], [1945, 33.6], [1950, 46.82], [1..."
3,Burkina Faso,Sub-Saharan Africa,"[[1800, 454.33], [1820, 454.33], [1913, 497.44...","[[1800, 1665421], [1820, 1665421], [1950, 4376...","[[1800, 29.2], [1945, 29.2], [1950, 32.89], [1..."
4,Burundi,Sub-Saharan Africa,"[[1800, 447.59], [1820, 447.59], [1913, 353.82...","[[1800, 899097], [1820, 899097], [1950, 236252...","[[1800, 31.5], [1945, 31.5], [1950, 38.42], [1..."
...,...,...,...,...,...
175,Timor-Leste,East Asia & Pacific,"[[1800, 514.12], [1820, 518.73], [1850, 539.86...","[[1800, 137262], [1820, 137262], [1926, 442000...","[[1950, 28.97], [1951, 29.22], [1952, 29.73], ..."
176,Tokelau,East Asia & Pacific,"[[2005, 889.43]]","[[1800, 1009], [1843, 1000], [1850, 1000], [19...","[[2006, 69]]"
177,Tonga,East Asia & Pacific,"[[1800, 667.71], [1820, 667.71], [1913, 902.69...","[[1800, 18658], [1843, 18500], [1850, 18500], ...","[[1950, 57.91], [1951, 58.1], [1952, 58.47], [..."
178,Vietnam,East Asia & Pacific,"[[1800, 459.71], [1820, 459.71], [1870, 440.8]...","[[1800, 6551000], [1820, 6551000], [1870, 1052...","[[1800, 32], [1930, 32], [1936, 33], [1950, 39..."


In [10]:
def clean_data(data):
    """Drop every row whose income, lifeExpectancy or population series is too short.

    A series counts as too short when it has four or fewer entries. The columns
    are checked one after another, each on the rows that survived the previous
    check. The frame passed in is not modified; the filtered frame is returned
    with its original index labels.
    """
    series_columns = ('income', 'lifeExpectancy', 'population')
    for column in series_columns:
        too_short = data[column].apply(len) <= 4
        data = data.drop(index=data.index[too_short.to_numpy()])
    return data

def extrap_interp(data):
    """Resample a sparse [year, value] series onto every year from 1800 to 2008.

    Parameters
    ----------
    data : sequence of [year, value] pairs
        The known observations, in any order. Must contain at least one pair.

    Returns
    -------
    numpy.ndarray
        209 floats, one per year 1800..2008. Years between two observations are
        linearly interpolated; years before the first or after the last
        observation take the first or last observed value (np.interp clamps at
        the ends rather than extrapolating).
    """
    points = np.array(data)
    # np.interp does not verify that its x-coordinates are increasing and
    # silently returns meaningless values when they are not, so order the
    # observations by year first. A stable sort leaves sorted input unchanged.
    points = points[np.argsort(points[:, 0], kind='stable')]
    years = np.arange(1800, 2009, 1.)
    return np.interp(years, points[:, 0], points[:, 1])

def extrap_data(data):
    """Return a new frame whose series columns are resampled to one value per year.

    Every cell of the 'income', 'lifeExpectancy' and 'population' columns is
    passed through extrap_interp. All other columns, the column order and the
    index are carried over unchanged.

    The frame passed in is left untouched: the result is built with
    DataFrame.assign instead of overwriting the caller's columns, so callers
    must use the return value.
    """
    series_columns = ('income', 'lifeExpectancy', 'population')
    resampled = {
        column: data[column].apply(extrap_interp) for column in series_columns
    }
    return data.assign(**resampled)

In [11]:
# Drop countries whose series are too short, then resample the rest to yearly values.
# NOTE(review): `data` is rebound to the processed frame, so re-running this cell
# feeds already-resampled 1-D arrays back into extrap_interp, which indexes its
# input as 2-D. Reload the JSON (or restart the kernel) before re-running.
data = clean_data(data)
data = extrap_data(data)

In [12]:
# Inspect the processed frame: every series column now holds one value per year.
data

Unnamed: 0,name,region,income,population,lifeExpectancy
0,Angola,Sub-Saharan Africa,"[359.93, 359.93, 359.93, 359.93, 359.93, 359.9...","[1567028.0, 1567028.0, 1567028.0, 1567028.0, 1...","[26.98, 26.98, 26.98, 26.98, 26.98, 26.98, 26...."
1,Benin,Sub-Saharan Africa,"[553.72, 553.72, 553.72, 553.72, 553.72, 553.7...","[636559.0, 636559.0, 636559.0, 636559.0, 63655...","[31.0, 31.0, 31.0, 31.0, 31.0, 31.0, 31.0, 31...."
2,Botswana,Sub-Saharan Africa,"[407.36, 407.36, 407.36, 407.36, 407.36, 407.3...","[121000.0, 121000.0, 121000.0, 121000.0, 12100...","[33.6, 33.6, 33.6, 33.6, 33.6, 33.6, 33.6, 33...."
3,Burkina Faso,Sub-Saharan Africa,"[454.33, 454.33, 454.33, 454.33, 454.33, 454.3...","[1665421.0, 1665421.0, 1665421.0, 1665421.0, 1...","[29.2, 29.2, 29.2, 29.2, 29.2, 29.2, 29.2, 29...."
4,Burundi,Sub-Saharan Africa,"[447.59, 447.59, 447.59, 447.59, 447.59, 447.5...","[899097.0, 899097.0, 899097.0, 899097.0, 89909...","[31.5, 31.5, 31.5, 31.5, 31.5, 31.5, 31.5, 31...."
...,...,...,...,...,...
174,Thailand,East Asia & Pacific,"[496.98, 496.98, 496.98, 496.98, 496.98, 496.9...","[4665000.0, 4665000.0, 4665000.0, 4665000.0, 4...","[30.4, 30.4, 30.4, 30.4, 30.4, 30.4, 30.4, 30...."
175,Timor-Leste,East Asia & Pacific,"[514.12, 514.3505, 514.581, 514.8115, 515.042,...","[137262.0, 137262.0, 137262.0, 137262.0, 13726...","[28.97, 28.97, 28.97, 28.97, 28.97, 28.97, 28...."
177,Tonga,East Asia & Pacific,"[667.71, 667.71, 667.71, 667.71, 667.71, 667.7...","[18658.0, 18654.325581395347, 18650.6511627907...","[57.91, 57.91, 57.91, 57.91, 57.91, 57.91, 57...."
178,Vietnam,East Asia & Pacific,"[459.71, 459.71, 459.71, 459.71, 459.71, 459.7...","[6551000.0, 6551000.0, 6551000.0, 6551000.0, 6...","[32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32...."


In [13]:
# Extremes of each series across all countries and all years: the inner apply
# reduces each country's array, the outer call reduces across countries.
income_min = np.min(data['income'].apply(np.min))
income_max = np.max(data['income'].apply(np.max))

life_exp_min = np.min(data['lifeExpectancy'].apply(np.min))
life_exp_max = np.max(data['lifeExpectancy'].apply(np.max))

pop_min = np.min(data['population'].apply(np.min))
pop_max = np.max(data['population'].apply(np.max))

In [14]:
def get_data(year):
    """Return per-country income, life expectancy and population for one year.

    Parameters
    ----------
    year : int
        Calendar year to look up; the yearly series start at 1800.

    Returns
    -------
    tuple of pandas.Series
        (income, life_exp, pop), each indexed like the module-level `data`
        frame and holding that year's value for every country.

    Raises
    ------
    IndexError
        If `year` is before 1800 or past the end of the yearly series.
    """
    year_index = year - 1800
    if year_index < 0:
        # A negative offset would not fail: it would wrap around and silently
        # return values from the end of each series.
        raise IndexError(
            'year {} is before 1800, the first year of the series'.format(year)
        )
    income = data['income'].apply(lambda series: series[year_index])
    life_exp = data['lifeExpectancy'].apply(lambda series: series[year_index])
    pop = data['population'].apply(lambda series: series[year_index])
    return income, life_exp, pop

#### Creating the Tooltip to display the required fields
bqplot's native Tooltip allows us to simply display the data fields we require on a mouse-interaction.

In [16]:
# Hover tooltip: each entry in `fields` is shown under the matching entry in `labels`.
tt = Tooltip(
    fields=['name', 'x', 'y'],
    labels=['Country Name', 'Income per Capita', 'Life Expectancy'],
)

#### Creating the Label to display the year
Staying true to the d3 recreation of the talk, we place a Label widget in the bottom-right of the Figure (it inherits the Figure co-ordinates when no scale is passed to it). With enable_move set to True, the Label can be dragged around.

In [17]:
year_label = Label(
    x=[0.75],
    y=[0.10],
    default_size=46,
    font_weight='bolder',
    colors=['orange'],
    text=[str(initial_year)],  # starts at the initial year; rewritten by the slider callback
    enable_move=True,          # lets the user drag the label
)

#### Defining Axes and Scales
The inherent skewness of the income data favors the use of a LogScale. Also, since the color coding by regions does not follow an ordering, we use the OrdinalColorScale.

In [18]:
# x: log scale for income; the lower bound is never above 200.
x_sc = LogScale(
    min=min(200, income_min),
    max=income_max,
)
# y: linear scale spanning the observed life-expectancy range.
y_sc = LinearScale(
    min=life_exp_min,
    max=life_exp_max,
)
# colour: one category per distinct region.
c_sc = OrdinalColorScale(
    domain=data['region'].unique().tolist(),
    colors=CATEGORY10[:6],
)
# size: linear scale spanning the observed population range.
size_sc = LinearScale(
    min=pop_min,
    max=pop_max,
)

In [19]:
ax_y = Axis(
    label='Life Expectancy',
    scale=y_sc,
    orientation='vertical',
    side='left',
    grid_lines='solid',
)

# Income ticks at 2, 4, 6, 8 and 10 times each of 100, 1000 and 10000.
ticks = [2, 4, 6, 8, 10]
income_ticks = [t * magnitude for magnitude in (100, 1000, 10000) for t in ticks]
ax_x = Axis(
    label='Income per Capita',
    scale=x_sc,
    grid_lines='solid',
    tick_format='~s',
    tick_values=income_ticks,
)

#### Creating the Scatter Mark with the appropriate size and color parameters passed
To generate the appropriate graph, we need to pass the population of the country to the size attribute and its region to the color attribute.

In [20]:
# Seed the scatter with the values for the initial year (one entry per country).
cap_income, life_exp, pop = get_data(initial_year)

In [21]:
wealth_scat = Scatter(
    x=cap_income,
    y=life_exp,
    color=data['region'],  # mapped through the ordinal colour scale
    size=pop,              # mapped through the size scale
    names=data['name'],
    display_names=False,
    scales={'x': x_sc, 'y': y_sc, 'color': c_sc, 'size': size_sc},
    default_size=4112,
    tooltip=tt,
    animate=True,
    stroke='Black',
    unhovered_style={'opacity': 0.5},
)

In [22]:
# Trajectory of one country through (income, life expectancy) space. It starts
# hidden and is repopulated by the hover callback; the first row only serves as
# placeholder data.
# Positional .iloc[0] is used instead of the label lookup [0]: clean_data drops
# rows without resetting the index, so the label 0 is not guaranteed to exist.
nation_line = Lines(
    x=data['income'].iloc[0],
    y=data['lifeExpectancy'].iloc[0],
    colors=['Gray'],
    scales={'x': x_sc, 'y': y_sc},
    visible=False,
)

#### Creating the Figure

In [23]:
# Shared by the figure's animation_duration and the Play widget's interval.
# NOTE(review): presumably milliseconds — confirm against the bqplot / ipywidgets docs.
time_interval = 10

In [24]:
fig = Figure(
    marks=[wealth_scat, year_label, nation_line],
    axes=[ax_x, ax_y],
    title='Health and Wealth of Nations',
    animation_duration=time_interval,
)

#### Using a Slider to allow the user to change the year and a button for animation
Here we see how we can seamlessly integrate bqplot into the Jupyter widget infrastructure.

In [26]:
# Year selector covering 1800-2008, the same span as the resampled yearly series.
year_slider = IntSlider(min=1800, max=2008, step=1, description='Year', value=initial_year)

When the hovered_point of the Scatter plot changes (i.e. when the user hovers over a different element), the entire path of that country is displayed by making the Lines object visible and setting its x and y attributes.

In [27]:
def hover_changed(change):
    """Show the hovered country's full trajectory, or hide the line when nothing is hovered.

    `change.new` is the index of the hovered scatter point, or None when the
    pointer is not over any point.
    """
    hovered = change.new
    if hovered is None:
        nation_line.visible = False
        return

    # Look the country up by name once and reuse the row for both coordinates.
    country = wealth_scat.names[hovered]
    row = data[data['name'] == country]
    nation_line.x = row['income'].values[0]
    nation_line.y = row['lifeExpectancy'].values[0]
    nation_line.visible = True
        
# Run hover_changed whenever the scatter's 'hovered_point' trait changes.
wealth_scat.observe(hover_changed, 'hovered_point')

On the slider value callback (a function that is triggered every time the value of the slider changes) we change the x, y and size co-ordinates of the Scatter. We also update the text of the Label to reflect the current year.

In [28]:
def year_changed(change):
    """Refresh the scatter and the year label for the slider's current year.

    `change` is accepted because observers are called with it, but the year is
    read from the slider itself.
    """
    income, life_exp, pop = get_data(year_slider.value)
    wealth_scat.x = income
    wealth_scat.y = life_exp
    wealth_scat.size = pop
    year_label.text = [str(year_slider.value)]

# Run year_changed whenever the slider's 'value' trait changes.
year_slider.observe(year_changed, 'value')

#### Add an animation button

In [29]:
# Play widget over the same 1800-2008 range as the slider; jslink ties the two
# 'value' traits together, so playing advances the slider and fires year_changed.
play_button = Play(min=1800, max=2008, interval=time_interval)
jslink((play_button, 'value'), (year_slider, 'value'))

Link(source=(Play(value=1800, interval=10, max=2008, min=1800), 'value'), target=(IntSlider(value=1800, descri…

#### Displaying the GUI

In [30]:
# Lay out the play button and slider side by side, with the figure underneath.
VBox([HBox([play_button, year_slider]), fig])

VBox(children=(HBox(children=(Play(value=1800, interval=10, max=2008, min=1800), IntSlider(value=1800, descrip…