In [2]:
import pandas as pd

from bokeh.io import output_file, show, curdoc
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper, Slider
from bokeh.palettes import Spectral6
from bokeh.layouts import widgetbox, row

In [5]:
# Data cleaning and preparation
data = pd.read_csv('/notebooks-for-articles/data/co2_emissions_tonnes_per_person.csv')
data.head()

FileNotFoundError: File b'/notebooks-for-articles/data/co2_emissions_tonnes_per_person.csv' does not exist

In [4]:
gapminder = pd.read_csv('data/gapminder_tidy.csv')
gapminder.head()

Unnamed: 0,Country,Year,fertility,life,population,child_mortality,gdp,region
0,Afghanistan,1964,7.671,33.639,10474903.0,339.7,1182.0,South Asia
1,Afghanistan,1965,7.671,34.152,10697983.0,334.1,1182.0,South Asia
2,Afghanistan,1966,7.671,34.662,10927724.0,328.7,1168.0,South Asia
3,Afghanistan,1967,7.671,35.17,11163656.0,323.3,1173.0,South Asia
4,Afghanistan,1968,7.671,35.674,11411022.0,318.1,1187.0,South Asia


In [21]:
df = gapminder[['Country', 'region']].drop_duplicates()
data_with_regions = pd.merge(data, df, left_on='country', right_on='Country', how='inner')
data_with_regions = data_with_regions.drop('Country', axis='columns')

data_with_regions.head()

Unnamed: 0,country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,region
0,Afghanistan,,,,,,,,,,...,0.0637,0.0854,0.154,0.242,0.294,0.412,0.35,0.316,0.299,South Asia
1,Albania,,,,,,,,,,...,1.28,1.3,1.46,1.48,1.56,1.79,1.68,1.73,1.96,Europe & Central Asia
2,Algeria,,,,,,,,,,...,2.99,3.19,3.16,3.42,3.3,3.29,3.46,3.51,3.72,Middle East & North Africa
3,Angola,,,,,,,,,,...,1.1,1.2,1.18,1.23,1.24,1.25,1.33,1.25,1.29,Sub-Saharan Africa
4,Antigua and Barbuda,,,,,,,,,,...,4.91,5.14,5.19,5.45,5.54,5.36,5.42,5.36,5.38,America


In [22]:
new_df = pd.melt(data_with_regions, id_vars=['country', 'region'])
columns = ['country', 'region', 'year', 'co2']
new_df.columns = columns
new_df['year'] = new_df['year'].astype('int64')
new_df.head()

Unnamed: 0,country,region,year,co2
0,Afghanistan,South Asia,1800,
1,Albania,Europe & Central Asia,1800,
2,Algeria,Middle East & North Africa,1800,
3,Angola,Sub-Saharan Africa,1800,
4,Antigua and Barbuda,America,1800,


In [24]:
upd_new_df = new_df[new_df['year'].astype('int64') > 1963]
upd_new_df = upd_new_df.sort_values(by=['country', 'year'])
upd_new_df.head()

Unnamed: 0,country,region,year,co2
28372,Afghanistan,South Asia,1964,0.0863
28545,Afghanistan,South Asia,1965,0.101
28718,Afghanistan,South Asia,1966,0.108
28891,Afghanistan,South Asia,1967,0.124
29064,Afghanistan,South Asia,1968,0.116


In [25]:
df_gdp = gapminder[['Country', 'Year', 'gdp']]
df_gdp.columns = ['country', 'year', 'gdp']
df_gdp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10111 entries, 0 to 10110
Data columns (total 3 columns):
country    10111 non-null object
year       10111 non-null int64
gdp        9000 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 237.1+ KB


In [26]:
final_df = pd.merge(upd_new_df, df_gdp, on=['country', 'year'], how='left')
final_df = final_df.dropna()
final_df.head()

Unnamed: 0,country,region,year,co2,gdp
0,Afghanistan,South Asia,1964,0.0863,1182.0
1,Afghanistan,South Asia,1965,0.101,1182.0
2,Afghanistan,South Asia,1966,0.108,1168.0
3,Afghanistan,South Asia,1967,0.124,1173.0
4,Afghanistan,South Asia,1968,0.116,1187.0


In [27]:
# Creating visualization app with Bokeh.io
regions_list = final_df.region.unique().tolist()
color_mapper = CategoricalColorMapper(factors=regions_list, palette=Spectral6)

In [28]:
# Make the ColumnDataSource: source
source = ColumnDataSource(data={
    'x': final_df.gdp[final_df['year'] == 1964],
    'y': final_df.co2[final_df['year'] == 1964],
    'country': final_df.country[final_df['year'] == 1964],
    'region': final_df.region[final_df['year'] == 1964],
})

In [29]:
# Save the minimum and maximum values of the fertility column: xmin, xmax
xmin, xmax = min(final_df.gdp), max(final_df.gdp)

# Save the minimum and maximum values of the life expectancy column: ymin, ymax
ymin, ymax = min(final_df.co2), max(final_df.co2)

In [30]:
# Create the figure: plot
plot = figure(title='Gapminder Data for 1964', plot_height=400, plot_width=700,
              x_range=(xmin, xmax), y_range=(ymin, ymax))

# Add circle glyphs to the plot
plot.circle(x='x', y='y', fill_alpha=0.8, source=source, legend='region',
            color=dict(field='region', transform=color_mapper))

# Set the legend.location attribute of the plot to 'top_right'
plot.legend.location = 'top_right'

# Set the x-axis label
plot.xaxis.axis_label = 'GDP'

# Set the y-axis label
plot.yaxis.axis_label = 'CO2 emissions (tonnes per person)'

In [31]:
def update_plot(attr, old, new):
    # set the `yr` name to `slider.value` and `source.data = new_data`
    yr = slider.value

    new_data = {
        'x': final_df.gdp[final_df['year'] == yr],
        'y': final_df.co2[final_df['year'] == yr],
        'country': final_df.country[final_df['year'] == yr],
        'region': final_df.region[final_df['year'] == yr],
    }
    source.data = new_data

    # Add title to figure: plot.title.text
    plot.title.text = 'Gapminder data for %d' % yr

In [32]:
# Make a slider object: slider
slider = Slider(start=1964, end=2013, step=1, value=1964, title='Year')

# Attach the callback to the 'value' property of slider
slider.on_change('value', update_plot)

In [33]:
# Create a HoverTool: hover
hover = HoverTool(tooltips=[('Country', '@country')])

# Add the HoverTool to the plot
plot.add_tools(hover)

In [34]:
# Make a row layout of widgetbox(slider) and plot and add it to the current document
layout = row(widgetbox(slider), plot)
curdoc().add_root(layout)