In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Make the notebook folder on the mounted Drive the working directory, so the
# relative CSV file names used below resolve (presumably the CSVs live here)
%cd /content/drive/My Drive/Colab Notebooks

/content/drive/My Drive/Colab Notebooks


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# Load the Gapminder table from 'gapminder.csv' in the current working directory
gm = pd.read_csv('gapminder.csv')

In [5]:
# Preview the first rows of the Gapminder table
gm.head()

Unnamed: 0,Country,Year,fertility,life,population,child_mortality,gdp,region
0,Afghanistan,1964,7.671,33.639,10474903.0,339.7,1182.0,South Asia
1,Afghanistan,1965,7.671,34.152,10697983.0,334.1,1182.0,South Asia
2,Afghanistan,1966,7.671,34.662,10927724.0,328.7,1168.0,South Asia
3,Afghanistan,1967,7.671,35.17,11163656.0,323.3,1173.0,South Asia
4,Afghanistan,1968,7.671,35.674,11411022.0,318.1,1187.0,South Asia


In [6]:
# Country -> region lookup: keep only those two columns and collapse the repeated
# per-year rows into one row per distinct (Country, region) pair
df_gm = gm.loc[:, ['Country', 'region']].drop_duplicates()

In [7]:
# Load the CO2 table from 'co2.csv' in the current working directory
co2 = pd.read_csv('co2.csv')

In [8]:
# Attach a region to every CO2 row. The key is spelled 'country' in the CO2 table
# and 'Country' in the lookup; the inner join keeps only countries found in both.
df_w_regions = co2.merge(
    df_gm,
    how='inner',
    left_on='country',
    right_on='Country',
)

In [9]:
# Preview the merged CO2 + region table
df_w_regions.head()

Unnamed: 0,country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2007,2008,2009,2010,2011,2012,2013,2014,Country,region
0,Afghanistan,,,,,,,,,,...,0.0854,0.154,0.242,0.294,0.412,0.35,0.316,0.299,Afghanistan,South Asia
1,Albania,,,,,,,,,,...,1.3,1.46,1.48,1.56,1.79,1.68,1.73,1.96,Albania,Europe & Central Asia
2,Algeria,,,,,,,,,,...,3.19,3.16,3.42,3.3,3.29,3.46,3.51,3.72,Algeria,Middle East & North Africa
3,Angola,,,,,,,,,,...,1.2,1.18,1.23,1.24,1.25,1.33,1.25,1.29,Angola,Sub-Saharan Africa
4,Antigua and Barbuda,,,,,,,,,,...,5.14,5.19,5.45,5.54,5.36,5.42,5.36,5.38,Antigua and Barbuda,America


In [10]:
# The merge left both join keys in the frame; drop the capitalised duplicate
df_w_regions = df_w_regions.drop(columns=['Country'])

In [11]:
# Reshape from wide (one column per year) to long (one row per country and year).
# Naming the melted columns in the call itself avoids overwriting `.columns`
# positionally, which silently mislabels data if the column order ever changes.
new_co2 = pd.melt(
    df_w_regions,
    id_vars=['country', 'region'],
    var_name='year',
    value_name='co2',
)

In [12]:
# The melted `year` values are the former column labels, so cast them to integers
# once, up front: the filter and the sort then both operate on numbers rather
# than on text, and the column already has its final dtype.
df_co2 = (
    new_co2
    .assign(year=new_co2['year'].astype('int64'))
    .loc[lambda frame: frame['year'] > 1963]  # keep 1964 onwards
    .sort_values(by=['country', 'year'])
)


In [13]:
# Preview the long-format CO2 table (the index keeps its pre-sort labels)
df_co2.head()

Unnamed: 0,country,region,year,co2
28372,Afghanistan,South Asia,1964,0.0863
28545,Afghanistan,South Asia,1965,0.101
28718,Afghanistan,South Asia,1966,0.108
28891,Afghanistan,South Asia,1967,0.124
29064,Afghanistan,South Asia,1968,0.116


We now have a DataFrame that contains CO2 emissions per country per year. The index numbers are not in ascending order because we sorted the data by the country column and then by the year column.

Next, we will create a similar table of GDP per country per year.

In [14]:
# GDP per country per year, taken from the Gapminder table
df_gdp = gm.loc[:, ['Country', 'Year', 'gdp']]

In [15]:
# Lower-case the column names so they match the key names used in the CO2 table
df_gdp.columns = ['country','year','gdp']

In [16]:
# Preview the GDP table
df_gdp.head()

Unnamed: 0,country,year,gdp
0,Afghanistan,1964,1182.0
1,Afghanistan,1965,1182.0
2,Afghanistan,1966,1168.0
3,Afghanistan,1967,1173.0
4,Afghanistan,1968,1187.0


We have one table for CO2 and one table for GDP. Now it is time to merge them.

In [17]:
# Attach the GDP figure to each CO2 row (left join on country and year), then
# discard every row that has a missing value in any column
data = df_co2.merge(df_gdp, how='left', on=['country', 'year']).dropna()

In [18]:
# Preview the combined CO2 + GDP table
data.head()

Unnamed: 0,country,region,year,co2,gdp
0,Afghanistan,South Asia,1964,0.0863,1182.0
1,Afghanistan,South Asia,1965,0.101,1182.0
2,Afghanistan,South Asia,1966,0.108,1168.0
3,Afghanistan,South Asia,1967,0.124,1173.0
4,Afghanistan,South Asia,1968,0.116,1187.0


It is time to check the correlation between CO2 and GDP.

In [19]:
# NumPy copies of the two numeric columns, used for the correlation below
np_co2, np_gdp = (np.array(data[column]) for column in ('co2', 'gdp'))

In [20]:
# Correlation-coefficient matrix of the two arrays; the off-diagonal entries are
# the CO2-vs-GDP correlation
np.corrcoef(np_co2, np_gdp)

array([[1.        , 0.78219731],
       [0.78219731, 1.        ]])

In conclusion, there is a high correlation between co2 and gdp.

**Bokeh**

In [32]:
from bokeh.io import curdoc, output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper, Slider

curdoc in bokeh.io will return the current default state of the document or plot

The figure from bokeh.plotting will create the figure for plotting.

HoverTool, ColumnDataSource, CategoricalColorMapper, and Slider from bokeh.models are all the tools and methods for mapping the data from pandas DataFrame to a data source for plotting.

Spectral6 is a color palette for the plot.

In [23]:
from bokeh.palettes import Spectral6
from bokeh.layouts import widgetbox, row

widgetbox will create a column of predefined tools (including zoom), while row creates a row of Bokeh layout objects, forcing them to have the same sizing_mode.

Running the output_notebook() function enables the plot to be displayed within the notebook.

In [24]:
# Route Bokeh output to the notebook so plots render inline
output_notebook()

In [25]:
# Distinct region names, in order of first appearance, as a plain Python list
regions_list = data['region'].unique().tolist()

In [27]:
# Map each region name to one colour of the Spectral6 palette
color_mapper = CategoricalColorMapper(
    palette=Spectral6,
    factors=regions_list,
)

In [33]:
# Data source behind the scatter plot, seeded with the 1964 rows. The rows are
# selected once and the four plotted columns are taken from that selection.
rows_1964 = data.loc[data['year'] == 1964]
source = ColumnDataSource(data={
    'x': rows_1964['gdp'],
    'y': rows_1964['co2'],
    'country': rows_1964['country'],
    'region': rows_1964['region'],
})


In [34]:
# Smallest and largest GDP value over every row of `data` (all years). The
# vectorised Series reductions replace the Python built-ins min()/max(), which
# walk the column one element at a time; float() keeps the results plain Python
# floats, as before.
xmin, xmax = float(data.gdp.min()), float(data.gdp.max())

In [35]:
# Smallest and largest CO2 value over every row of `data` (all years), computed
# with the vectorised Series reductions instead of the Python built-ins;
# float() keeps the results plain Python floats, as before.
# NOTE(review): the figure uses a logarithmic y axis, which cannot represent 0 --
# confirm that the minimum CO2 value is strictly positive.
ymin, ymax = float(data.co2.min()), float(data.co2.max())

In [36]:
# Empty 1000x600 canvas whose axes span the full GDP and CO2 ranges; the y axis
# is logarithmic
plot = figure(
    title='CO2 Emissions vs GDP in 1964',
    x_range=(xmin, xmax),
    y_range=(ymin, ymax),
    y_axis_type='log',
    plot_width=1000,
    plot_height=600,
)


In [38]:
# One circle per row of `source`, coloured by its region through `color_mapper`;
# the legend is keyed on the same 'region' column
plot.circle(
    x='x',
    y='y',
    source=source,
    size=7,
    fill_alpha=0.8,
    color={'field': 'region', 'transform': color_mapper},
    legend='region',
)




In [40]:
# Axis titles: GDP on the x axis, CO2 emissions on the y axis
plot.xaxis.axis_label = 'Income per person'
plot.yaxis.axis_label = 'CO2 Emissions (tons per person)'
# Anchor the legend in the bottom-right corner of the plot
plot.legend.location = 'bottom_right'


In [41]:
# Render the scatter plot
show(plot)

**Slider**


We will add a slider for the year column of DF to our plot. There are 5 steps to implement that:


1.   Set the start as the first year in the year column
2.   Set the end as the last year in the year column
3.   Set the step as 1, because each movement of the slider increments the value by 1
4.   Set the value as the minimum value of the year column
5.   Set the title as Year




In [57]:
# Year selector: runs from the earliest to the latest year present in `data`,
# moves in one-year steps and starts on the earliest year. The bounds come from
# the vectorised Series reductions rather than the Python built-ins min()/max();
# int() keeps them plain Python integers, as before.
slider = Slider(
    title='Year',
    start=int(data.year.min()),
    end=int(data.year.max()),
    value=int(data.year.min()),
    step=1,
)

In [58]:
def update_plot(attr, old, new):
    """Show the year currently selected on the slider.

    Written with the ``(attr, old, new)`` signature of a Bokeh property-change
    callback. The three arguments are not used; the year is read from
    ``slider.value``.

    Replaces ``source.data`` with the gdp/co2/country/region columns of the rows
    of ``data`` for that year and rewrites the plot title to name the year.
    """
    year = slider.value

    # Select the year's rows once, then hand the four plotted columns to the source
    rows = data[data['year'] == year]
    source.data = {
        'x': rows.gdp,
        'y': rows.co2,
        'country': rows.country,
        'region': rows.region,
    }

    plot.title.text = 'CO2 Emissions vs GDP in %d' % year

In [59]:

# Attach the callback to the slider: without this registration `update_plot` is
# never invoked and moving the slider cannot change the plot.
# NOTE: Python-side callbacks fire only when the document is driven by a Bokeh
# server (or a Bokeh app embedded in the notebook), not in static output.
slider.on_change('value', update_plot)

# Slider on the left, plot on the right; register the layout with the document
layout = row(widgetbox(slider), plot)
curdoc().add_root(layout)



**Hover Tool**

In [52]:
# Tooltip rows shown when hovering over a point; each '@name' refers to a column
# of the plot's data source
hover = HoverTool(
    tooltips=[
        ('Country', '@country'),
        ('GDP', '@x'),
        ('CO2 Emission', '@y'),
    ],
)

The above code will allow the user to hover over a data point on our plot to see the name of the country, the GDP, and the carbon emissions.

In [53]:
# Attach the hover tool to the existing plot object
plot.add_tools(hover)

In [54]:
# The hover tool was added to the same `plot` object that the existing `layout`
# already contains, so nothing has to be rebuilt. Building and adding a second
# row here would leave the document with two roots wrapping the same slider and
# plot; only register the layout if it is not a root yet.
if layout not in curdoc().roots:
    curdoc().add_root(layout)



In [61]:
# Render the plot again, now with the hover tool attached.
# NOTE(review): only `plot` is passed here; the slider lives in `layout`, so it
# is not part of what this call displays -- confirm that is intended.
show(plot)