## Author: Emmanuel Rodriguez

Date: 11 Aug 2022

## Objective: Visualize CO2 Emissions Per Capita

Download the World Development Indicators (WDI) dataset from https://datatopics.worldbank.org/world-development-indicators/

## Using Folium Library for Geographic Overlays

Folium repo: https://github.com/python-visualization/folium

In [1]:
# Import libraries
import os # standard library: access to environment variables

import folium # provides capability of visualizing data on a map
import pandas as pd

## Country boundary geometries (GeoJSON) for plotting

Source: https://github.com/python-visualization/folium/blob/main/examples/data/world-countries.json

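Download the raw form and save it, with the file extension .json, in the same folder as this notebook; the code below loads it under the name `world-countries2.json`: https://raw.githubusercontent.com/python-visualization/folium/main/examples/data/world-countries.json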

In [6]:
# File with the country outlines used by the choropleth overlay.
# The path is relative, so it is resolved against the notebook's working directory.
country_geo = "world-countries2.json"

In [3]:
# Read the WDI database.
# The CSV location can be overridden through the WDI_CSV_PATH environment
# variable so the notebook is not tied to one machine; when the variable is
# not set, the original local path below is used unchanged.
# The default is written as a raw string (prefix 'r') so the backslashes of the
# Windows path are not interpreted as escape sequences.
WDI_CSV_PATH = os.environ.get(
    'WDI_CSV_PATH',
    r'C:\Users\ejoaq\OneDrive\1 My_Notebook\2 Entrepreneurship and Work\1 Teaching\1 National University\1 ANA 500 - Python for Data Science\My Notebooks - from edX course\WDI_csv\WDIData.csv',
)
data = pd.read_csv(WDI_CSV_PATH)

# (rows, columns) of the loaded table
data.shape

(384370, 66)

In [4]:
# Preview the first five rows of the WDI table
data.head(n=5)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.ZS,,,,,,,...,16.559819,16.936004,17.337896,17.687092,18.140971,18.491344,18.82552,19.272212,19.628009,
1,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS,,,,,,,...,6.281667,6.499471,6.680066,6.85911,7.016238,7.180364,7.322294,7.517191,7.651598,
2,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.UR.ZS,,,,,,,...,37.601816,37.855399,38.046781,38.326255,38.468426,38.670044,38.722783,38.927016,39.042839,
3,Africa Eastern and Southern,AFE,Access to electricity (% of population),EG.ELC.ACCS.ZS,,,,,,,...,31.844384,31.79416,32.001027,33.87191,38.880173,40.261358,43.061877,44.27086,45.803485,
4,Africa Eastern and Southern,AFE,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,,,,,,,...,19.402592,18.663502,17.633986,16.464681,24.531436,25.345111,27.449908,29.64176,30.404935,


In [9]:
# Indicator to extract: CO2 emissions per capita.
# This text is matched against the 'Indicator Name' column of the WDI table.
hist_indicator = "CO2 emissions (metric tons per capita)"


In [10]:
# Year to extract; the year columns of the WDI table are labelled with strings
hist_year = '2018'

# Boolean mask over the rows of `data`: True where 'Indicator Name' contains
# hist_indicator. regex=False makes pandas treat the pattern as plain text
# instead of a regular expression, so the parentheses in the indicator name
# are matched literally.
mask1 = data['Indicator Name'].str.contains(hist_indicator, regex=False)

# Column holding the values of every indicator for the chosen year
data2018 = data[hist_year]

# Keep only the rows selected by the mask (rows whose value is NaN are kept)
co2_2018 = data2018.loc[mask1]

# Summary of the resulting Series, one property per output line
print(
    type(co2_2018),   # the type of object, and the class that it's derived from
    co2_2018.shape,   # tuple with the shape of the underlying data
    co2_2018.dtype,   # dtype of the underlying data
    co2_2018.ndim,    # number of dimensions of the underlying data
    co2_2018.size,    # number of elements in the underlying data
    co2_2018.index,   # the index (axis labels) of the Series
    sep='\n',
)
co2_2018.head()

<class 'pandas.core.series.Series'>
(266,)
float64
1
266
Int64Index([   193,   1638,   3083,   4528,   5973,   7418,   8863,  10308,
             11753,  13198,
            ...
            370113, 371558, 373003, 374448, 375893, 377338, 378783, 380228,
            381673, 383118],
           dtype='int64', length=266)


193     0.933541
1638    0.515544
3083    4.438716
4528    5.017034
5973    6.597232
Name: 2018, dtype: float64

## Prepare data for plotting

In [11]:
# Keep the row labels of the filtered CO2 series; they are used to pick the
# matching country codes out of `data`.
co2_2018_idx = co2_2018.index

# Show which kind of index object pandas returned
index_type = type(co2_2018_idx)
print(index_type)

<class 'pandas.core.indexes.numeric.Int64Index'>


In [12]:
# Country code of every selected CO2 row.
# co2_2018_idx holds index *labels* of `data`, so the label-based .loc accessor
# is the matching lookup. Position-based .iloc returns the same rows only while
# `data` still has its default 0..n-1 index, and would silently pick the wrong
# rows (or raise) once the table is filtered, sorted or re-indexed.
co2_2018_countryCode = data.loc[co2_2018_idx, 'Country Code']

In [13]:
# Display the country codes that belong to the selected CO2 rows
co2_2018_countryCode

193       AFE
1638      AFW
3083      ARB
4528      CSS
5973      CEB
         ... 
377338    VIR
378783    PSE
380228    YEM
381673    ZMB
383118    ZWE
Name: Country Code, Length: 266, dtype: object

In [14]:
# Build the table handed to the map: one column with the country codes and one
# with the values to be plotted. Both Series carry the same row labels, so the
# DataFrame lines them up row by row.
d = {
    'Country Code': co2_2018_countryCode,
    'CO2, 2018': co2_2018,
}
plot_data = pd.DataFrame(data=d)
plot_data

Unnamed: 0,Country Code,"CO2, 2018"
193,AFE,0.933541
1638,AFW,0.515544
3083,ARB,4.438716
4528,CSS,5.017034
5973,CEB,6.597232
...,...,...
377338,VIR,
378783,PSE,
380228,YEM,0.326682
381673,ZMB,0.446065


## Visualization of CO2 emissions per capita using Folium

In [16]:
# Create the base folium map, zoomed far out so the whole world is visible.
# NOTE(review): the name `map` shadows Python's built-in map(); it is kept
# because the following cells refer to the map object by this name.
map_center = [50, 0]  # [latitude, longitude] of the initial view
map = folium.Map(location=map_center, zoom_start=1.5)

In [17]:
# Use the method 'choropleth' to bind the DataFrame and json geometries

# Bind the DataFrame to the json geometries with the folium.Choropleth class
# and add the resulting layer to the map. The older Map.choropleth() method is
# deprecated and is not available in newer folium releases.
#   columns -> [key column, value column] of plot_data
#   key_on  -> field of each geometry that is matched against the key column
# The trailing ';' keeps the notebook from echoing the returned layer object.
folium.Choropleth(
    geo_data=country_geo,
    data=plot_data,
    columns=['Country Code', 'CO2, 2018'],
    key_on='feature.id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='CO2 emissions per capita in 2018',
).add_to(map);



In [18]:
# Write the finished map to a standalone HTML file in the working directory
output_html = 'plot_data.html'
map.save(output_html)

In [19]:
# Embed the saved map in the notebook.
# IPython.display.IFrame is the purpose-built helper for this; passing a raw
# '<iframe ...>' string to HTML() makes IPython emit the warning
# "Consider using IPython.display.IFrame instead".
from IPython.display import IFrame
IFrame(src='plot_data.html', width=700, height=450)

