In [400]:
import pandas as pd
import numpy as np
import re

import plotly.plotly as py
import plotly.graph_objs as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [93]:
init_notebook_mode(connected=True) 

In [550]:
#temp = pd.read_csv('GlobalTemps.csv')
coemm = pd.read_csv('CO2Emm_Global_1970-16.csv')
engcons = pd.read_csv('EU_EngConsPerCap_00-16.csv')

# Data cleaning

In [281]:
coemm.head()

Unnamed: 0,ISO_CODE,ISO_NAME,Year,GHG per capita emissions,CO2/cap
0,AFG,Afghanistan,1970,1.557705,0.156962
1,ALB,Albania,1970,3.355742,2.062545
2,DZA,Algeria,1970,4.645362,1.207575
3,AGO,Angola,1970,6.772412,1.191541
4,AIA,Anguilla,1970,0.776791,0.338751


In [241]:
coemm['Year'].max()

2016

In [15]:
coemm.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9870 entries, 0 to 9869
Data columns (total 5 columns):
ISO_CODE                    9870 non-null object
ISO_NAME                    9870 non-null object
Year                        9870 non-null int64
GHG per capita emissions    9030 non-null float64
CO2/cap                     9870 non-null float64
dtypes: float64(2), int64(1), object(2)
memory usage: 385.6+ KB


In [282]:
engcons.head()

Unnamed: 0,Year,kg of oil equivalent per capita,Country
0,2000,123,Albania
1,2000,791,Austria
2,2000,0,Bosnia and Herzegovina
3,2000,924,Belgium
4,2000,264,Bulgaria


In [257]:
engcons['Year'].max()

2016

In [283]:
engcons['Country'].unique()

array(['Albania', 'Austria', 'Bosnia and Herzegovina', 'Belgium',
       'Bulgaria', 'Cyprus', 'Czech Republic', 'Germany', 'Denmark',
       'Estonia', 'Greece', 'Spain', 'Finland', 'France', 'Croatia',
       'Hungary', 'Ireland', 'Iceland', 'Italy', 'Lithuania',
       'Luxembourg', 'Latvia', 'Montenegro', 'Macedonia', 'Malta',
       'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'Serbia',
       'Sweden', 'Slovenia', 'Slovakia', 'Turkey', 'United Kingdom',
       'Kosovo'], dtype=object)

In [284]:
coemm = coemm[coemm['Year'] > 1999]

In [286]:
coemm.reset_index().drop(['index'],axis=1,inplace=True)

# Renaming country names to match engcons dataframe

- coemm has different country names to engcons
- attempted to rename one country first

In [337]:
# Grab string to rename
# To be iterated
uk_str = coemm[coemm['ISO_NAME'].str.contains('United Kingdom')]['ISO_NAME'].iloc[0]
print(uk_str)

United Kingdom of Great Britain and Northern Ireland (the)


In [523]:
# Replace sliced column with renamed column
# To be iterated
# uk_renamed = coemm.loc[coemm['ISO_NAME'] == uk_str]
# uk_renamed = uk_renamed.replace(to_replace={'ISO_NAME':{uk_str:'United Kingdom'}})

coemm['ISO_NAME'] = coemm['ISO_NAME'].replace({uk_str:'United Kingdom'})

In [512]:
# Check if rename was successful
coemm[coemm['ISO_NAME'] == 'United Kingdom'].head()

Unnamed: 0,ISO_CODE,ISO_NAME,Year,GHG per capita emissions,CO2/cap
6497,GBR,United Kingdom,2000,11.75803,9.225049
6707,GBR,United Kingdom,2001,11.84552,9.436021
6917,GBR,United Kingdom,2002,11.47246,9.110215
7127,GBR,United Kingdom,2003,11.56776,9.281403
7337,GBR,United Kingdom,2004,11.39856,9.227768


In [None]:
# Iterate 

In [551]:
engcons_country_list = engcons['Country'].unique().tolist()

In [481]:
# To pick up errors where names are different
# For example Czechia and Czech Republic name difference
# Kosovo, status disputed according to Wikipedia
# For simplicity they will be removed
to_be_renamed_list = []
for i in range(engcons_range):
    try:
        to_be_renamed_list.append(coemm[coemm['ISO_NAME'].str.contains(engcons_country_list[i])]['ISO_NAME'].iloc[0])
    except IndexError:
        pass

In [552]:
# Removed Czech Republic and Kosovo
engcons_country_list = [e for e in engcons_country_list if e not in ('Czech Republic', 'Kosovo')]
engcons_range = len(engcons_country_list)

In [553]:
# Now I can iterate without error handling
to_be_renamed_list = []
for i in range(engcons_range):
        to_be_renamed_list.append(coemm[coemm['ISO_NAME'].str.contains(engcons_country_list[i])]['ISO_NAME'].iloc[0])

In [554]:
# Replace sliced column with renamed column
for j in range(engcons_range):
    coemm['ISO_NAME'] = coemm['ISO_NAME'].replace({to_be_renamed_list[j]:engcons_country_list[j]})

In [564]:
coemm[coemm['ISO_NAME'] == 'United Kingdom'].head()

Unnamed: 0,ISO_CODE,ISO_NAME,Year,GHG per capita emissions,CO2/cap
197,GBR,United Kingdom,1970,15.76062,12.13728
407,GBR,United Kingdom,1971,15.64406,11.94266
617,GBR,United Kingdom,1972,15.07697,11.52187
827,GBR,United Kingdom,1973,15.73715,12.06927
1037,GBR,United Kingdom,1974,14.93535,11.27633


In [525]:
# Checking where I cut off 2012-2016 when I merged coemm and engcons
# This was due to the two country columns not having the same names
coemm['Year'].max()

2016

In [565]:
coemm = coemm.reset_index().drop(['index'],axis=1)

In [566]:
coemm.rename(columns={"ISO_NAME":"Country","ISO_CODE":"Code"},inplace=True)

In [567]:
merged = coemm.merge(engcons, how='inner', on=['Year', 'Country'])

In [568]:
# Merged data has 2013-2016 data missing due to name difference
merged['Year'].max()

2016

In [569]:
merged.head()

Unnamed: 0,Code,Country,Year,GHG per capita emissions,CO2/cap,kg of oil equivalent per capita
0,ALB,Albania,2000,2.320918,1.0062,123
1,AUT,Austria,2000,10.36811,8.220142,791
2,BEL,Belgium,2000,14.67313,11.85222,924
3,BIH,Bosnia and Herzegovina,2000,4.694694,3.774173,0
4,BGR,Bulgaria,2000,7.603958,5.878875,264


In [545]:
# merged = merged.reset_index().drop(['index'],axis=1)

In [570]:
merged.columns = [x.lower() for x in merged.columns]

In [571]:
merged.head()

Unnamed: 0,code,country,year,ghg per capita emissions,co2/cap,kg of oil equivalent per capita
0,ALB,Albania,2000,2.320918,1.0062,123
1,AUT,Austria,2000,10.36811,8.220142,791
2,BEL,Belgium,2000,14.67313,11.85222,924
3,BIH,Bosnia and Herzegovina,2000,4.694694,3.774173,0
4,BGR,Bulgaria,2000,7.603958,5.878875,264


# Map plotting

In [138]:
merged_2000 = merged[merged['year'] == 2000]

In [143]:
data = dict(
        type = 'choropleth',
        locations = merged_2000['code'],
        z = merged_2000['ghg per capita emissions'],
        text = merged_2000['country'],
        colorbar = {'title' : 'GHG Emissions'},) 

In [144]:
layout = dict(
    title = '2000 Greenhouse Gas Emissions',
    geo = dict(
        scope = 'europe'))

In [160]:
choromap = dict(data = [data],layout = layout)
iplot(choromap)

In [580]:
# Create a list for year
years_list = list(merged['year'].unique().astype(str))

In [602]:
merged_piv = merged.pivot_table(index=['code'],columns='year', values='co2/cap')

In [603]:
merged_piv.head()

year,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ALB,1.0062,1.075421,1.248918,1.350672,1.409903,1.344544,1.343873,1.394059,1.395093,1.26505,1.384648,1.476388,1.441218,1.501637,1.650724,1.707743,1.777583
AUT,8.220142,8.765426,8.9509,9.544022,9.64083,9.736343,9.373612,9.082319,9.143795,8.20318,8.930128,8.792819,8.610191,8.593714,8.121575,8.369108,8.468899
BEL,11.85222,11.81872,11.03801,11.74084,11.3471,10.92195,10.58676,10.14001,10.41467,9.33825,9.946639,8.962264,8.440513,8.474506,7.978163,8.256059,8.309019
BGR,5.878875,6.199531,5.909999,6.728912,6.558176,6.75475,6.977927,7.53746,7.199023,6.161931,6.566002,7.285842,7.251792,6.542882,7.048781,7.537466,7.135051
BIH,3.774173,3.723906,3.911332,4.002134,4.227815,4.434418,4.929181,5.206201,5.725029,5.71547,5.903874,6.715541,6.668547,6.702908,6.831988,7.190721,7.293784


In [604]:
merged_piv.reset_index(inplace=True)

In [605]:
merged_piv.columns = ['code']+years_list

In [614]:
merged_piv.head()

Unnamed: 0,code,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,ALB,1.0062,1.075421,1.248918,1.350672,1.409903,1.344544,1.343873,1.394059,1.395093,1.26505,1.384648,1.476388,1.441218,1.501637,1.650724,1.707743,1.777583
1,AUT,8.220142,8.765426,8.9509,9.544022,9.64083,9.736343,9.373612,9.082319,9.143795,8.20318,8.930128,8.792819,8.610191,8.593714,8.121575,8.369108,8.468899
2,BEL,11.85222,11.81872,11.03801,11.74084,11.3471,10.92195,10.58676,10.14001,10.41467,9.33825,9.946639,8.962264,8.440513,8.474506,7.978163,8.256059,8.309019
3,BGR,5.878875,6.199531,5.909999,6.728912,6.558176,6.75475,6.977927,7.53746,7.199023,6.161931,6.566002,7.285842,7.251792,6.542882,7.048781,7.537466,7.135051
4,BIH,3.774173,3.723906,3.911332,4.002134,4.227815,4.434418,4.929181,5.206201,5.725029,5.71547,5.903874,6.715541,6.668547,6.702908,6.831988,7.190721,7.293784


### Adding a drop down list

In [615]:
data_all = []

In [616]:
data_2000 = [dict(type='choropleth',
                locations = merged_piv['code'],
                z = merged_piv['2000'],
                text = merged_piv['code'],
                colorbar = dict(title = "CO2"))]
    
data_all.extend(data_2000)

In [617]:
for i in years[1:]:
    data_upd = [dict(type='choropleth',
                      locations = merged_piv['code'],
                      z = merged_piv[i],
                      text = merged_piv['code'],
                      colorbar = dict(title = "CO2"))]
    
    data_all.extend(data_upd)

In [618]:
# menus inside plot
steps = []
yr = 0
for i in range(0,len(data_all)):
    step = dict(method = "restyle",
                args = ["visible", [False]*len(data_all)],
                label = years[yr]) 
    step['args'][1][i] = True
    steps.append(step)
    yr += 1

sliders = [dict(active = 10,
                currentvalue = {"prefix": "Year: "},
                pad = {"t": 50},
                steps = steps)]


In [619]:
# set the layout
layout = dict(title = 'CO2',
              geo = dict(scope='europe'
                        ),
              sliders = sliders
             )

In [620]:
fig = dict(data=data_all, layout=layout)
iplot(fig, filename='d3-cloropleth-map')