In [112]:
# Data handling libraries
import pandas as pd
pd.options.display.max_rows = 10000
pd.options.display.max_columns = 10000
import numpy as np
import boto3
import io
import requests as req

# File manipulation libraries
import os
import pickle

# Data visualization libraries
import matplotlib.pyplot as plt
%matplotlib inline

# Initialize S3 client, location of files for this project
s3_client = boto3.client('s3')
s3_resource = boto3.resource('s3')

s3_bucket = "wri-public-data"
s3_folder = "resourcewatch/blog_data/GHG-GDP_Divergence_D3/"

RAW_DATA = s3_folder + "Raw_Data/"
PROCESSED_DATA = s3_folder + "Processed_Data/"
FINAL_DATA = s3_folder + "Final_Data/"
CONVERSIONS = s3_folder + "Conversions/"

# Functions for reading and uploading data to/from S3
def read_from_S3(bucket, key, index_col=0):
    obj = s3_client.get_object(Bucket=bucket, Key=key)
    df = pd.read_csv(io.BytesIO(obj['Body'].read()), index_col=[index_col], encoding="utf8")
    return(df)

def write_to_S3(df, bucket, key):
    csv_buffer = io.StringIO()
    df.to_csv(csv_buffer)
    s3_resource.Object(bucket, key).put(Body=csv_buffer.getvalue())

Load Raw Data from S3

In [2]:
# These four files are derived from the original CDIAC data sheet
# They were initially cleaned (using code outlined at the bottom of this notebook)
# And then uploaded to Amazon S3
file_names = ['Territorial Emissions GCB', 
              'Consumption Emissions GCB', 
              'Emissions Transfers GCB', 
              'Territorial Emissions CDIAC']

# Initialize a dictionary to store the raw data
cdiac_raw_data = {}

# Load each of the raw datasets from S3
# Reference: https://stackoverflow.com/questions/37703634/how-to-import-a-text-file-on-aws-s3-into-pandas-without-writing-to-disk
for file in file_names:
    cdiac_raw_data[file] = read_from_S3(s3_bucket, RAW_DATA+file+".csv")

In [144]:
#cdiac_raw_data["Territorial Emissions GCB"].head()
#cdiac_raw_data["Consumption Emissions GCB"].head()
#cdiac_raw_data["Emissions Transfers GCB"].head()
#cdiac_raw_data["Territorial Emissions CDIAC"].head()

Convert raw data to pct_change data for territorial and consumption emissions, load to S3

In [237]:
# Territory data
territorial_emissions_abs_raw = cdiac_raw_data["Territorial Emissions GCB"]

territory_gcb_abs_val = territorial_emissions_abs_raw.loc[2000:2015].transpose()
territory_gcb_abs_val['Summary Range'] = territory_gcb_abs_val[2015] - territory_gcb_abs_val[2000]
territory_gcb_abs_val["Summary Range Years"] = "2000-2015"

territory_gcb_pct_change = territorial_emissions_abs_raw.loc[1999:2015].transpose().pct_change(axis=1).loc[:,2000:]
territory_gcb_pct_change['Summary Range'] = (territory_gcb_abs_val[2015]/territory_gcb_abs_val[2000]) - 1
territory_gcb_pct_change["Summary Range Years"] = "2000-2015"

territory_gcb_abs_change = territorial_emissions_abs_raw.loc[1999:2015].transpose().diff(periods=1, axis=1).loc[:,2000:]
territory_gcb_abs_change['Summary Range'] = territory_gcb_abs_val[2015] - territory_gcb_abs_val[2000]
territory_gcb_abs_change["Summary Range Years"] = "2000-2015"


# Consumption data
consumption_emissions_abs_raw = cdiac_raw_data["Consumption Emissions GCB"]

consumption_gcb_abs_val = consumption_emissions_abs_raw.loc[2000:2014].transpose()
consumption_gcb_abs_val['Summary Range'] = consumption_gcb_abs_val[2014] - consumption_gcb_abs_val[2000]
consumption_gcb_abs_val['Summary Range Years'] = '2000-2014'

consumption_gcb_pct_change = consumption_emissions_abs_raw.loc[1999:2014].transpose().pct_change(axis=1).loc[:,2000:]
consumption_gcb_pct_change['Summary Range'] = (consumption_gcb_abs_val[2014]/consumption_gcb_abs_val[2000]) - 1
consumption_gcb_pct_change['Summary Range Years'] = '2000-2014'

consumption_gcb_abs_change = consumption_emissions_abs_raw.loc[1999:2014].transpose().diff(periods=1,axis=1).loc[:,2000:]
consumption_gcb_abs_change['Summary Range'] = consumption_gcb_abs_val[2014] - consumption_gcb_abs_val[2000]
consumption_gcb_abs_change['Summary Range Years'] = '2000-2014'


# Upload these percent change figures to S3

write_to_S3(territory_gcb_abs_val, s3_bucket, PROCESSED_DATA + \
                   "Territorial Emissions GCB absolute values 2000-2015.csv")
write_to_S3(territory_gcb_pct_change, s3_bucket, PROCESSED_DATA + \
                   "Territorial Emissions GCB percent changes 2000-2015.csv")
write_to_S3(territory_gcb_abs_change, s3_bucket, PROCESSED_DATA + \
                   "Territorial Emissions GCB absolute changes 2000-2015.csv")

write_to_S3(consumption_gcb_abs_val, s3_bucket, PROCESSED_DATA + \
                   "Consumption Emissions GCB absolute values 2000-2014.csv")
write_to_S3(consumption_gcb_pct_change, s3_bucket, PROCESSED_DATA + \
                   "Consumption Emissions GCB percent changes 2000-2014.csv")
write_to_S3(consumption_gcb_abs_change, s3_bucket, PROCESSED_DATA + \
                   "Consumption Emissions GCB absolute changes 2000-2014.csv")

Download Conversions used to align CDIAC, World Bank, and ISO3 country designations

In [121]:
# CDIAC names to World Bank names
cdiac_to_wb_name_conversion = read_from_S3(s3_bucket, CONVERSIONS+"CDIAC to World Bank name conversion.csv")
# World Bank names to ISO3 codes
wb_name_to_iso3_conversion = read_from_S3(s3_bucket, CONVERSIONS+"World Bank to ISO3 name conversion.csv")

Create final data for the D3 application by adding ISO3 codes to the CDIAC pct change data

In [238]:
# Download pct_change data from S3
territory_gcb_abs_val = read_from_S3(s3_bucket, PROCESSED_DATA+"Territorial Emissions GCB absolute values 2000-2015.csv")
consumption_gcb_abs_val = read_from_S3(s3_bucket, PROCESSED_DATA+"Consumption Emissions GCB absolute values 2000-2015.csv")

territory_gcb_pct_change = read_from_S3(s3_bucket, PROCESSED_DATA+"Territorial Emissions GCB percent changes 2000-2015.csv")
consumption_gcb_pct_change = read_from_S3(s3_bucket, PROCESSED_DATA+"Consumption Emissions GCB percent changes 2000-2015.csv")

territory_gcb_abs_change = read_from_S3(s3_bucket, PROCESSED_DATA+"Territorial Emissions GCB percent changes 2000-2015.csv")
consumption_gcb_abs_change = read_from_S3(s3_bucket, PROCESSED_DATA+"Consumption Emissions GCB percent changes 2000-2015.csv")

dfs = {
    "Production CO2 Emissions Absolute Value":territory_gcb_abs_val,
    "Consumption CO2 Emissions Absolute Value":consumption_gcb_abs_val,
    "Production CO2 Emissions Percent Change":territory_gcb_pct_change,
    "Consumption CO2 Emissions Percent Change":consumption_gcb_pct_change,
    "Production CO2 Emissions Absolute Change":territory_gcb_abs_change,
    "Consumption CO2 Emissions Absolute Change":consumption_gcb_abs_change
}


# Name for Congo didn't match in the CDIAC data and crosswalk file
def replace_congo(name):
    if name == "Congo":
        return("Congo (Rep)")
    else:
        return(name)
    
# Add the wb_name to each dataframe
def fetch_name(name):
    try:
        return(cdiac_to_wb_name_conversion.loc[name][0])
    except:
        return(np.nan)
    
def add_iso(name):
    try:
        return(wb_name_to_iso3_conversion.loc[name,"ISO"])
    except:
        return(np.nan)

for df_name, df in dfs.items():
    print(df_name)
    df.index = list(map(replace_congo, df.index))
    df["Country Name"] = list(map(fetch_name, df.index)) 
    df = df.loc[pd.notnull(df["Country Name"])]
    df = df.set_index("Country Name")
    df["ISO"] = list(map(add_iso, df.index))
    df = df.loc[pd.notnull(df["ISO"])]
    df["Indicator"] = df_name
    dfs[df_name] = df

Production CO2 Emissions Absolute Value
Consumption CO2 Emissions Absolute Value
Production CO2 Emissions Percent Change
Consumption CO2 Emissions Percent Change
Production CO2 Emissions Absolute Change
Consumption CO2 Emissions Absolute Change


In [239]:
# Export final files
write_to_S3(dfs["Production CO2 Emissions Absolute Value"], s3_bucket, FINAL_DATA + "Territory Emissions GCB absolute values with ISO3 2000-2015.csv")
write_to_S3(dfs["Consumption CO2 Emissions Absolute Value"], s3_bucket, FINAL_DATA + "Consumption Emissions GCB absolute values with ISO3 2000-2015.csv")

write_to_S3(dfs["Production CO2 Emissions Percent Change"], s3_bucket, FINAL_DATA + "Territory Emissions GCB percent changes with ISO3 2000-2015 plus summary data.csv")
write_to_S3(dfs["Consumption CO2 Emissions Percent Change"], s3_bucket, FINAL_DATA + "Consumption Emissions GCB percent changes with ISO3 2000-2014 plus summary data.csv")

write_to_S3(dfs["Production CO2 Emissions Absolute Change"], s3_bucket, FINAL_DATA + "Territory Emissions GCB absolute changes with ISO3 2000-2015 plus summary data.csv")
write_to_S3(dfs["Consumption CO2 Emissions Absolute Change"], s3_bucket, FINAL_DATA + "Consumption Emissions GCB absolute changes with ISO3 2000-2014 plus summary data.csv")

In [200]:
# Territory or Consumption?
emissions_type = "Territory"
# absolute values or percent changes?
metric = "absolute values"

df = read_from_S3(s3_bucket, FINAL_DATA + \
                  "{} Emissions GCB {} with ISO3 2000-2015 plus summary data.csv".format(emissions_type,metric) \
                  , index_col="ISO")
df

Unnamed: 0_level_0,Country Name,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,Summary Range,Summary Range Years,Indicator
ISO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
ALB,Albania,0.824,0.879,1.023,1.171,1.136,1.16,1.063,1.071,1.193,1.194,1.254,1.429,1.285,1.313,1.3081,1.323801,0.499801,2000-2015,Production CO2 Emissions Absolute Value
DZA,Algeria,23.979,22.987,24.776,25.234,24.405,29.214,27.588,29.86,30.079,33.115,32.5,33.048,35.448,36.601,39.257647,40.921606,16.942606,2000-2015,Production CO2 Emissions Absolute Value
AND,Andorra,0.143,0.143,0.145,0.146,0.154,0.157,0.149,0.147,0.147,0.141,0.141,0.134,0.134,0.134,0.134952,0.135947,-0.007053,2000-2015,Production CO2 Emissions Absolute Value
AGO,Angola,2.602,2.654,3.454,2.472,5.125,5.224,6.072,6.859,7.011,7.579,7.924,8.274,9.108,8.853,8.848174,8.990173,6.388173,2000-2015,Production CO2 Emissions Absolute Value
ATG,Antigua and Barbuda,0.094,0.094,0.099,0.106,0.111,0.112,0.116,0.128,0.131,0.139,0.143,0.14,0.143,0.143,0.14648,0.149771,0.055771,2000-2015,Production CO2 Emissions Absolute Value
ARG,Argentina,38.761,36.466,34.01,36.832,42.975,44.208,47.842,47.771,51.57,49.076,51.246,52.259,52.457,51.764,52.080937,52.799821,14.038821,2000-2015,Production CO2 Emissions Absolute Value
ARM,Armenia,0.945,0.966,0.83,0.935,0.994,1.187,1.195,1.381,1.516,1.189,1.15,1.341,1.553,1.499,1.490023,1.52512,0.58012,2000-2015,Production CO2 Emissions Absolute Value
AUS,Australia,95.492749,97.612808,98.761296,100.531107,104.126436,105.235025,106.750573,108.847285,110.326918,111.476113,110.862716,110.18164,110.934183,108.328039,107.294472,109.099849,13.6071,2000-2015,Production CO2 Emissions Absolute Value
AUT,Austria,18.088085,19.186537,19.685205,21.294747,21.394377,21.721753,20.99753,20.269681,20.214589,18.472359,19.795804,19.194096,18.476708,18.547249,17.538909,18.161297,0.073212,2000-2015,Production CO2 Emissions Absolute Value
AZE,Azerbaijan,8.047,7.846,8.076,8.349,8.751,9.364,10.681,8.32,9.682,8.7,8.366,9.121,9.696,9.72,10.174873,10.462132,2.415132,2000-2015,Production CO2 Emissions Absolute Value


Process GDP and other WB indicator Data

In [261]:
adjustments = {
    'Eritrea':{
        'start':'2000',
        'end':'2011'
    },
    'Maldives':{
        'start':'2001',
        'end':'2015'
    },
    'Venezuela, RB':{
        'start':'2000',
        'end':'2014'
    },
    'Bermuda':{
        'start':'2000',
        'end':'2013'
    },
    'Libya':{
        'start':'2000',
        'end':'2011'
    }
}

def add_summary_range(country,row,summ_type,default_start,default_end):
    if summ_type == 'absolute':
        if country in adjustments:
            val = row[adjustments[country]['end']]-row[adjustments[country]['start']]
            time = '{}-{}'.format(adjustments[country]['start'],adjustments[country]['end'])
        else:
            val = row[default_end]-row[default_start]
            time = '{}-{}'.format(default_start, default_end)
    else:
        if country in adjustments:
            val = row[adjustments[country]['end']]/row[adjustments[country]['start']] - 1
            time = '{}-{}'.format(adjustments[country]['start'],adjustments[country]['end'])
        else:
            val = row[default_end]/row[default_start] - 1
            time = '{}-{}'.format(default_start, default_end)
            
    return(val, time)

res = req.get("http://api.worldbank.org/countries/all/indicators/NY.GDP.MKTP.KD?date=1999:2016&format=json&per_page=10000")
data = pd.io.json.json_normalize(res.json()[1])
data = data[["country.value", "date", "value"]]
value_name = 'GDP Absolute Value'
data.columns = ["Country Name", "Year", value_name]
data = data.pivot(index="Country Name", columns="Year", values=value_name).astype(float)
data["ISO"] = list(map(add_iso, data.index))
data = data.loc[pd.notnull(data["ISO"])]
data["Indicator"] = value_name 

# No Summary for the raw data
data['Summary Range'], data['Summary Range Years'] = zip(*data.apply(lambda row: add_summary_range(row.name,row,'absolute','2000','2015'),axis=1)) 

# Absolute values
wbg_gdp_data = data.copy()
year_cols = [str(yr) for yr in range(1999,2016)]

# Percent changes
data2 = data.loc[:,year_cols].pct_change(axis=1).loc[:,year_cols[1:]]
data2["Indicator"] = "GDP Percent Change"
data2["ISO"] = list(map(add_iso, data2.index))

data2['Summary Range'], data2['Summary Range Years'] = zip(*data.apply(lambda row: add_summary_range(row.name,row,'percent','2000','2015'),axis=1)) 

wbg_gdp_data = wbg_gdp_data.append(data2)
    
# Absolute changes
data3 = data.loc[:,year_cols].diff(periods=1,axis=1).loc[:,year_cols[1:]]
data3["Indicator"] = "GDP Absolute Change"
data3["ISO"] = list(map(add_iso, data3.index))
    
data3['Summary Range'], data3['Summary Range Years'] = zip(*data.apply(lambda row: add_summary_range(row.name,row,'absolute','2000','2015'),axis=1)) 

wbg_gdp_data = wbg_gdp_data.append(data3)

# Clean up    
wbg_gdp_data.index.name = "Country Name"
wbg_gdp_data = wbg_gdp_data.drop(["1999", "2016"], axis=1)

In [262]:
wbg_gdp_data.loc['Libya']

Unnamed: 0_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,ISO,Indicator,Summary Range,Summary Range Years
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Libya,48026340000.0,47179700000.0,46727320000.0,52809340000.0,55165500000.0,61714050000.0,65725800000.0,69900910000.0,71765420000.0,71198370000.0,74773440000.0,28357140000.0,,,,,LBY,GDP Absolute Value,-19669200000.0,2000-2011
Libya,0.03679213,-0.01762878,-0.009588494,0.13016,0.0446163,0.1187073,0.06500547,0.06352317,0.02667356,-0.007901418,0.0502129,-0.6207592,,,,,LBY,GDP Percent Change,-0.4095503,2000-2011
Libya,1704287000.0,-846645900.0,-452382200.0,6082028000.0,2356158000.0,6548547000.0,4011751000.0,4175111000.0,1864506000.0,-567048600.0,3575076000.0,-46416300000.0,,,,,LBY,GDP Absolute Change,-19669200000.0,2000-2011


In [263]:
write_to_S3(wbg_gdp_data, s3_bucket, FINAL_DATA + "World Bank GDP Data with ISO3, 2000-2015 with Summary Values.csv")

In [264]:
production_co2_emissions_absolute_value = read_from_S3(s3_bucket, FINAL_DATA+"Territory Emissions GCB absolute values with ISO3 2000-2015.csv")
consumption_co2_emissions_absolute_value = read_from_S3(s3_bucket, FINAL_DATA+"Consumption Emissions GCB absolute values with ISO3 2000-2015.csv")

production_co2_emissions_pct_change = read_from_S3(s3_bucket, FINAL_DATA+"Territory Emissions GCB percent changes with ISO3 2000-2015 plus summary data.csv")
consumption_co2_emissions_pct_change = read_from_S3(s3_bucket, FINAL_DATA+"Consumption Emissions GCB percent changes with ISO3 2000-2014 plus summary data.csv")

production_co2_emissions_abs_change = read_from_S3(s3_bucket, FINAL_DATA+"Territory Emissions GCB absolute changes with ISO3 2000-2015 plus summary data.csv")
consumption_co2_emissions_abs_change = read_from_S3(s3_bucket, FINAL_DATA+"Consumption Emissions GCB absolute changes with ISO3 2000-2014 plus summary data.csv")

gdp_data = read_from_S3(s3_bucket, FINAL_DATA + "World Bank GDP Data with ISO3, 2000-2015 with Summary Values.csv")

dsets = [production_co2_emissions_absolute_value, consumption_co2_emissions_absolute_value,
         production_co2_emissions_pct_change, consumption_co2_emissions_pct_change,
         production_co2_emissions_abs_change, consumption_co2_emissions_abs_change,
         gdp_data]

In [265]:
keep_these_countries = production_co2_emissions_pct_change.reset_index().set_index("ISO").index

def make_one_file(dsets):
    df = dsets[0].reset_index().set_index("ISO").loc[keep_these_countries].reset_index()
    print(df.shape)
    for i in range(1, len(dsets)):
        df = df.append(dsets[i].reset_index().set_index("ISO").loc[keep_these_countries].reset_index())
        print(df.shape)
    return(df)

final_data = make_one_file(dsets)

(183, 21)
(366, 21)
(549, 21)
(732, 21)
(915, 21)
(1098, 21)
(1647, 21)


In [266]:
# Write to S3
write_to_S3(final_data, s3_bucket, FINAL_DATA + \
                   "All Data Together.csv")

final_data.to_csv("/Users/nathansuberi/Documents/GitHub/nsuberi.github.io/wri-ghg/final-data/All Data Together Concise.csv")

In [267]:
indicators = final_data["Indicator"].unique()
year_cols = [str(yr) for yr in range(2000,2016)]
extents = {}
long_extents = {}
for indicator in indicators:
    data = final_data.loc[final_data["Indicator"]==indicator, year_cols]
    extents[indicator] = [data.min().min(), data.max().max()]
    
    summary_data = final_data.loc[final_data["Indicator"]==indicator, "Summary Range"]
    long_extents[indicator] = [summary_data.min(), summary_data.max()]

In [268]:
final_data[pd.isnull(final_data["Summary Range"])]

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,Country Name,ISO,Indicator,Summary Range,Summary Range Years
1,,,,,,,,,,,,,,,,,Algeria,DZA,Consumption CO2 Emissions Absolute Value,,2000-2014
2,,,,,,,,,,,,,,,,,Andorra,AND,Consumption CO2 Emissions Absolute Value,,2000-2014
3,,,,,,,,,,,,,,,,,Angola,AGO,Consumption CO2 Emissions Absolute Value,,2000-2014
4,,,,,,,,,,,,,,,,,Antigua and Barbuda,ATG,Consumption CO2 Emissions Absolute Value,,2000-2014
10,,,,,,,,,,,,,,,,,"Bahamas, The",BHS,Consumption CO2 Emissions Absolute Value,,2000-2014
13,,,,,,,,,,,,,,,,,Barbados,BRB,Consumption CO2 Emissions Absolute Value,,2000-2014
16,,,,,,,,,,,,,,,,,Belize,BLZ,Consumption CO2 Emissions Absolute Value,,2000-2014
18,,,,,,,,,,,,,,,,,Bermuda,BMU,Consumption CO2 Emissions Absolute Value,,2000-2014
19,,,,,,,,,,,,,,,,,Bhutan,BTN,Consumption CO2 Emissions Absolute Value,,2000-2014
20,,,,,,,,,,,,,,,,,Bosnia and Herzegovina,BIH,Consumption CO2 Emissions Absolute Value,,2000-2014


In [None]:
final_data

In [269]:
extents

{'Production CO2 Emissions Absolute Value': [0.0069999999999999993,
  9896.7060457050302],
 'Consumption CO2 Emissions Absolute Value': [-0.31450916878671931,
  2470.0722301013307],
 nan: [nan, nan],
 'Production CO2 Emissions Percent Change': [-0.58231707317073167,
  5.8064516129032278],
 'Consumption CO2 Emissions Percent Change': [-2.2820333549161385,
  4.0330301875807812],
 'Production CO2 Emissions Absolute Change': [-0.58231707317073167,
  5.8064516129032278],
 'Consumption CO2 Emissions Absolute Change': [-2.2820333549161385,
  4.0330301875807812],
 'GDP Absolute Value': [118428316.402523, 75636751196801.094],
 'GDP Percent Change': [-0.62075919584900086, 1.7918065792259044],
 'GDP Absolute Change': [-1118392942734.1953, 2727043480126.0]}

In [270]:
long_extents

{'Production CO2 Emissions Absolute Value': [-157.98880529674875,
  3111.6066568448969],
 'Consumption CO2 Emissions Absolute Value': [-97.065613082643722,
  1600.804809484755],
 nan: [nan, nan],
 'Production CO2 Emissions Percent Change': [-0.36313606588722158,
  13.091579476991498],
 'Consumption CO2 Emissions Percent Change': [-0.39043823222336332,
  5.0366677469343344],
 'Production CO2 Emissions Absolute Change': [-0.36313606588722158,
  13.091579476991498],
 'Consumption CO2 Emissions Absolute Change': [-0.39043823222336332,
  5.0366677469343344],
 'GDP Absolute Value': [-19669203139.392899, 25635055621226.297],
 'GDP Percent Change': [-0.40955028618152017, 4.2559671805372483],
 'GDP Absolute Change': [-19669203139.392899, 25635055621226.297]}

In [271]:
final_data[(final_data['Country Name']=='Eritrea') & ['GDP' in ind if type(ind)==str else False for ind in final_data['Indicator']]]

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,Country Name,ISO,Indicator,Summary Range,Summary Range Years
156,1939336000.0,2109133000.0,2172522000.0,2114830000.0,2145532000.0,2200768000.0,2179437000.0,2210534000.0,1994277000.0,2071585000.0,2117040000.0,2300794000.0,,,,,Eritrea,ERI,GDP Absolute Value,361458300.0,2000-2011
157,-0.03141986,0.08755435,0.03005429,-0.02655516,0.01451736,0.0257445,-0.009692169,0.01426822,-0.0978303,0.03876502,0.02194191,0.086798,,,,,Eritrea,ERI,GDP Percent Change,0.1863825,2000-2011
158,-62910290.0,169797300.0,63388510.0,-57691660.0,30701740.0,55235640.0,-21330210.0,31096690.0,-216257200.0,77308170.0,45454520.0,183754800.0,,,,,Eritrea,ERI,GDP Absolute Change,361458300.0,2000-2011


In [215]:
production_co2_emissions_absolute_value['ISO'].unique()

array(['ALB', 'DZA', 'AND', 'AGO', 'ATG', 'ARG', 'ARM', 'AUS', 'AUT',
       'AZE', 'BHS', 'BHR', 'BGD', 'BRB', 'BLR', 'BEL', 'BLZ', 'BEN',
       'BMU', 'BTN', 'BIH', 'BWA', 'BRA', 'BRN', 'BGR', 'BFA', 'BDI',
       'KHM', 'CAN', 'CPV', 'CAF', 'TCD', 'CHL', 'CHN', 'COL', 'COM',
       'COG', 'CRI', 'CIV', 'HRV', 'CUB', 'CYP', 'CZE', 'ZAR', 'DNK',
       'DJI', 'DMA', 'DOM', 'ECU', 'EGY', 'SLV', 'GNQ', 'ERI', 'EST',
       'ETH', 'FSM', 'FJI', 'FIN', 'FRA', 'GAB', 'GMB', 'GEO', 'DEU',
       'GHA', 'GRC', 'GRL', 'GRD', 'GTM', 'GIN', 'GNB', 'GUY', 'HTI',
       'HND', 'HKG', 'HUN', 'ISL', 'IND', 'IDN', 'IRQ', 'IRL', 'IRN',
       'ISR', 'ITA', 'JAM', 'JPN', 'JOR', 'KAZ', 'KEN', 'KIR', 'KWT',
       'KGZ', 'LAO', 'LVA', 'LBN', 'LBR', 'LBY', 'LTU', 'LUX', 'MAC',
       'MKD', 'MDG', 'MWI', 'MYS', 'MDV', 'MLI', 'MLT', 'MHL', 'MRT',
       'MUS', 'MEX', 'MNG', 'MNE', 'MAR', 'MOZ', 'MMR', 'NAM', 'NPL',
       'NLD', 'NZL', 'NIC', 'NER', 'NGA', 'NOR', 'OMN', 'PAK', 'PLW',
       'PAN', 'PNG',

Experimentation Below

In [58]:
data_names_and_codes = {'NY.GDP.MKTP.KD': 'GDP (current US$)'}
# {'EG.ELC.ACCS.ZS': 'Access to electricity (% of population)',
#  'EG.FEC.RNEW.ZS': 'Renewable energy consumption (% of total final energy consumption)',
#  'IT.NET.USER.ZS': 'Individuals using the Internet (% of population)',
#  'NE.CON.PRVT.PC.KD': 'Household final consumption expenditure per capita (constant 2010 US$)',
#  'NV.IND.TOTL.KD': 'Industry, value added (constant 2010 US$)',
#  'NY.GDP.TOTL.RT.ZS': 'Total natural resources rents (% of GDP)',
#  'SG.GEN.PARL.ZS': 'Proportion of seats held by women in national parliaments (%)',
#  'SL.EMP.TOTL.SP.ZS': 'Employment to population ratio, 15+, total (%) (modeled ILO estimate)',
#  'SM.POP.NETM': 'Net migration',
#  'SP.DYN.LE00.IN': 'Life expectancy at birth, total (years)',
#  'SP.URB.TOTL.IN.ZS': 'Urban population (% of total)',
#  'TM.VAL.MRCH.CD.WT': 'Merchandise imports (current US$)'}
 

column_long_name_to_short_name = {'GDP (current US$)': 'GDP'}
#     {'Renewable energy consumption (% of total final energy consumption)': 'renewable_energy_consumption_of_total_final_energy_consumpti',
#     'Household final consumption expenditure per capita (constant 2010 US$)': 'household_final_consumption_expenditure_per_capita_constant_20',
#     'Merchandise imports (current US$)': 'merchandise_imports_current_us_tm_val_mrch_cd_wt',
#     'Industry, value added (constant 2010 US$)': 'industry_value_added_constant_2010_us_nv_ind_totl_kd',
#     'Access to electricity (% of population)': 'access_to_electricity_of_population_eg_elc_accs_zs',
#     'Urban population (% of total)': 'urban_population_of_total_sp_urb_totl_in_zs',
#     'Employment to population ratio, 15+, total (%) (modeled ILO estimate)': 'employment_to_population_ratio_15_total_modeled_ilo_est',
#     'Total natural resources rents (% of GDP)': 'total_natural_resources_rents_of_gdp_ny_gdp_totl_rt_zs',
#     'Life expectancy at birth, total (years)': 'life_expectancy_at_birth_total_years_sp_dyn_le00_in',
#     'Net migration': 'net_migration_sm_pop_netm',
#     'Proportion of seats held by women in national parliaments (%)': 'proportion_of_seats_held_by_women_in_national_parliaments',
#     'Individuals using the Internet (% of population)': 'individuals_using_the_internet_of_population_it_net_user_z'}

series_code_to_data_viz_name = {}
for key, value in data_names_and_codes.items():
    series_code_to_data_viz_name[key] = column_long_name_to_short_name[value]
    
series_code_to_data_viz_name 

{'NY.GDP.MKTP.CD': 'GDP'}

In [None]:
indicators = series_code_to_data_viz_name

for indicator in indicators:
    # Results are paginated
    print(indicator)
    res = req.get("http://api.worldbank.org/countries/all/indicators/{}?date=1999:2016&format=json&per_page=10000".format(indicator))
    data = pd.io.json.json_normalize(res.json()[1])
    data = data[["country.value", "date", "value"]]
    value_name = series_code_to_data_viz_name[indicator]
    data.columns = ["Country Name", "Year", value_name]
    data = data.pivot(index="Country Name", columns="Year", values=value_name).astype(float)
    data["ISO"] = list(map(add_iso, data.index))
    data = data.loc[pd.notnull(data["ISO"])]
    data["Indicator"] = value_name 
    
    all_world_bank_data = all_world_bank_data.append(data)
    
    if indicator == "NY.GDP.MKTP.CD":
        year_cols = [str(yr) for yr in range(1999,2016)]
        data = data.loc[:,year_cols].pct_change(axis=1).loc[:,year_cols[1:]]
        data["Indicator"] = "GDP percent change"
        data["ISO"] = list(map(add_iso, data.index))
        all_world_bank_data = all_world_bank_data.append(data)
    
all_world_bank_data.index.name = "Country Name"
all_world_bank_data = all_world_bank_data.drop(["1999", "2016"], axis=1)

In [26]:
all_world_bank_data.head()

Unnamed: 0_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,ISO,Indicator
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Afghanistan,,2461666000.0,4128821000.0,4583644000.0,5285466000.0,6275074000.0,7057598000.0,9843842000.0,10190530000.0,12486940000.0,15936800000.0,17930240000.0,20536540000.0,20046330000.0,20050190000.0,19215560000.0,AFG,GDP
Albania,3632044000.0,4060759000.0,4435079000.0,5746946000.0,7314865000.0,8158549000.0,8992642000.0,10701010000.0,12881350000.0,12044210000.0,11926950000.0,12890870000.0,12319780000.0,12776280000.0,13228240000.0,11335260000.0,ALB,GDP
Algeria,54790250000.0,54744710000.0,56760290000.0,67863830000.0,85325000000.0,103198200000.0,117027300000.0,134977100000.0,171000700000.0,137211000000.0,161207300000.0,200019100000.0,209059000000.0,209755000000.0,213810000000.0,165874300000.0,DZA,GDP
American Samoa,,,514000000.0,527000000.0,512000000.0,503000000.0,496000000.0,520000000.0,563000000.0,678000000.0,576000000.0,574000000.0,644000000.0,641000000.0,643000000.0,659000000.0,ASM,GDP
Andorra,1434430000.0,1496913000.0,1733117000.0,2398646000.0,2935659000.0,3255789000.0,3543257000.0,4016972000.0,4007353000.0,3660531000.0,3355695000.0,3442063000.0,3164615000.0,3281585000.0,3350736000.0,2811489000.0,AND,GDP


In [29]:
reverse_map = {v: k for k, v in column_long_name_to_short_name.items()}
def create_summary_values_World_Bank(row):
    #print(row)
    if row["Indicator"] == "GDP percent change":
        val = row["2015"] - row["2000"]
        return(val, "2000-2015")
    else:
        indicator = reverse_map[row["Indicator"]]
    
    if indicator == 'Renewable energy consumption (% of total final energy consumption)':
        val = row["2014"] - row["2000"]
        return(val, "2000-2014")

    elif indicator == 'GDP (current US$)':
        val = row["2015"] - row["2000"]
        return(val, "2000-2015")

    elif indicator == 'Household final consumption expenditure per capita (constant 2010 US$)':
        val = row["2015"]
        return(val, "2015")

    elif indicator == 'Merchandise imports (current US$)':
        val = row["2015"] - row["2000"]
        return(val, "2000-2015")

    elif indicator == 'Industry, value added (constant 2010 US$)':
        val = row["2015"] - row["2000"]
        return(val, "2000-2015")

    elif indicator == 'Access to electricity (% of population)':
        val = row["2014"]
        return(val, "2014")

    elif indicator == 'Urban population (% of total)':
        val = row["2015"]
        return(val, "2015")

    elif indicator == 'Employment to population ratio, 15+, total (%) (modeled ILO estimate)':
        val = row["2015"] - row["2000"]
        return(val, "2000-2015")

    elif indicator == 'Total natural resources rents (% of GDP)':
        val = row["2015"]
        return(val, "2015")

    elif indicator == 'Life expectancy at birth, total (years)':
        val = row["2015"]
        return(val, "2015")

    elif indicator == 'Net migration':
        val = row["2012"]
        return(val, "2012")

    elif indicator == 'Proportion of seats held by women in national parliaments (%)':
        val = row["2015"]
        return(val, "2015")

    elif indicator == 'Individuals using the Internet (% of population)':
        val = row["2015"]
        return(val, "2015")
     
summary_data = all_world_bank_data.apply(create_summary_values_World_Bank, axis=1)
all_world_bank_data["Summary Range"], all_world_bank_data["Summary Range Years"] = list(zip(*summary_data))

In [30]:
all_world_bank_data.head()

Unnamed: 0_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,ISO,Indicator,Summary Range,Summary Range Years
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Afghanistan,0.163713,1.006099,3.467205,7.209179,13.969172,23.0,27.506411,34.290512,42.4,47.888466,42.7,61.51442,69.1,75.154373,89.5,,AFG,access_to_electricity_of_population_eg_elc_acc...,89.5,2014
Albania,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,,ALB,access_to_electricity_of_population_eg_elc_acc...,100.0,2014
Algeria,96.702133,97.004044,97.298698,97.590019,97.881889,98.184265,98.490738,98.806519,99.3,99.443893,99.711174,99.889542,99.973083,99.996918,100.0,,DZA,access_to_electricity_of_population_eg_elc_acc...,100.0,2014
American Samoa,,,,,,,,,,,,,,,,,ASM,access_to_electricity_of_population_eg_elc_acc...,,2014
Andorra,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,,AND,access_to_electricity_of_population_eg_elc_acc...,100.0,2014


In [31]:
write_to_S3(all_world_bank_data, s3_bucket, FINAL_DATA + "World Bank Data with ISO3, 2000-2015 with Summary Values.csv")

Calculate Index Values

In [32]:
world_bank_data = read_from_S3(s3_bucket, FINAL_DATA + "World Bank Data with ISO3, 2000-2015 with Summary Values.csv")

world_bank_data.head()

Unnamed: 0_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,ISO,Indicator,Summary Range,Summary Range Years
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Afghanistan,0.163713,1.006099,3.467205,7.209179,13.969172,23.0,27.506411,34.290512,42.4,47.888466,42.7,61.51442,69.1,75.154373,89.5,,AFG,access_to_electricity_of_population_eg_elc_acc...,89.5,2014
Albania,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,,ALB,access_to_electricity_of_population_eg_elc_acc...,100.0,2014
Algeria,96.702133,97.004044,97.298698,97.590019,97.881889,98.184265,98.490738,98.806519,99.3,99.443893,99.711174,99.889542,99.973083,99.996918,100.0,,DZA,access_to_electricity_of_population_eg_elc_acc...,100.0,2014
American Samoa,,,,,,,,,,,,,,,,,ASM,access_to_electricity_of_population_eg_elc_acc...,,2014
Andorra,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,,AND,access_to_electricity_of_population_eg_elc_acc...,100.0,2014


In [38]:
# Calculating index values
# formula = (1 – ΔCO2)*(1 + ΔGDP) - ΔCO2 + ΔGDP

world_bank_data = read_from_S3(s3_bucket, FINAL_DATA + "World Bank Data with ISO3, 2000-2015 with Summary Values.csv")
gdp_percent_change_data = world_bank_data.set_index("Indicator").loc["GDP percent change"]
gdp_percent_change_data.set_index(["ISO"], inplace=True)
gdp_percent_change_data = gdp_percent_change_data.drop(["Summary Range", "Summary Range Years"], axis=1)

def calc_index(co2, gdp):
    return((1-co2)*(1+gdp) - co2 + gdp)
    
# import CO2 change
## Territorial
territory_emissions = read_from_S3(s3_bucket, FINAL_DATA + \
                   "Territory Emissions GCB percent changes with ISO3 2000-2015 plus summary data.csv", index_col = "ISO")

## Consumption
consumption_emissions = read_from_S3(s3_bucket, FINAL_DATA + \
                   "Consumption Emissions GCB percent changes with ISO3 2000-2015 plus summary data.csv", index_col = "ISO")

# https://stackoverflow.com/questions/22149584/what-does-axis-in-pandas-mean
country_names_territory = territory_emissions["Country Name"]
country_names_consumption = consumption_emissions["Country Name"]

territory_emissions_gdp_index = calc_index(territory_emissions.drop(["Country Name", "Summary Range", "Summary Range Years", "Indicator"], axis=1), gdp_percent_change_data)
consumption_emissions_gdp_index = calc_index(consumption_emissions.drop(["Country Name", "Summary Range", "Summary Range Years","Indicator"], axis=1), gdp_percent_change_data)

def create_summary_values_ICGGD(row):
    val = row["2015"] - row["2000"]
    return(val, "2000-2015")
    
summary_data = territory_emissions_gdp_index.apply(create_summary_values_ICGGD, axis=1)
territory_emissions_gdp_index["Summary Range"], territory_emissions_gdp_index["Summary Range Years"] = list(zip(*summary_data))

summary_data = consumption_emissions_gdp_index.apply(create_summary_values_ICGGD, axis=1)
consumption_emissions_gdp_index["Summary Range"], consumption_emissions_gdp_index["Summary Range Years"] = list(zip(*summary_data))

territory_emissions_gdp_index["Indicator"] = "ICGGD with Production Emissions"
consumption_emissions_gdp_index["Indicator"] = "ICGGD with Consumption Emissions"
territory_emissions_gdp_index["Country Name"] = country_names_territory
consumption_emissions_gdp_index["Country Name"] = country_names_consumption

In [39]:
# Write to S3
write_to_S3(territory_emissions_gdp_index, s3_bucket, FINAL_DATA + \
                   "ICGGD calculated with Territory Emissions.csv")

write_to_S3(consumption_emissions_gdp_index, s3_bucket, FINAL_DATA + \
                   "ICGGD calculated with Consumption Emissions.csv")

In [76]:
# Create single file that has all the data for the application

#gdp_annual_pct_change = read_from_S3(s3_bucket, FINAL_DATA+".csv")
#gdp_annual_absolute_value = read_from_S3(s3_bucket, FINAL_DATA+".csv")

production_co2_emissions_annual_change = read_from_S3(s3_bucket, FINAL_DATA+"Territory Emissions GCB percent changes with ISO3 2000-2015 plus summary data.csv")
production_co2_emissions_absolute_value = read_from_S3(s3_bucket, FINAL_DATA+"Territory Emissions GCB absolute values with ISO3 2000-2015 plus summary data.csv")

consumption_co2_emissions_annual_change = read_from_S3(s3_bucket, FINAL_DATA+"Consumption Emissions GCB percent changes with ISO3 2000-2015 plus summary data.csv")
consumption_co2_emissions_absolute_value = read_from_S3(s3_bucket, FINAL_DATA+"Consumption Emissions GCB absolute values with ISO3 2000-2015 plus summary data.csv")

index_with_production_emissions_and_gdp = read_from_S3(s3_bucket, FINAL_DATA+"ICGGD calculated with Territory Emissions.csv")
index_with_consumption_emissions_and_gdp = read_from_S3(s3_bucket, FINAL_DATA+"ICGGD calculated with Consumption Emissions.csv")

world_bank_data = read_from_S3(s3_bucket, FINAL_DATA+"World Bank Data with ISO3, 2000-2015 with Summary Values.csv")

dsets = [production_co2_emissions_annual_change, production_co2_emissions_absolute_value,
        consumption_co2_emissions_annual_change, consumption_co2_emissions_absolute_value,
        index_with_production_emissions_and_gdp, index_with_consumption_emissions_and_gdp,
        world_bank_data]

def print_columns(list_of_dfs):
    for df in list_of_dfs:
        print(df.reset_index().columns)
        
print_columns(dsets)

                         2000      2001      2002      2003      2004  \
Country Name                                                            
Albania              0.012285  0.066748  0.163823  0.144673 -0.029889   
Algeria             -0.045460 -0.041370  0.077827  0.018486 -0.032853   
Andorra              0.021429  0.000000  0.013986  0.006897  0.054795   
Angola               0.042050  0.019985  0.301432 -0.284308  1.073220   
Antigua and Barbuda -0.010526  0.000000  0.053191  0.070707  0.047170   

                         2005      2006      2007      2008      2009  \
Country Name                                                            
Albania              0.021127 -0.083621  0.007526  0.113912  0.000838   
Algeria              0.197050 -0.055658  0.082355  0.007334  0.100934   
Andorra              0.019481 -0.050955 -0.013423  0.000000 -0.040816   
Angola               0.019317  0.162328  0.129611  0.022161  0.081016   
Antigua and Barbuda  0.009009  0.035714  0.103448 

In [53]:
world_bank_data[world_bank_data["Indicator"]=="GDP"]

Unnamed: 0_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,ISO,Indicator,Summary Range,Summary Range Years
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Afghanistan,,2461666000.0,4128821000.0,4583644000.0,5285466000.0,6275074000.0,7057598000.0,9843842000.0,10190530000.0,12486940000.0,15936800000.0,17930240000.0,20536540000.0,20046330000.0,20050190000.0,19702990000.0,AFG,GDP,,2000-2015
Albania,3632044000.0,4060759000.0,4435079000.0,5746946000.0,7314865000.0,8158549000.0,8992642000.0,10701010000.0,12881350000.0,12044210000.0,11926950000.0,12890870000.0,12319780000.0,12781030000.0,13219860000.0,11390370000.0,ALB,GDP,7758321000.0,2000-2015
Algeria,54790250000.0,54744710000.0,56760290000.0,67863830000.0,85325000000.0,103198200000.0,117027300000.0,134977100000.0,171000700000.0,137211000000.0,161207300000.0,200013100000.0,209047400000.0,209783500000.0,213983100000.0,164779500000.0,DZA,GDP,109989200000.0,2000-2015
American Samoa,,,514000000.0,527000000.0,512000000.0,503000000.0,496000000.0,520000000.0,563000000.0,678000000.0,576000000.0,574000000.0,644000000.0,639000000.0,638000000.0,641000000.0,ASM,GDP,,2000-2015
Andorra,1401695000.0,1484018000.0,1717485000.0,2373928000.0,2916787000.0,3248215000.0,3536633000.0,4010991000.0,4001201000.0,3650083000.0,3346517000.0,3427023000.0,3146152000.0,3248925000.0,,,AND,GDP,,2000-2015
Angola,9129595000.0,8936064000.0,12497350000.0,14188950000.0,19640850000.0,28233710000.0,41789480000.0,60448920000.0,84178030000.0,75492380000.0,82470910000.0,104115900000.0,115398400000.0,124912100000.0,126776900000.0,102962200000.0,AGO,GDP,93832650000.0,2000-2015
Antigua and Barbuda,825405500.0,795976500.0,809754500.0,850218600.0,913710400.0,1014980000.0,1149025000.0,1302389000.0,1359734000.0,1217720000.0,1147942000.0,1141865000.0,1216046000.0,1195885000.0,1274330000.0,1355646000.0,ATG,GDP,530240400.0,2000-2015
Argentina,284203800000.0,268696800000.0,97724000000.0,127587000000.0,164657900000.0,198737100000.0,232557300000.0,287530500000.0,361558000000.0,332976500000.0,423627400000.0,530163300000.0,545982400000.0,552025100000.0,526319700000.0,584711500000.0,ARG,GDP,300507700000.0,2000-2015
Armenia,1911564000.0,2118468000.0,2376335000.0,2807061000.0,3576615000.0,4900470000.0,6384452000.0,9206302000.0,11662040000.0,8647937000.0,9260285000.0,10142110000.0,10619320000.0,11121470000.0,11609510000.0,10529180000.0,ARM,GDP,8617619000.0,2000-2015
Aruba,1873453000.0,1920263000.0,1941095000.0,2021302000.0,2228279000.0,2331006000.0,2421475000.0,2623726000.0,2791961000.0,2498933000.0,2467704000.0,2584464000.0,,,,,ABW,GDP,,2000-2015


In [62]:
consumption_co2_emissions_absolute_value.head()

Unnamed: 0_level_0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,ISO,Summary Range,Summary Range Years,Indicator
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Albania,1.029235,1.102482,1.355331,1.506422,1.453613,1.605657,1.516234,1.51755,1.655498,1.696258,1.650886,1.711138,1.598367,1.543575,1.534179,,ALB,0.504944,2000-2014,Consumption CO2 Emissions Absolute Value
Algeria,,,,,,,,,,,,,,,,,DZA,,2000-2014,Consumption CO2 Emissions Absolute Value
Andorra,,,,,,,,,,,,,,,,,AND,,2000-2014,Consumption CO2 Emissions Absolute Value
Angola,,,,,,,,,,,,,,,,,AGO,,2000-2014,Consumption CO2 Emissions Absolute Value
Antigua and Barbuda,,,,,,,,,,,,,,,,,ATG,,2000-2014,Consumption CO2 Emissions Absolute Value


In [80]:
keep_these_countries = production_co2_emissions_annual_change.reset_index().set_index("ISO").index

def make_one_file(dsets):
    df = dsets[0].reset_index().set_index("ISO").loc[keep_these_countries].reset_index()
    print(df.shape)
    for i in range(1, len(dsets)):
        df = df.append(dsets[i].reset_index().set_index("ISO").loc[keep_these_countries].reset_index())
        print(df.shape)
    return(df)

final_data = make_one_file(dsets)

(183, 21)
(366, 21)
(549, 21)
(732, 21)
(915, 21)
(1098, 21)
(3660, 21)


In [81]:
# Write to S3
write_to_S3(final_data, s3_bucket, FINAL_DATA + \
                   "All Data Together.csv")

final_data.to_csv("/Users/nathansuberi/Desktop/Code Portfolio/nsuberi.github.io/wri-ghg/final-data/All Data Together.csv")

In [None]:
## Calculate extents in Python... re-did this in javascript

In [73]:
indicators = final_data["Indicator"].unique()
year_cols = [str(yr) for yr in range(2000,2016)]
extents = {}
long_extents = {}
for indicator in indicators:
    data = final_data.loc[final_data["Indicator"]==indicator, year_cols]
    extents[indicator] = [data.min().min(), data.max().max()]
    
    summary_data = final_data.loc[final_data["Indicator"]==indicator, "Summary Range"]
    long_extents[indicator] = [summary_data.min(), summary_data.max()]

In [75]:
print("extents")
print(extents)
print("long extents")
print(long_extents)

extents
{'Production CO2 Emissions Annual Change': [-0.58231707317073167, 5.8064516129032278], 'Production CO2 Emissions Absolute Value': [0.0069999999999999993, 9896.7060457050302], 'Consumption CO2 Emissions Annual Change': [-2.2820333549161385, 4.0330301875807812], 'Consumption CO2 Emissions Absolute Value': [-0.31450916878671931, 2470.0722301013307], 'ICGGD with Production Emissions': [-12.123624635633307, 9.1800704884216824], 'ICGGD with Consumption Emissions': [-7.4118054937117783, 5.9522470514303185], 'access_to_electricity_of_population_eg_elc_accs_zs': [0.0154853165149689, 100.0], 'renewable_energy_consumption_of_total_final_energy_consumpti': [0.0, 98.342609009734005], 'individuals_using_the_internet_of_population_it_net_user_z': [0.000289277, 98.323609649999995], 'household_final_consumption_expenditure_per_capita_constant_20': [152.47162086105499, 48546.572692095098], 'industry_value_added_constant_2010_us_nv_ind_totl_kd': [1322136.39560658, 21258209566653.301], 'total_natu