In [1]:
import pandas as pd
import os
import numpy as np

In [2]:
import ddf_utils.ddf_reader as dr
from ddf_utils.str import format_float_sigfig, format_float_digits

In [3]:
# Path, relative to this notebook, that ddf_reader is pointed at
# (presumably the folder holding the ddf--* dataset repos loaded below -- confirm).
dr.SEARCH_PATH = '../../../'

In [4]:
# Load the `total_carbon_emissions` datapoints, keyed by nation and year,
# from the CDIAC dataset.
co2 = dr.ddf_datapoint('ddf--cdiac-co2', 
                       'total_carbon_emissions', 'nation,year')

no index file, creating one...


In [5]:
# Load the `population` datapoints from the Gapminder population dataset.
pop = dr.ddf_datapoint('ddf--gapminder--population', 'population')

In [6]:
# Entity tables: Gapminder countries (the geo domain to align to) and
# CDIAC nations (the ids used by the emissions datapoints).
geo = dr.ddf_entities('ddf--gapminder--geo_entity_domain')['country']
nation = dr.ddf_entities('ddf--cdiac-co2')['nation']

In [7]:
# Preview the CDIAC emissions datapoints.
co2.head()

Unnamed: 0,nation,year,total_carbon_emissions
0,afghanistan,1949,4.0
1,afghanistan,1950,23.0
2,afghanistan,1951,25.0
3,afghanistan,1952,25.0
4,afghanistan,1953,29.0


In [8]:
# Preview the population datapoints.
pop.head()

Unnamed: 0,country,year,population
0,abw,1800,19286.0
1,abw,1801,19286.0
2,abw,1802,19286.0
3,abw,1803,19286.0
4,abw,1804,19286.0


In [None]:
# see if we can make the nation align to gapminder geo domain

In [9]:
# One dict per row of the CDIAC nation entity table.
# 'records' is the full orient name; the abbreviated 'record' was deprecated
# and is no longer accepted from pandas 2.0.
cdiac_nation_map = nation.to_dict('records')

In [10]:
# Map each CDIAC nation id to its display name.
# Built straight from the entity table rather than from the previous value of
# `cdiac_nation_map`, so re-running this cell gives the same dict instead of
# failing on the already-converted value.
cdiac_nation_map = dict(zip(nation['nation'], nation['name']))

In [11]:
# Attach the CDIAC display name to every datapoint; a nation id that is
# missing from the map raises KeyError.
co2['name'] = co2['nation'].map(cdiac_nation_map.__getitem__)

In [12]:
# Preview the datapoints with the new `name` column.
co2.head()

Unnamed: 0,nation,year,total_carbon_emissions,name
0,afghanistan,1949,4.0,AFGHANISTAN
1,afghanistan,1950,23.0,AFGHANISTAN
2,afghanistan,1951,25.0,AFGHANISTAN
3,afghanistan,1952,25.0,AFGHANISTAN
4,afghanistan,1953,29.0,AFGHANISTAN


In [13]:
# Peek at the geo column that holds CDIAC-style spellings of country names.
geo.alternative_4_cdiac.head()

0            NaN
1    Afghanistan
2            NaN
3        Albania
4        Algeria
Name: alternative_4_cdiac, dtype: object

In [15]:
# Geo-domain columns that may hold a spelling of a country's name.
search_cols = ['name', 'gapminder_list','alternative_1', 'alternative_2', 'alternative_3',
               'alternative_4_cdiac', 'pandg', 'god_id', 'alt_5', 'upper_case_name',
               'arb1', 'arb2', 'arb3', 'arb4', 'arb5', 'arb6'
              ]

# Lower-case every candidate column once, instead of once per CDIAC name:
# the result does not depend on the name being searched for.
geo_names_lower = geo[search_cols].apply(lambda col: col.str.lower())

# CDIAC display name -> Gapminder country id. Only names that match exactly
# one geo entity are recorded; ambiguous and unmatched names are reported.
map_c = {}

for g in co2.name.unique():
    # A geo row matches when any of its name columns equals the CDIAC name,
    # ignoring case (missing values never match).
    matched_rows = geo_names_lower.eq(g.lower()).any(axis=1)

    filtered = geo[matched_rows]
    if len(filtered) > 1:
        print('multiple entities found for '+g)
        print(filtered['country'].values)
    elif len(filtered) > 0:
        map_c[g] = filtered['country'].values[0]
    else:
        print('not found: ', g)

not found:  ANTARCTIC FISHERIES
not found:  BONAIRE, SAINT EUSTATIUS, AND SABA
not found:  DEMOCRATIC REPUBLIC OF VIETNAM
not found:  EAST & WEST PAKISTAN
not found:  FEDERATION OF MALAYA-SINGAPORE
not found:  FORMER PANAMA CANAL ZONE
not found:  FRENCH EQUATORIAL AFRICA
not found:  FRENCH INDO-CHINA
not found:  FRENCH WEST AFRICA
not found:  KUWAITI OIL FIRES
not found:  LEEWARD ISLANDS
not found:  NETHERLAND ANTILLES AND ARUBA
not found:  PACIFIC ISLANDS (PALAU)
not found:  PENINSULAR MALAYSIA
not found:  REPUBLIC OF SOUTH VIETNAM
not found:  RHODESIA-NYASALAND
not found:  RWANDA-URUNDI
not found:  RYUKYU ISLANDS
not found:  SABAH
not found:  SARAWAK
not found:  ST. KITTS-NEVIS-ANGUILLA
not found:  TANGANYIKA
not found:  ZANZIBAR


In [16]:
# List the columns available in the geo entity table.
geo.columns

Index(['country', 'gwid', 'name', 'world_6region', 'income_groups',
       'landlocked', 'g77_and_oecd_countries', 'main_religion_2008',
       'gapminder_list', 'alternative_1', 'alternative_2', 'alternative_3',
       'alternative_4_cdiac', 'pandg', 'god_id', 'alt_5', 'upper_case_name',
       'code', 'number', 'arb1', 'arb2', 'arb3', 'arb4', 'arb5', 'arb6',
       'is--country', 'world_4region', 'latitude', 'longitude'],
      dtype='object')

In [17]:
# Number of CDIAC names that were matched to a Gapminder country.
len(map_c)

232

In [18]:
# Number of rows in the Gapminder geo domain, for comparison.
len(geo.country)

275

In [19]:
# Gapminder countries that no CDIAC nation was mapped to.
# Filtering the column (rather than taking a set difference) keeps the geo
# table's own order, so the list is identical on every run.
mapped_countries = set(map_c.values())
dif = geo.loc[~geo['country'].isin(mapped_countries), 'country'].drop_duplicates().tolist()

In [20]:
# Countries/regions in the Gapminder geo domain that CDIAC does not cover.
# (`.loc` replaces `.ix`, which was removed in pandas 1.0.)

geo.set_index('country').loc[dif, 'name']

country
ala                                                         Åland
mnp                                      Northern Mariana Islands
gbm                                                   Isle of Man
iot                                British Indian Ocean Territory
asm                                                American Samoa
sgero_a_ssandw       South Georgia and the South Sandwich Islands
tuv                                                        Tuvalu
north_mar                                       Northern Marianas
gbg                                                      Guernsey
virg_isl                                           Virgin Islands
mco                                                        Monaco
esh                                                Western Sahara
ata                                                    Antarctica
usa_minor_out_isl                       US Minor Outlying Islands
abkh                                                     Abkhazia
nf

In [21]:
# Keep only the datapoints whose CDIAC name was matched to a Gapminder
# country, as an independent copy.
has_mapping = co2['name'].isin(list(map_c))
co2_filtered = co2.loc[has_mapping].copy()

In [22]:
# Preview the datapoints that survived the filter.
co2_filtered.head()

Unnamed: 0,nation,year,total_carbon_emissions,name
0,afghanistan,1949,4.0,AFGHANISTAN
1,afghanistan,1950,23.0,AFGHANISTAN
2,afghanistan,1951,25.0,AFGHANISTAN
3,afghanistan,1952,25.0,AFGHANISTAN
4,afghanistan,1953,29.0,AFGHANISTAN


In [23]:
# Overwrite the CDIAC nation id with the matched Gapminder country id.
co2_filtered['nation'] = co2_filtered['name'].map(map_c.__getitem__)

# calculation of indicators

full list see https://github.com/open-numbers/ddf--gapminder--co2_emission/issues/2

In [24]:
# Yearly CO2 emissions (1000 tonnes)

# Start from a copy of the three datapoint columns so that later edits leave
# co2_filtered untouched.
indicator_cols = ['nation', 'year', 'total_carbon_emissions']
yearly_co2 = co2_filtered[indicator_cols].copy()

In [25]:
# Preview: the nation column now holds Gapminder country ids.
yearly_co2.head()

Unnamed: 0,nation,year,total_carbon_emissions
0,afg,1949,4.0
1,afg,1950,23.0
2,afg,1951,25.0
3,afg,1952,25.0
4,afg,1953,29.0


In [26]:
# Rename the columns positionally: nation -> country, and the value column
# -> the indicator id used in the output file name.
yearly_co2.columns = ['country', 'year', 'yearly_co2_emissions_1000_tonnes']

In [27]:
# Key the indicator by (country, year); the frame keeps a single value column.
yearly_co2 = yearly_co2.set_index(['country', 'year'])

In [28]:
# get CO2 emission
# The source values are carbon emissions; scale them by the mass ratio
# CO2 / C = (12 + 2 * 16) / 12.
# NOTE: this rebinds yearly_co2, so running the cell twice scales twice.
carbon_mass = 12
oxygen_mass = 16
carbon_to_co2 = (carbon_mass + oxygen_mass * 2) / carbon_mass

yearly_co2 = yearly_co2 * carbon_to_co2

In [29]:
# set negative value to zero.
# (`.loc` replaces `.ix`, which was removed in pandas 1.0.)

is_negative = yearly_co2['yearly_co2_emissions_1000_tonnes'] < 0
yearly_co2.loc[is_negative, 'yearly_co2_emissions_1000_tonnes'] = 0

In [30]:
# confirm no negative values: this selection should display an empty frame.
# (`.loc` replaces `.ix`, which was removed in pandas 1.0.)
yearly_co2.loc[yearly_co2['yearly_co2_emissions_1000_tonnes'] < 0]

Unnamed: 0_level_0,Unnamed: 1_level_0,yearly_co2_emissions_1000_tonnes
country,year,Unnamed: 2_level_1


In [31]:
# Work on a copy so that formatting for export leaves the numeric frame intact.
csv = yearly_co2.copy()

In [32]:
# Format each value for export with ddf_utils' format_float_digits
# (presumably limits the number of decimal digits -- confirm against ddf_utils).
formatted_yearly = csv['yearly_co2_emissions_1000_tonnes'].map(format_float_digits)
csv['yearly_co2_emissions_1000_tonnes'] = formatted_yearly

In [33]:
# Write the yearly datapoints; country and year are written because they are
# the frame's index.
csv.to_csv('../../ddf--datapoints--yearly_co2_emissions_1000_tonnes--by--country--year.csv')

In [34]:
# Cumulative CO2 emissions (tonnes)

# Sort by country, then year, and group per country so that a running total
# can be taken in chronological order.
cumulative_co2 = (
    yearly_co2
    .sort_index(level=['country', 'year'])
    .groupby(level='country')
)

In [35]:
# Running total of yearly emissions within each country.
# The built-in grouped cumsum always keeps the original (country, year) index;
# `apply(np.cumsum)` can prepend the group key as an extra index level on
# recent pandas versions, which would break the column rename further down.
cumulative_co2 = cumulative_co2['yearly_co2_emissions_1000_tonnes'].cumsum()

In [36]:
# Turn the (country, year) index back into ordinary columns.
cumulative_co2 = cumulative_co2.reset_index()

In [37]:
# Rename positionally so the value column carries the cumulative indicator id.
cumulative_co2.columns = ['country', 'year', 'cumulative_co2_emissions_tonnes']

In [38]:
# Preview the running totals (not yet scaled to tonnes at this point).
cumulative_co2.head()

Unnamed: 0,country,year,cumulative_co2_emissions_tonnes
0,abw,1986,179.666667
1,abw,1987,627.0
2,abw,1988,1239.333333
3,abw,1989,1888.333333
4,abw,1990,3619.0


In [39]:
# unit: the running total was summed from values in 1000 tonnes; scale it to tonnes.
# NOTE: the column is updated in place, so running the cell twice scales twice.

cumulative_in_tonnes = cumulative_co2['cumulative_co2_emissions_tonnes'].mul(1000)
cumulative_co2['cumulative_co2_emissions_tonnes'] = cumulative_in_tonnes

In [40]:
csv = cumulative_co2.copy()

# Export whole tonnes. Round to the nearest integer rather than truncating
# with int(): the running sums are floats, so a total that should be 627000
# can be stored as 626999.9999... and would otherwise be written as 626999.
# int64 is requested explicitly so large totals do not overflow a 32-bit int.
csv['cumulative_co2_emissions_tonnes'] = \
csv['cumulative_co2_emissions_tonnes'].round().astype('int64')

In [41]:
# Write the cumulative datapoints; index=False because country and year are
# ordinary columns here.
csv.to_csv('../../ddf--datapoints--cumulative_co2_emissions_tonnes--by--country--year.csv', index=False)

In [42]:
# CO2 per capita (tonnes per person)

# Rename the population columns positionally and key the frame by
# (country, year) so it aligns with yearly_co2 in the division below.
# NOTE(review): this mutates and rebinds `pop`; re-running the cell on the
# already-indexed frame fails because only one column is left to rename.
pop.columns = ['country', 'year', 'value']
pop = pop.set_index(['country', 'year'])

In [43]:
# Emissions (1000 tonnes) divided by population, aligned on the shared
# (country, year) index; keys present on only one side come out as NaN.
yearly_emissions = yearly_co2['yearly_co2_emissions_1000_tonnes']
a = yearly_emissions.div(pop['value'])

In [44]:
# Drop the (country, year) pairs with no result, then scale from
# 1000 tonnes per person to tonnes per person.
per_person = a.dropna().mul(1000)

In [45]:
# Turn the (country, year) index into columns and name the value column
# after the per-person indicator id.
per_person = per_person.reset_index()
per_person.columns = ['country', 'year', 'co2_emissions_tonnes_per_person']

In [46]:
# Preview the per-person indicator.
per_person.head()

Unnamed: 0,country,year,co2_emissions_tonnes_per_person
0,abw,1986,2.868059
1,abw,1987,7.234306
2,abw,1988,10.025596
3,abw,1989,10.633766
4,abw,1990,27.847504


In [47]:
# Format a copy for export so the numeric per_person frame stays untouched.
csv = per_person.copy()

formatted_per_person = csv['co2_emissions_tonnes_per_person'].map(format_float_digits)
csv['co2_emissions_tonnes_per_person'] = formatted_per_person

In [48]:
# Write the per-person datapoints; index=False because country and year are
# ordinary columns here.
csv.to_csv('../../ddf--datapoints--co2_emissions_tonnes_per_person--by--country--year.csv', index=False)

# concepts

In [49]:
# Concept ids for the concepts file: three string properties, the two
# dimensions (country, year), and the three indicators exported above.
# The display names assigned later are matched to these ids by position.
concepts = [
    'name',
    'indicator_url',
    'unit',
    'country',
    'year',
    'yearly_co2_emissions_1000_tonnes',
    'cumulative_co2_emissions_tonnes',
    'co2_emissions_tonnes_per_person'
]

In [57]:
# One row per concept id, held in a single `concept` column.
cdf = pd.DataFrame({'concept': concepts})

In [58]:
# Human-readable names, listed in the same order as the `concepts` ids
# (the assignment is positional, so the two lists must stay in step).
cdf['name'] = [
    'Name',
    'Indicator URL',
    'Unit',
    'Country',
    'Year',
    'Yearly CO2 emissions',
    'Cumulative CO2 emissions',
    'CO2 per capita'
]

In [59]:
# Index by concept id so the cells below can address rows by id.
cdf = cdf.set_index('concept')

In [60]:
# Default every concept to 'measure', then override the non-indicator rows.
# (`.loc` replaces `.ix`, which was removed in pandas 1.0.)
cdf['concept_type'] = 'measure'
cdf.loc[['name', 'indicator_url', 'unit'], 'concept_type'] = 'string'
cdf.loc['country', 'concept_type'] = 'entity_domain'
cdf.loc['year', 'concept_type'] = 'time'

In [61]:
# Point every measure at the dataset repository; other rows are left empty.
# (`.loc` replaces `.ix`, which was removed in pandas 1.0.)
cdf.loc[cdf.concept_type == 'measure', 'indicator_url'] = 'https://github.com/open-numbers/ddf--gapminder--co2_emission'

In [62]:
# Units for the three indicators. These strings are written to
# ddf--concepts.csv, so the 'mertic' misspelling is corrected to 'metric'.
# (`.loc` replaces `.ix`, which was removed in pandas 1.0.)
cdf.loc['yearly_co2_emissions_1000_tonnes', 'unit'] = '1000 metric tons'
cdf.loc['cumulative_co2_emissions_tonnes', 'unit'] = 'metric tons'
cdf.loc['co2_emissions_tonnes_per_person', 'unit'] = 'metric tons per person'

In [63]:
# Display the finished concept table for a visual check before export.
cdf

Unnamed: 0_level_0,name,concept_type,indicator_url,unit
concept,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
name,Name,string,,
indicator_url,Indicator URL,string,,
unit,Unit,string,,
country,Country,entity_domain,,
year,Year,time,,
yearly_co2_emissions_1000_tonnes,Yearly CO2 emissions,measure,https://github.com/open-numbers/ddf--gapminder...,1000 mertic tons
cumulative_co2_emissions_tonnes,Cumulative CO2 emissions,measure,https://github.com/open-numbers/ddf--gapminder...,mertic tons
co2_emissions_tonnes_per_person,CO2 per capita,measure,https://github.com/open-numbers/ddf--gapminder...,metric tons per person


In [64]:
# Write the concepts file; the `concept` index is written as the first column.
cdf.to_csv('../../ddf--concepts.csv')

# entity

In [65]:
# just copy the GM geo domain
# Only the id and name columns are exported, for every row of the geo table
# (including countries that have no CDIAC datapoints).

geo[['country', 'name']].to_csv('../../ddf--entities--country.csv', index=False)

In [66]:
# Run the validate-ddf command-line tool on the directory the files above were written to.
!validate-ddf ../../

[
{}]

