In [1]:
import pandas as pd
import json
import os
import numpy as np

In [2]:
# Load the greenhouse-gas inventory CSV from the current working directory.
# The path is built with os.path.join rather than a hand-written backslash: '\g' inside a
# normal string literal is an invalid escape sequence, and a literal backslash separator
# only works on Windows.
cwd = os.getcwd()
csv_path = os.path.join(cwd, 'greenhouse_gas_inventory_data_data.csv')
df = pd.read_csv(csv_path)
df

Unnamed: 0,country_or_area,year,value,category
0,Australia,2014,393126.946994,carbon_dioxide_co2_emissions_without_land_use_...
1,Australia,2013,396913.936530,carbon_dioxide_co2_emissions_without_land_use_...
2,Australia,2012,406462.847704,carbon_dioxide_co2_emissions_without_land_use_...
3,Australia,2011,403705.528314,carbon_dioxide_co2_emissions_without_land_use_...
4,Australia,2010,406200.993184,carbon_dioxide_co2_emissions_without_land_use_...
...,...,...,...,...
8401,United States of America,1994,593.222570,unspecified_mix_of_hydrofluorocarbons_hfcs_and...
8402,United States of America,1993,586.939752,unspecified_mix_of_hydrofluorocarbons_hfcs_and...
8403,United States of America,1992,574.414624,unspecified_mix_of_hydrofluorocarbons_hfcs_and...
8404,United States of America,1991,566.900968,unspecified_mix_of_hydrofluorocarbons_hfcs_and...


In [3]:
# Show the distinct entries of the country_or_area column.
country_column = df['country_or_area']
country_column.unique()

array(['Australia', 'Austria', 'Belarus', 'Belgium', 'Bulgaria', 'Canada',
       'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia',
       'European Union', 'Finland', 'France', 'Germany', 'Greece',
       'Hungary', 'Iceland', 'Ireland', 'Italy', 'Japan', 'Latvia',
       'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malta', 'Monaco',
       'Netherlands', 'New Zealand', 'Norway', 'Poland', 'Portugal',
       'Romania', 'Russian Federation', 'Slovakia', 'Slovenia', 'Spain',
       'Sweden', 'Switzerland', 'Turkey', 'Ukraine', 'United Kingdom',
       'United States of America'], dtype=object)

<h5>European Union is not a country, so we remove it from the list.</h5>

In [7]:
# Drop every row whose country_or_area is 'European Union' (it is not a country).
is_eu_row = df['country_or_area'] == 'European Union'
df = df[~is_eu_row]

In [8]:
# Re-list the distinct country_or_area entries to confirm what remains in the frame.
remaining_countries = df['country_or_area'].unique()
remaining_countries

array(['Australia', 'Austria', 'Belarus', 'Belgium', 'Bulgaria', 'Canada',
       'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia',
       'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Iceland',
       'Ireland', 'Italy', 'Japan', 'Latvia', 'Liechtenstein',
       'Lithuania', 'Luxembourg', 'Malta', 'Monaco', 'Netherlands',
       'New Zealand', 'Norway', 'Poland', 'Portugal', 'Romania',
       'Russian Federation', 'Slovakia', 'Slovenia', 'Spain', 'Sweden',
       'Switzerland', 'Turkey', 'Ukraine', 'United Kingdom',
       'United States of America'], dtype=object)

In [3]:
# Collect the distinct emission categories present in the data and display them.
category_column = df['category']
gas_list = category_column.unique()
gas_list

array(['carbon_dioxide_co2_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent',
       'greenhouse_gas_ghgs_emissions_including_indirect_co2_without_lulucf_in_kilotonne_co2_equivalent',
       'greenhouse_gas_ghgs_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent',
       'hydrofluorocarbons_hfcs_emissions_in_kilotonne_co2_equivalent',
       'methane_ch4_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent',
       'nitrogen_trifluoride_nf3_emissions_in_kilotonne_co2_equivalent',
       'nitrous_oxide_n2o_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent',
       'perfluorocarbons_pfcs_emissions_in_kilotonne_co2_equivalent',
       'sulphur_hexafluoride_sf6_emissions_in_kilotonne_co2_equivalent',
       'unspecified_mix_of_hydrofluorocarbons_hfcs_and_perfluorocarbons_pfcs_emissions_in_kilotonne_co2_equivalent'],
      dtype=object)

In [4]:
# Print the number of rows recorded for each emission category.
for gas_name in gas_list:
    matches_gas = df['category'] == gas_name
    count = int(matches_gas.sum())
    print(f"{gas_name}: {count}")

carbon_dioxide_co2_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent: 1074
greenhouse_gas_ghgs_emissions_including_indirect_co2_without_lulucf_in_kilotonne_co2_equivalent: 949
greenhouse_gas_ghgs_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent: 1074
hydrofluorocarbons_hfcs_emissions_in_kilotonne_co2_equivalent: 975
methane_ch4_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent: 1074
nitrogen_trifluoride_nf3_emissions_in_kilotonne_co2_equivalent: 248
nitrous_oxide_n2o_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent: 1074
perfluorocarbons_pfcs_emissions_in_kilotonne_co2_equivalent: 831
sulphur_hexafluoride_sf6_emissions_in_kilotonne_co2_equivalent: 1032
unspecified_mix_of_hydrofluorocarbons_hfcs_and_perfluorocarbons_pfcs_emissions_in_kilotonne_co2_equivalent: 75


<h4>The data is irregular; the following can be done to manage that:</h4>
<ul>
<li>Data under the category <em>"unspecified mix of hydrofluorocarbons.. "</em> can be ignored, as it has very few data points.</li>
<li>For the GHGs, we can exclude <em>greenhouse_gas_ghgs_emissions_including_indirect_co2</em>.</li>
</ul>
<br>


In [5]:
# Categories left out of the export:
#   - the GHG total "including indirect CO2" (the other GHG total is kept)
#   - the unspecified HFC/PFC mix, which has very few data points
# Filtering by name instead of by position (the original deleted indices 1 and -1) keeps
# this cell idempotent: running it again does not remove any further categories.
excluded_categories = {
    'greenhouse_gas_ghgs_emissions_including_indirect_co2_without_lulucf_in_kilotonne_co2_equivalent',
    'unspecified_mix_of_hydrofluorocarbons_hfcs_and_perfluorocarbons_pfcs_emissions_in_kilotonne_co2_equivalent',
}
gas_list = np.array(
    [gas_name for gas_name in gas_list if gas_name not in excluded_categories],
    dtype=object,
)

<h3>Creating separate JSON files for each greenhouse gas</h3>

In [6]:
# Output file names; the export loop pairs them positionally with the entries of gas_list.
files_list = [
    'co2.json',
    'ghgs.json',
    'hfc.json',
    'ch4.json',
    'nf3.json',
    'n2o.json',
    'pfc.json',
    'sf6.json',
]

In [14]:
# Write one JSON file per gas category, shaped as {country: {year: value}}.
#
# gas_list and files_list are paired by position, so they must be the same length.
# Checking up front prevents data from being written under the wrong file name
# (or the loop failing part-way through) when the two lists have drifted apart.
if len(gas_list) != len(files_list):
    raise ValueError(
        f"gas_list has {len(gas_list)} categories but files_list has "
        f"{len(files_list)} file names"
    )

for gas_name, file_name in zip(gas_list, files_list):
    data_dict = {}
    new_df = df[df['category'] == gas_name]

    ## iterate over the countries that have data for this specific gas
    for country in new_df['country_or_area'].unique():
        temp_df = new_df[new_df['country_or_area'] == country]
        # astype(int) makes the years integer keys; it also truncates the emission
        # values to whole numbers.
        temp_arr = temp_df[['year', 'value']].values.astype(int)
        # Each [year, value] row becomes one key/value pair of the country's dict.
        data_dict[country] = dict(temp_arr.tolist())

    ## writing data into files
    with open(file_name, 'w') as data_file:
        json.dump(data_dict, data_file)
    