In [11]:
import pandas as pd
import numpy as np
import json
import os

In [12]:
# Load the panel-format energy dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('data/Panel-format.csv')
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,Country,Year,pop,ISO3166_alpha3,ISO3166_numeric,Region,SubRegion,OPEC,EU,OECD,...,ren_power_ej,ren_power_twh,ren_power_twh_net,renewables_ej,solar_ej,solar_twh,solar_twh_net,wind_ej,wind_twh,wind_twh_net
0,Algeria,1965,12.381256,DZA,12.0,Africa,Northern Africa,1.0,0.0,0.0,...,,,,,,,,,,
1,Algeria,1966,12.613389,DZA,12.0,Africa,Northern Africa,1.0,0.0,0.0,...,,,,,,,,,,
2,Algeria,1967,12.897116,DZA,12.0,Africa,Northern Africa,1.0,0.0,0.0,...,,,,,,,,,,
3,Algeria,1968,13.190975,DZA,12.0,Africa,Northern Africa,1.0,0.0,0.0,...,,,,,,,,,,
4,Algeria,1969,13.491016,DZA,12.0,Africa,Northern Africa,1.0,0.0,0.0,...,,,,,,,,,,


In [13]:
# Partition the columns by dtype: float64/int64 columns count as numeric,
# object columns as categorical; any other dtype ends up in neither list.
numerical_cols = [
    name for name, kind in df.dtypes.items()
    if kind == 'float64' or kind == 'int64'
]
categorical_cols = [name for name, kind in df.dtypes.items() if kind == 'object']

# Number of missing values in each numeric column.
df[numerical_cols].isnull().sum()

Year                  0
pop                1460
ISO3166_numeric    1774
OPEC               1774
EU                 1774
                   ... 
solar_twh          2540
solar_twh_net      2540
wind_ej            2464
wind_twh           2464
wind_twh_net       2464
Length: 97, dtype: int64

In [14]:
# Report the share of missing values per column as a whole-number percentage.
for col, pct_missing in df.isnull().mean().items():
    print(f'{col} - {round(pct_missing * 100)}%')


Country - 0%
Year - 0%
pop - 20%
ISO3166_alpha3 - 0%
ISO3166_numeric - 25%
Region - 25%
SubRegion - 25%
OPEC - 25%
EU - 25%
OECD - 25%
CIS - 25%
biodiesel_cons_kboed - 90%
biodiesel_cons_pj - 90%
biodiesel_prod_kboed - 95%
biodiesel_prod_pj - 95%
biofuels_cons_ej - 65%
biofuels_cons_kbd - 65%
biofuels_cons_kboed - 79%
biofuels_cons_pj - 79%
biofuels_prod_kbd - 86%
biofuels_prod_kboed - 86%
biofuels_prod_pj - 86%
biogeo_ej - 31%
biogeo_twh - 31%
biogeo_twh_net - 31%
co2_combust_mtco2 - 20%
co2_combust_pc - 20%
co2_combust_per_ej - 20%
co2_mtco2 - 57%
coalcons_ej - 24%
coalprod_ej - 70%
coalprod_mt - 70%
cobalt_kt - 95%
cobaltres_kt - 100%
diesel_gasoil_cons_kbd - 90%
elect_twh - 45%
electbyfuel_coal - 73%
electbyfuel_gas - 73%
electbyfuel_hydro - 60%
electbyfuel_nuclear - 66%
electbyfuel_oil - 73%
electbyfuel_other - 73%
electbyfuel_ren_power - 63%
electbyfuel_total - 73%
ethanol_cons_kboed - 90%
ethanol_cons_pj - 90%
ethanol_prod_kboed - 93%
ethanol_prod_pj - 93%
fuel_oil_cons_kbd - 87

In [15]:
# Replace every missing value in the numeric columns with 0 (this overwrites `df` in place).
# NOTE(review): after this step "not reported" is indistinguishable from a true 0 in
# the sums and scaled values computed further down — confirm that is acceptable.
df[numerical_cols] = df[numerical_cols].fillna(0)
# Confirm that no missing values remain in the numeric columns.
df[numerical_cols].isnull().sum()

Year               0
pop                0
ISO3166_numeric    0
OPEC               0
EU                 0
                  ..
solar_twh          0
solar_twh_net      0
wind_ej            0
wind_twh           0
wind_twh_net       0
Length: 97, dtype: int64

In [16]:
# Build the working frame: same data as `df`, minus the membership-flag and
# ISO-code columns. `drop` returns a new frame, so `df` itself is left intact.
new_df = df.drop(columns=['OPEC', 'EU', 'OECD', 'CIS', 'ISO3166_alpha3', 'ISO3166_numeric'])
new_df.head()

Unnamed: 0,Country,Year,pop,Region,SubRegion,biodiesel_cons_kboed,biodiesel_cons_pj,biodiesel_prod_kboed,biodiesel_prod_pj,biofuels_cons_ej,...,ren_power_ej,ren_power_twh,ren_power_twh_net,renewables_ej,solar_ej,solar_twh,solar_twh_net,wind_ej,wind_twh,wind_twh_net
0,Algeria,1965,12.381256,Africa,Northern Africa,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Algeria,1966,12.613389,Africa,Northern Africa,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Algeria,1967,12.897116,Africa,Northern Africa,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Algeria,1968,13.190975,Africa,Northern Africa,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Algeria,1969,13.491016,Africa,Northern Africa,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Splitting Data

In [17]:
# Persist the trimmed frame as CSV; index=False keeps the row index out of the file.
fileName = 'data/Energy-Data-Edited.csv'
new_df.to_csv(fileName, index=False)

In [18]:
# List the distinct years present in the data (the valid choices for yearLabel).
new_df['Year'].unique()

array([1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975,
       1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986,
       1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
       1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
       2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
       2020, 2021, 2022], dtype=int64)

### `valueLabel` & `yearLabel` params
- Set `valueLabel` to one of: ["solar_ej", "biofuels_cons_ej","biogeo_ej","gascons_ej","nuclear_ej","oilcons_ej","renewables_ej","wind_ej"]   
- Set `yearLabel` to one of the years in the dataset: 1965 to 2022  

Running the cells below then generates a JSON file for the selected year and value.

In [19]:
# Parameters of the export: the column to visualise and the year to slice on.
# Both also end up in the name of the generated JSON file.
valueLabel = "nuclear_ej"
yearLabel = 2002

In [20]:
# Keep only the identifying columns plus the selected value column.
colsToKeep = ["Country", "Year", "Region", valueLabel]
# Select from the full frame `df` rather than from `new_df` itself: narrowing
# `new_df` in place would make this cell fail with a KeyError as soon as
# `valueLabel` is changed and the cell is re-run, because the other value
# columns would already be gone. The selected columns hold the same data in
# both frames.
new_df = df[colsToKeep]

In [21]:
# Keep only rows that have a Region and belong to the selected year.
has_region = new_df['Region'].notna()
in_selected_year = new_df['Year'] == yearLabel
new_df = new_df[has_region & in_selected_year]

In [22]:
# Number of rows per Region for the selected year, plus the total row count.
new_df['Region'].value_counts(), len(new_df)

(Region
 Europe                33
 Asia Pacific          21
 Africa                16
 S. & Cent. America    11
 Middle East           11
 CIS                    6
 North America          3
 Name: count, dtype: int64,
 101)

#### First JSON file that holds aggregated data of all Regions

In [23]:
# Region-level view: the Country column is not needed for the per-region totals.
region_df = new_df.drop(columns=['Country'])

In [24]:
# Sum the value column per (Region, Year); as_index=False keeps the grouping
# keys as ordinary columns instead of moving them into the index.
region_df = region_df.groupby(['Region', 'Year'], as_index=False).sum()

In [25]:
# Shape the per-region totals into node records for the JSON tree: every region
# hangs off 'World', `label` carries the region name, `value` the total and
# `svalue` the total scaled by the largest regional total.
region_max = region_df[valueLabel].max()
region_df = (
    region_df
    .assign(
        Parent='World',
        label=region_df['Region'],
        # If every region is 0 for this year, dividing by the maximum would be
        # 0/0 and produce NaN (written as null in the JSON); use 0.0 instead.
        svalue=region_df[valueLabel] / region_max if region_max != 0 else 0.0,
    )
    .rename(columns={valueLabel: 'value'})
    .drop(columns=['Region'])
)

In [26]:
# Inspect the region node records (at most the first 10 rows).
region_df.head(10)

Unnamed: 0,Year,value,Parent,label,svalue
0,2002,0.120908,World,Africa,0.010948
1,2002,5.236126,World,Asia Pacific,0.474138
2,2002,1.42808,World,CIS,0.129314
3,2002,11.043467,World,Europe,1.0
4,2002,0.0,World,Middle East,0.0
5,2002,9.134077,World,North America,0.827102
6,2002,0.198206,World,S. & Cent. America,0.017948


In [27]:
def saveJsonFile(fileName, jsonData):
    """Write `jsonData` to `fileName` as JSON indented by 4 spaces.

    The data is serialised before the file is opened, so a serialisation
    error leaves any existing file untouched. An existing file is overwritten.
    """
    serialized = json.dumps(jsonData, indent=4)
    with open(fileName, 'w') as handle:
        handle.write(serialized)

In [28]:
# Convert the region frame to a list of plain dicts (one per row) by
# round-tripping through pandas' JSON serialisation.
jsonRegion = json.loads(region_df.to_json(orient='records'))

In [29]:
def createHierarchicalJsonfileRegionWise(jsonRegion):
    """Wrap the region records in a 'World' root node.

    Parameters
    ----------
    jsonRegion : list of dict
        Region records; each must carry a numeric 'value' entry.

    Returns
    -------
    dict
        Root node with 'Parent' "Root", 'label' "World", 'value' equal to the
        sum of the regions' values, and 'data' holding the same record objects
        (not copies) in their original order.
    """
    return {
        'Parent': "Root",
        'label': "World",
        'value': sum(region['value'] for region in jsonRegion),
        'data': list(jsonRegion),
    }


In [30]:
# Build the World -> Region tree from the region records.
jsonRegionHierarchical = createHierarchicalJsonfileRegionWise(jsonRegion)
# Optional: uncomment to also save the region-only tree.
# saveJsonFile('data/Energy-Data-RegionWise-Hierarchical.json', jsonRegionHierarchical)

#### Second JSON file that holds the data of all Countries, each with its Region as `Parent`

In [31]:
# Take an independent copy of the filtered frame: a plain `country_df = new_df`
# is only an alias, so columns added to `country_df` afterwards would also show
# up in `new_df` and change the result of re-running the region cells.
country_df = new_df.copy()

In [32]:
# Scale each country's value by the largest country value and rename the
# columns to the node schema (Region -> Parent, Country -> label,
# selected value column -> value).
# `assign` returns a new frame instead of writing a column into a filtered
# slice, which avoids pandas' SettingWithCopyWarning and leaves the source
# frame unmodified.
country_max = country_df[valueLabel].max()
country_df = (
    country_df
    .assign(
        # If every country is 0 for this year, dividing by the maximum would be
        # 0/0 and produce NaN (written as null in the JSON); use 0.0 instead.
        svalue=country_df[valueLabel] / country_max if country_max != 0 else 0.0,
    )
    .rename(columns={'Region': 'Parent', 'Country': 'label', valueLabel: 'value'})
)

In [33]:
# Inspect the first 10 country node records.
country_df.head(10)

Unnamed: 0,label,Year,Parent,value,svalue
37,Algeria,2002,Africa,0.0,0.0
95,Angola,2002,Africa,0.0,0.0
153,Argentina,2002,S. & Cent. America,0.058695,0.007089
211,Australia,2002,Asia Pacific,0.0,0.0
269,Austria,2002,Europe,0.0,0.0
312,Azerbaijan,2002,CIS,0.0,0.0
370,Bahrain,2002,Middle East,0.0,0.0
422,Bangladesh,2002,Asia Pacific,0.0,0.0
465,Belarus,2002,CIS,0.0,0.0
523,Belgium,2002,Europe,0.477543,0.057677


In [34]:
# Convert the country frame to a list of plain dicts (one per row) by
# round-tripping through pandas' JSON serialisation.
jsonCountry = json.loads(country_df.to_json(orient='records'))

In [35]:
def createHierarchicalJsonfileCountryWise(jsonRegionHierarchical, jsonCountry):
    """Attach each country record to the region node it belongs to.

    Every region node in ``jsonRegionHierarchical['data']`` gets a fresh, empty
    'data' list. Each country is then appended to the first region whose
    'label' equals the country's 'Parent'; countries without a matching region
    are left out.

    The tree is modified in place and the same object is returned.
    """
    regions = jsonRegionHierarchical['data']
    for region in regions:
        region['data'] = []

    for country in jsonCountry:
        # First region with a matching label wins, as with a scan-and-break loop.
        owner = next(
            (region for region in regions if country['Parent'] == region['label']),
            None,
        )
        if owner is not None:
            owner['data'].append(country)

    return jsonRegionHierarchical

In [36]:
# Hang the country records under their regions. The function fills in and
# returns the tree it was given, so both names refer to the same object.
jsonCountryHierarchical = createHierarchicalJsonfileCountryWise(jsonRegionHierarchical, jsonCountry)

In [37]:
# Write the World -> Region -> Country tree; the file name encodes the chosen value column and year.
saveJsonFile(f'data/CountryWise_{valueLabel}_{yearLabel}.json', jsonCountryHierarchical)

In [38]:
# Scaled value of every region node, in tree order.
sValuesRegion = [region['svalue'] for region in jsonCountryHierarchical['data']]
sValuesRegion

[0.0109483918,
 0.4741378342,
 0.1293144652,
 1.0,
 0.0,
 0.8271023025,
 0.0179478388]

In [39]:
# Scaled value of every country node, walking the regions in tree order.
sValuesCountry = [
    country['svalue']
    for region in jsonCountryHierarchical['data']
    for country in region['data']
]
sValuesCountry

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0146032232,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0306008829,
 0.0,
 0.0235684807,
 0.0,
 0.3827191288,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0023089468,
 0.0,
 0.0,
 0.1450493127,
 0.0,
 0.0481694728,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.1724826832,
 0.0,
 0.0,
 0.0,
 0.0576773118,
 0.0246273351,
 0.0,
 0.0,
 0.0228202925,
 0.0,
 0.0,
 0.0271519359,
 0.5319075867,
 0.200752608,
 0.0,
 0.0169926418,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0172228158,
 0.0,
 0.0047660994,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0067139997,
 0.0218640367,
 0.0067322677,
 0.0767439555,
 0.0829488939,
 0.0329357572,
 0.0,
 0.0949800164,
 0.1069860287,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0913386469,
 0.0118699095,
 1.0,
 0.0070890972,
 0.0,
 0.0168501535,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

### Move JSON files to the public folder of the React App

In [45]:
# Move every generated CountryWise_*.json from ./data into ./public.
# The loop variable has its own name so it does not overwrite `fileName`,
# which holds the CSV path defined earlier in the notebook.
for jsonName in os.listdir(path='./data'):
    if jsonName.endswith('.json') and jsonName.startswith('CountryWise'):
        # os.replace overwrites an existing target on every platform, whereas
        # os.rename raises FileExistsError on Windows when the same export
        # (value column + year) has already been moved once.
        os.replace(f'./data/{jsonName}', f'./public/{jsonName}')