# Data Preparation

The purpose of this notebook is to join the energy, population, and income dataframes and to build the dataframes needed to analyze:
- Renewable energy consumption per capita change between 1990 and 2015
- Renewable energy consumption per capita/Total energy consumption per capita between 1990 and 2015

## Import Data

In [1]:
# Load dependencies
import numpy as np
import pandas as pd

In [2]:
# Read the three cleaned input tables (paths are relative to the working directory)
energy_df, pop_df, income_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/clean/energy.csv',
        'data/clean/pop.csv',
        'data/clean/income.csv',
    )
)

In [5]:
# Preview the first rows of each input frame.
# A cell only renders its last expression automatically, so display() is used
# to show all three previews instead of just income_df's.
display(energy_df.head(), pop_df.head(), income_df.head())

Unnamed: 0,Country Name,Country Code,Region,IncomeGroup
0,Aruba,ABW,Latin America & Caribbean,High income
1,Afghanistan,AFG,South Asia,Low income
2,Angola,AGO,Sub-Saharan Africa,Lower middle income
3,Albania,ALB,Europe & Central Asia,Upper middle income
4,Andorra,AND,Europe & Central Asia,High income


## Join df's

In [21]:
# Attach population to every energy row (matched on country code + year),
# then attach the income_df columns (matched on country code only).
# Left joins keep every energy row even when no match is found.
energy_pop = energy_df.merge(pop_df, how='left', on=['Country Code', 'Year'])
all_data_df = energy_pop.merge(income_df, how='left', on='Country Code')

In [23]:
# Keep a single country-name column. After the two merges there are three:
# 'Country Name_x' / 'Country Name_y' (the overlapping column from the
# energy_df/pop_df merge, carrying pandas' default suffixes) and the
# un-suffixed 'Country Name' brought in by income_df.
# The left-hand (energy_df) name is kept and renamed back to 'Country Name'.
#
# The guard makes the cell safe to re-run: once the rename has happened,
# 'Country Name_y' no longer exists and an unguarded drop would raise KeyError.
if 'Country Name_x' in all_data_df.columns:
    all_data_df = (
        all_data_df
        .drop(columns=['Country Name_y', 'Country Name'])
        .rename(columns={'Country Name_x': 'Country Name'})
    )

In [24]:
# Preview the joined frame after the duplicate country-name columns were removed
all_data_df.head(n=5)

Unnamed: 0,Country Name,Country Code,Year,Access to Clean Fuels and Technologies for cooking (% of total population),Access to electricity (% of rural population with access),Access to electricity (% of total population),Access to electricity (% of urban population with access),Energy intensity level of primary energy (MJ/2011 USD PPP),Renewable electricity output (GWh),Renewable electricity share of total electricity output (%),Renewable energy consumption (TJ),Renewable energy share of TFEC (%),Total electricity output (GWh),Total final energy consumption (TFEC) (TJ),Population,Region,IncomeGroup
0,Afghanistan,AFG,1990,,,0.01,52.04,1.88,764.0,67.73,6312.39,15.92,1128.0,39639.42,12249114.0,South Asia,Low income
1,Afghanistan,AFG,1991,,,0.01,53.81,2.0,690.0,67.98,6361.65,17.04,1015.0,37341.43,12993657.0,South Asia,Low income
2,Afghanistan,AFG,1992,,,0.01,55.58,1.33,478.0,67.99,6546.36,26.52,703.0,24683.11,13981231.0,South Asia,Low income
3,Afghanistan,AFG,1993,,,0.01,57.35,1.76,475.0,68.35,7849.65,30.59,695.0,25664.47,15095099.0,South Asia,Low income
4,Afghanistan,AFG,1994,,,0.01,59.12,2.25,472.0,68.7,8305.31,32.8,687.0,25323.96,16172719.0,South Asia,Low income


In [25]:
# Preview the joined frame (first five rows)
all_data_df.head(n=5)

Unnamed: 0,Country Name,Country Code,Year,Access to Clean Fuels and Technologies for cooking (% of total population),Access to electricity (% of rural population with access),Access to electricity (% of total population),Access to electricity (% of urban population with access),Energy intensity level of primary energy (MJ/2011 USD PPP),Renewable electricity output (GWh),Renewable electricity share of total electricity output (%),Renewable energy consumption (TJ),Renewable energy share of TFEC (%),Total electricity output (GWh),Total final energy consumption (TFEC) (TJ),Population,Region,IncomeGroup
0,Afghanistan,AFG,1990,,,0.01,52.04,1.88,764.0,67.73,6312.39,15.92,1128.0,39639.42,12249114.0,South Asia,Low income
1,Afghanistan,AFG,1991,,,0.01,53.81,2.0,690.0,67.98,6361.65,17.04,1015.0,37341.43,12993657.0,South Asia,Low income
2,Afghanistan,AFG,1992,,,0.01,55.58,1.33,478.0,67.99,6546.36,26.52,703.0,24683.11,13981231.0,South Asia,Low income
3,Afghanistan,AFG,1993,,,0.01,57.35,1.76,475.0,68.35,7849.65,30.59,695.0,25664.47,15095099.0,South Asia,Low income
4,Afghanistan,AFG,1994,,,0.01,59.12,2.25,472.0,68.7,8305.31,32.8,687.0,25323.96,16172719.0,South Asia,Low income


## Table Calculations

In [61]:
# Add per-capita columns: each consumption total (TJ) divided by that row's population.
# Rows with a missing consumption or population value yield NaN.
all_data_df = all_data_df.assign(**{
    'Per Capita Total Energy Consumption':
        all_data_df['Total final energy consumption (TFEC) (TJ)'].div(all_data_df['Population']),
    'Per Capita Renewable Energy Consumption':
        all_data_df['Renewable energy consumption (TJ)'].div(all_data_df['Population']),
})

In [62]:
# Confirm the two per-capita columns were appended (they appear as the last two columns)
all_data_df.head(n=5)

Unnamed: 0,Country Name,Country Code,Year,Access to Clean Fuels and Technologies for cooking (% of total population),Access to electricity (% of rural population with access),Access to electricity (% of total population),Access to electricity (% of urban population with access),Energy intensity level of primary energy (MJ/2011 USD PPP),Renewable electricity output (GWh),Renewable electricity share of total electricity output (%),Renewable energy consumption (TJ),Renewable energy share of TFEC (%),Total electricity output (GWh),Total final energy consumption (TFEC) (TJ),Population,Region,IncomeGroup,Per Capita Total Energy Consumption,Per Capita Renewable Energy Consumption
0,Afghanistan,AFG,1990,,,0.01,52.04,1.88,764.0,67.73,6312.39,15.92,1128.0,39639.42,12249114.0,South Asia,Low income,0.003236,0.000515
1,Afghanistan,AFG,1991,,,0.01,53.81,2.0,690.0,67.98,6361.65,17.04,1015.0,37341.43,12993657.0,South Asia,Low income,0.002874,0.00049
2,Afghanistan,AFG,1992,,,0.01,55.58,1.33,478.0,67.99,6546.36,26.52,703.0,24683.11,13981231.0,South Asia,Low income,0.001765,0.000468
3,Afghanistan,AFG,1993,,,0.01,57.35,1.76,475.0,68.35,7849.65,30.59,695.0,25664.47,15095099.0,South Asia,Low income,0.0017,0.00052
4,Afghanistan,AFG,1994,,,0.01,59.12,2.25,472.0,68.7,8305.31,32.8,687.0,25323.96,16172719.0,South Asia,Low income,0.001566,0.000514


In [65]:
# Slice out the two endpoint years as separate frames, each re-indexed from 0
df_1990 = all_data_df.loc[all_data_df['Year'] == 1990].reset_index(drop=True)
df_2015 = all_data_df.loc[all_data_df['Year'] == 2015].reset_index(drop=True)

In [70]:
# Pair each country's 1990 row with its 2015 row, matching on Country Code.
# This is an inner join, so a country missing either year is left out.
# Every other column appears twice, suffixed with the year it came from.
df_1990_2015 = df_1990.merge(
    df_2015,
    how='inner',
    on='Country Code',
    suffixes=('_1990', '_2015'),
)

In [71]:
# Preview the side-by-side 1990/2015 frame
df_1990_2015.head(n=5)

Unnamed: 0,Country Name_1990,Country Code,Year_1990,Access to Clean Fuels and Technologies for cooking (% of total population)_1990,Access to electricity (% of rural population with access)_1990,Access to electricity (% of total population)_1990,Access to electricity (% of urban population with access)_1990,Energy intensity level of primary energy (MJ/2011 USD PPP)_1990,Renewable electricity output (GWh)_1990,Renewable electricity share of total electricity output (%)_1990,...,Renewable electricity share of total electricity output (%)_2015,Renewable energy consumption (TJ)_2015,Renewable energy share of TFEC (%)_2015,Total electricity output (GWh)_2015,Total final energy consumption (TFEC) (TJ)_2015,Population_2015,Region_2015,IncomeGroup_2015,Per Capita Total Energy Consumption_2015,Per Capita Renewable Energy Consumption_2015
0,Afghanistan,AFG,1990,,,0.01,52.04,1.88,764.0,67.73,...,,25134.53,18.42,,136426.64,33736494.0,South Asia,Low income,0.004044,0.000745
1,Albania,ALB,1990,,100.0,100.0,100.0,7.91,2848.0,86.41,...,,31555.45,38.62,,81717.67,2880703.0,Europe & Central Asia,Upper middle income,0.028367,0.010954
2,Algeria,DZA,1990,,96.39,98.27,100.0,3.5,135.0,0.84,...,,833.67,0.06,,1413989.84,39871528.0,Middle East & North Africa,Upper middle income,0.035464,2.1e-05
3,American Samoa,ASM,1990,,,,,,0.0,0.0,...,,4.97,0.89,,561.06,55537.0,East Asia & Pacific,Upper middle income,0.010102,8.9e-05
4,Andorra,AND,1990,,100.0,100.0,100.0,,120.0,100.0,...,,1642.52,19.75,,8317.48,78014.0,Europe & Central Asia,High income,0.106615,0.021054


## Export Data

In [73]:
# Write the combined 1990/2015 frame to disk, omitting the row index
df_1990_2015.to_csv(path_or_buf='data/clean/1990_2015_data.csv', index=False)

In [76]:
# Read the exported file back and summarise its columns, dtypes and non-null counts
pd.read_csv(filepath_or_buffer='data/clean/1990_2015_data.csv').info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259 entries, 0 to 258
Data columns (total 37 columns):
Country Name_1990                                                                  259 non-null object
Country Code                                                                       259 non-null object
Year_1990                                                                          259 non-null int64
Access to Clean Fuels and Technologies for cooking (% of total population)_1990    0 non-null float64
Access to electricity (% of rural population with access)_1990                     164 non-null float64
Access to electricity (% of total population)_1990                                 207 non-null float64
Access to electricity (% of urban population with access)_1990                     207 non-null float64
Energy intensity level of primary energy (MJ/2011 USD PPP)_1990                    184 non-null float64
Renewable electricity output (GWh)_1990                              