<a href="https://colab.research.google.com/github/mohannashahrad/Borealis_AI_Plant_Tree_Project/blob/main/Final_Data/final_preprocessing_script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import all the required libraries
import pandas as pd
import requests
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

In [2]:
# Shared state and helper functions used by the cells below.
# all_dfs collects the cleaned per-indicator DataFrames: the loading cell
# fills it and the merge cell joins everything it contains.
all_dfs = []

def load_DF(url, timeout=30):
  """Download a CSV file over HTTP and parse it into a DataFrame.

  The first four lines of the file (rows 0-3) are skipped before parsing,
  so the header is taken from the fifth line.

  Args:
    url: Address of the raw CSV file.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    pandas.DataFrame built from the response body.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
    requests.RequestException: on connection problems or timeout.
  """
  response = requests.get(url, timeout=timeout)
  # Fail loudly on a bad status; otherwise the HTML/text of an error page
  # would be passed to read_csv and surface later as a confusing parse error.
  response.raise_for_status()
  return pd.read_csv(StringIO(response.text), skiprows=[0, 1, 2, 3])

def clean_and_melt_df(df, value, start_year=1960, end_year=2020):
  """Reshape a wide one-column-per-year table into long (Country, Time, value) rows.

  Steps: discard the last column, drop the code/indicator columns, rename
  'Country Name' to 'Country', melt the remaining year columns into rows,
  cast the year labels to int and keep only the requested year range.

  Args:
    df: Wide DataFrame containing 'Country Name', 'Country Code',
      'Indicator Code', 'Indicator Name', one column per year, and one
      trailing column that is discarded. The input is not modified.
    value: Name given to the column holding the melted values.
    start_year: First year to keep (inclusive).
    end_year: Last year to keep (inclusive).

  Returns:
    DataFrame with columns ['Country', 'Time', value]. The row labels are
    the positions produced by the melt (they are not renumbered after the
    year filter).

  Raises:
    KeyError: if one of the columns to drop is missing.
    ValueError: if a remaining column label cannot be converted to int.
  """
  # Keyword arguments are used throughout: passing `axis` positionally to
  # DataFrame.drop is rejected by pandas 2.x. Chaining (instead of inplace
  # edits on the iloc slice) also avoids SettingWithCopy warnings.
  long_df = (
    df.iloc[:, :-1]
      .drop(columns=['Country Code', 'Indicator Code', 'Indicator Name'])
      .rename(columns={'Country Name': 'Country'})
      .melt(id_vars='Country', var_name='Time', value_name=value)
  )
  long_df['Time'] = long_df['Time'].astype(int)
  # between() is inclusive on both ends, matching the original >= / <= pair.
  return long_df[long_df['Time'].between(start_year, end_year)]

def get_index(df):
  """Return a copy of `df` keyed by the (Country, Time) pair.

  The two columns are moved out of the data columns and become a
  two-level row index; the input frame itself is left untouched.
  """
  key_columns = ['Country', 'Time']
  indexed = df.set_index(keys=key_columns)
  return indexed

In [3]:
# Loading Datasets

# All indicator CSV files are fetched from this one folder of the project repository.
DATA_BASE_URL = 'https://raw.githubusercontent.com/mohannashahrad/Borealis_AI_Plant_Tree_Project/main/Final_Data/'

def _load_indicator(filename, value):
  """Download one indicator CSV from DATA_BASE_URL and return it in long format.

  Args:
    filename: File name appended to DATA_BASE_URL. It must already be
      URL-encoded (a literal '%' in the file name is written as '%25').
    value: Column name for the indicator values in the returned frame.

  Returns:
    The result of clean_and_melt_df applied to the downloaded table.
  """
  return clean_and_melt_df(load_DF(DATA_BASE_URL + filename), value)


# Load Population Datasets
pop_df = _load_indicator('population_total(count).csv', 'Population')
pop_growth_df = _load_indicator('population_growth(%25year).csv', 'Pop Growth (%)')
pop_urban_df = _load_indicator('population_urban(%25).csv', 'Urban Pop (%)')


# Load Land Datasets
# NOTE(review): the merged output shows 652860.0 for Afghanistan, which looks
# like square kilometres rather than m2 - confirm the unit. The label is kept
# unchanged here because downstream code may reference the column by name.
land_df = _load_indicator('land_total(m2).csv', 'Land Area (m2)')
land_agrc_df = _load_indicator('land_agriculture(%25).csv', 'Agriculture Land (%)')
land_forest_df = _load_indicator('land_forest(%25).csv', 'Forest Land (%)')


# Load GDP Datasets
gdp_df = _load_indicator('GDP_total(usd).csv', 'GDP (US$)')
gdp_growth_df = _load_indicator('GDP_growth(%25year).csv', 'GDP Growth (%)')
gdp_forest_df = _load_indicator('GDP_forest_rents(%25).csv', 'Forest Rents (% GDP)')
gdp_coal_df = _load_indicator('GDP_coal_rents(%25).csv', 'Coal Rents (% GDP)')
gdp_oil_df = _load_indicator('GDP_oil_rents(%25).csv', 'Oil Rents (% GDP)')


# Load CO2 and GHG Emission Datasets
CO2_emission_df = _load_indicator('co2_emissions(kt).csv', 'CO2 Emission (kt)')
# The spellings 'emissons' (file name) and 'Emision' (column label) are kept
# as-is: the first must match the hosted file, the second is the column name
# written to the output data.
GHG_emission_df = _load_indicator('total_GHG_emissons_co2eqv(kt).csv', 'GHG Emision (CO2 eqv)')

# Rebuild the list from scratch instead of appending to it, so that running
# this cell more than once does not add every frame a second time (duplicate
# value columns would make the later join fail).
all_dfs = [
  pop_df,
  pop_growth_df,
  pop_urban_df,
  land_df,
  land_agrc_df,
  land_forest_df,
  gdp_df,
  gdp_growth_df,
  gdp_forest_df,
  gdp_coal_df,
  gdp_oil_df,
  CO2_emission_df,
  GHG_emission_df,
]

for df in all_dfs:
  display(df)

Unnamed: 0,Country,Time,Population
0,Aruba,1960,54208.0
1,Africa Eastern and Southern,1960,130836765.0
2,Afghanistan,1960,8996967.0
3,Africa Western and Central,1960,96396419.0
4,Angola,1960,5454938.0
...,...,...,...
16221,Kosovo,2020,1775378.0
16222,"Yemen, Rep.",2020,29825968.0
16223,South Africa,2020,59308690.0
16224,Zambia,2020,18383956.0


Unnamed: 0,Country,Time,Pop Growth (%)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,
2,Afghanistan,1960,
3,Africa Western and Central,1960,2.034308
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,-0.757525
16222,"Yemen, Rep.",2020,2.251561
16223,South Africa,2020,1.273356
16224,Zambia,2020,2.885686


Unnamed: 0,Country,Time,Urban Pop (%)
0,Aruba,1960,50.776000
1,Africa Eastern and Southern,1960,14.704688
2,Afghanistan,1960,8.401000
3,Africa Western and Central,1960,14.670329
4,Angola,1960,10.435000
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,37.908000
16223,South Africa,2020,67.354000
16224,Zambia,2020,44.629000


Unnamed: 0,Country,Time,Land Area (m2)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,
2,Afghanistan,1960,
3,Africa Western and Central,1960,
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,527970.0
16223,South Africa,2020,1213090.0
16224,Zambia,2020,743390.0


Unnamed: 0,Country,Time,Agriculture Land (%)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,
2,Afghanistan,1960,
3,Africa Western and Central,1960,
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,Forest Land (%)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,
2,Afghanistan,1960,
3,Africa Western and Central,1960,
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,1.039832
16223,South Africa,2020,14.055091
16224,Zambia,2020,60.283337


Unnamed: 0,Country,Time,GDP (US$)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,1.934248e+10
2,Afghanistan,1960,5.377778e+08
3,Africa Western and Central,1960,1.040732e+10
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,7.611402e+09
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,3.019236e+11
16224,Zambia,2020,1.932005e+10


Unnamed: 0,Country,Time,GDP Growth (%)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,
2,Afghanistan,1960,
3,Africa Western and Central,1960,
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,-6.888013
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,-6.959604
16224,Zambia,2020,-3.016189


Unnamed: 0,Country,Time,Forest Rents (% GDP)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,
2,Afghanistan,1960,
3,Africa Western and Central,1960,
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,Coal Rents (% GDP)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,
2,Afghanistan,1960,
3,Africa Western and Central,1960,
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,Oil Rents (% GDP)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,
2,Afghanistan,1960,
3,Africa Western and Central,1960,
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,CO2 Emission (kt)
0,Aruba,1960,11092.675000
1,Africa Eastern and Southern,1960,118545.901306
2,Afghanistan,1960,414.371000
3,Africa Western and Central,1960,8760.463000
4,Angola,1960,550.050000
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,GHG Emision (CO2 eqv)
0,Aruba,1960,
1,Africa Eastern and Southern,1960,
2,Afghanistan,1960,
3,Africa Western and Central,1960,
4,Angola,1960,
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


In [4]:
# Key every indicator frame by (Country, Time) so they can be aligned row by row.
rest_indexes = [get_index(frame) for frame in all_dfs]
# The first frame is the left side of the join; the others are joined onto it.
first_index = rest_indexes.pop(0)

# Outer join keeps every (Country, Time) pair that appears in any frame;
# afterwards the two keys are turned back into ordinary columns.
merged_df = first_index.join(rest_indexes, how='outer').reset_index()

# Spot-check a single country before writing the result.
afghanistan_rows = merged_df.loc[merged_df['Country'] == "Afghanistan"]
display(afghanistan_rows)

# to_csv is called with its defaults, so the row index is written as well
# (it becomes the first, unnamed column of the file).
merged_df.to_csv("final_data.csv")

Unnamed: 0,Country,Time,Population,Pop Growth (%),Urban Pop (%),Land Area (m2),Agriculture Land (%),Forest Land (%),GDP (US$),GDP Growth (%),Forest Rents (% GDP),Coal Rents (% GDP),Oil Rents (% GDP),CO2 Emission (kt),GHG Emision (CO2 eqv)
2,Afghanistan,1960,8996967.0,,8.401,,,,5.377778e+08,,,,,414.371,
268,Afghanistan,1961,9169406.0,1.898499,8.684,652860.0,57.745918,,5.488889e+08,,,,,491.378,
534,Afghanistan,1962,9351442.0,1.965805,8.976,652860.0,57.837821,,5.466667e+08,,,,,689.396,
800,Afghanistan,1963,9543200.0,2.029830,9.276,652860.0,57.914407,,7.511112e+08,,,,,707.731,
1066,Afghanistan,1964,9744772.0,2.090208,9.586,652860.0,58.010906,,8.000000e+08,,,,,839.743,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14898,Afghanistan,2016,35383028.0,2.778035,25.020,652860.0,58.067580,1.850994,1.801775e+10,2.260314,0.397862,0.247302,0.000872,7390.000,95370.0
15164,Afghanistan,2017,36296111.0,2.547833,25.250,652860.0,58.067580,1.850994,1.886995e+10,2.647003,0.267270,0.397627,0.001296,7380.000,97300.0
15430,Afghanistan,2018,37171922.0,2.384309,25.495,652860.0,58.081365,1.850994,1.835388e+10,1.189228,0.335310,0.535543,0.001939,7440.000,98920.0
15696,Afghanistan,2019,38041757.0,2.313073,25.754,652860.0,,1.850994,1.929110e+10,3.911603,0.367809,0.373840,0.001628,,
