<a href="https://colab.research.google.com/github/mohannashahrad/Borealis_AI_Plant_Tree_Project/blob/main/Final_Data/final_preprocessing_script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import all the required libraries
import pandas as pd
import requests
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

In [None]:
# Define all the functions
# Module-level list of the melted per-indicator frames; it is filled by the
# loading cell and consumed by the merge cell.
all_dfs = []

def load_DF(url, timeout=30):
  """Download a CSV file over HTTP and parse it into a DataFrame.

  The first four lines of the file are skipped before parsing (presumably a
  metadata preamble ahead of the header row -- confirm against the data files).

  Args:
    url: Location of the CSV file.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    pandas.DataFrame parsed from the response body.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
    requests.Timeout: if no response arrives within `timeout` seconds.
  """
  response = requests.get(url, timeout=timeout)
  # Fail loudly on 404/5xx instead of handing an error page to read_csv.
  response.raise_for_status()
  return pd.read_csv(StringIO(response.text), skiprows=[0, 1, 2, 3])

def clean_and_melt_df(df, value, start_year=1990, end_year=2020):
  """Reshape a wide per-year indicator table into long (Country, Time, value) rows.

  The input frame is not modified.

  Args:
    df: Wide frame with 'Country Name', 'Country Code', 'Indicator Name' and
      'Indicator Code' columns, one column per year, and one trailing column
      that is discarded (presumably the empty column produced by a trailing
      comma in the CSV -- confirm against the data files).
    value: Name given to the melted value column.
    start_year: First year kept (inclusive).
    end_year: Last year kept (inclusive).

  Returns:
    Long-format DataFrame with columns 'Country', 'Time' (int) and `value`,
    limited to start_year <= Time <= end_year. Row labels are the ones
    produced by the melt, i.e. they are not renumbered after filtering.
  """
  wide = (
      df.iloc[:, :-1]  # drop the trailing column
        # Keyword form: a positional `axis` is rejected by pandas >= 2.0.
        .drop(columns=['Country Code', 'Indicator Code', 'Indicator Name'])
        .rename(columns={'Country Name': 'Country'})
  )
  long_df = pd.melt(wide, id_vars='Country', var_name='Time', value_name=value)
  # Year labels arrive as column-name strings; make them numeric for filtering.
  long_df['Time'] = long_df['Time'].astype(int)
  return long_df[long_df['Time'].between(start_year, end_year)]

def get_index(df):
  """Return a copy of `df` indexed by the ('Country', 'Time') pair."""
  key_columns = ['Country', 'Time']
  indexed = df.set_index(key_columns)
  return indexed

In [None]:
# Loading Datasets
#
# Every indicator is downloaded from the same folder and passed through
# clean_and_melt_df. The list of frames is rebuilt from scratch at the end of
# the cell, so re-running this cell does not accumulate duplicate frames
# (duplicates would make the later join fail on overlapping column names).

BASE_URL = 'https://raw.githubusercontent.com/mohannashahrad/Borealis_AI_Plant_Tree_Project/main/Final_Data/'


def _load_indicator(file_name, column):
  """Download BASE_URL + file_name and return it in long format under `column`."""
  return clean_and_melt_df(load_DF(BASE_URL + file_name), column)


# Load Population Datasets
pop_df = _load_indicator('population_total(count).csv', 'Population')
pop_growth_df = _load_indicator('population_growth(%25year).csv', 'Pop Growth (%)')
pop_urban_df = _load_indicator('population_urban(%25).csv', 'Urban Pop (%)')

# Load Land Datasets
# NOTE(review): the displayed values (e.g. 652860 for Afghanistan) look like
# square kilometres rather than m2 -- confirm before relying on the label.
land_df = _load_indicator('land_total(m2).csv', 'Land Area (m2)')
land_agrc_df = _load_indicator('land_agriculture(%25).csv', 'Agriculture Land (%)')
land_forest_df = _load_indicator('land_forest(%25).csv', 'Forest Land (%)')

# Load GDP Datasets
gdp_df = _load_indicator('GDP_total(usd).csv', 'GDP (US$)')
gdp_growth_df = _load_indicator('GDP_growth(%25year).csv', 'GDP Growth (%)')
gdp_forest_df = _load_indicator('GDP_forest_rents(%25).csv', 'Forest Rents (% GDP)')
gdp_coal_df = _load_indicator('GDP_coal_rents(%25).csv', 'Coal Rents (% GDP)')
gdp_oil_df = _load_indicator('GDP_oil_rents(%25).csv', 'Oil Rents (% GDP)')

# Load CO2 and GHG Emission Datasets
# The file name and column label spellings are kept exactly as published,
# because the output schema depends on them.
CO2_emission_df = _load_indicator('co2_emissions(kt).csv', 'CO2 Emission (kt)')
GHG_emission_df = _load_indicator('total_GHG_emissons_co2eqv(kt).csv', 'GHG Emision (CO2 eqv)')

# Order matters: the first frame is the left side of the join in the merge
# cell, and the column order of the merged table follows this list.
all_dfs = [
    pop_df, pop_growth_df, pop_urban_df,
    land_df, land_agrc_df, land_forest_df,
    gdp_df, gdp_growth_df, gdp_forest_df, gdp_coal_df, gdp_oil_df,
    CO2_emission_df, GHG_emission_df,
]

for df in all_dfs:
  display(df)

Unnamed: 0,Country,Time,Population
7980,Aruba,1990,62152.0
7981,Africa Eastern and Southern,1990,304648010.0
7982,Afghanistan,1990,12412311.0
7983,Africa Western and Central,1990,204803865.0
7984,Angola,1990,11848385.0
...,...,...,...
16221,Kosovo,2020,1775378.0
16222,"Yemen, Rep.",2020,29825968.0
16223,South Africa,2020,59308690.0
16224,Zambia,2020,18383956.0


Unnamed: 0,Country,Time,Pop Growth (%)
7980,Aruba,1990,1.816830
7981,Africa Eastern and Southern,1990,2.913059
7982,Afghanistan,1990,4.476954
7983,Africa Western and Central,1990,2.718932
7984,Angola,1990,3.378411
...,...,...,...
16221,Kosovo,2020,-0.757525
16222,"Yemen, Rep.",2020,2.251561
16223,South Africa,2020,1.273356
16224,Zambia,2020,2.885686


Unnamed: 0,Country,Time,Urban Pop (%)
7980,Aruba,1990,50.319000
7981,Africa Eastern and Southern,1990,24.919224
7982,Afghanistan,1990,21.177000
7983,Africa Western and Central,1990,31.000834
7984,Angola,1990,37.144000
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,37.908000
16223,South Africa,2020,67.354000
16224,Zambia,2020,44.629000


Unnamed: 0,Country,Time,Land Area (m2)
7980,Aruba,1990,1.800000e+02
7981,Africa Eastern and Southern,1990,1.477796e+07
7982,Afghanistan,1990,6.528600e+05
7983,Africa Western and Central,1990,9.045780e+06
7984,Angola,1990,1.246700e+06
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,5.279700e+05
16223,South Africa,2020,1.213090e+06
16224,Zambia,2020,7.433900e+05


Unnamed: 0,Country,Time,Agriculture Land (%)
7980,Aruba,1990,11.111111
7981,Africa Eastern and Southern,1990,43.570264
7982,Afghanistan,1990,58.266703
7983,Africa Western and Central,1990,35.237689
7984,Angola,1990,46.044758
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,Forest Land (%)
7980,Aruba,1990,2.333333
7981,Africa Eastern and Southern,1990,40.565912
7982,Afghanistan,1990,1.850994
7983,Africa Western and Central,1990,22.776908
7984,Angola,1990,63.578070
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,1.039832
16223,South Africa,2020,14.055091
16224,Zambia,2020,60.283337


Unnamed: 0,Country,Time,GDP (US$)
7980,Aruba,1990,7.648871e+08
7981,Africa Eastern and Southern,1990,2.126590e+11
7982,Afghanistan,1990,
7983,Africa Western and Central,1990,1.218377e+11
7984,Angola,1990,1.123628e+10
...,...,...,...
16221,Kosovo,2020,7.611402e+09
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,3.019236e+11
16224,Zambia,2020,1.932005e+10


Unnamed: 0,Country,Time,GDP Growth (%)
7980,Aruba,1990,3.961402
7981,Africa Eastern and Southern,1990,0.116166
7982,Afghanistan,1990,
7983,Africa Western and Central,1990,6.577794
7984,Angola,1990,-3.450099
...,...,...,...
16221,Kosovo,2020,-6.888013
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,-6.959604
16224,Zambia,2020,-3.016189


Unnamed: 0,Country,Time,Forest Rents (% GDP)
7980,Aruba,1990,0.001552
7981,Africa Eastern and Southern,1990,3.153362
7982,Afghanistan,1990,
7983,Africa Western and Central,1990,3.783488
7984,Angola,1990,0.997414
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,Coal Rents (% GDP)
7980,Aruba,1990,0.000000
7981,Africa Eastern and Southern,1990,1.616155
7982,Afghanistan,1990,
7983,Africa Western and Central,1990,0.004070
7984,Angola,1990,0.000000
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,Oil Rents (% GDP)
7980,Aruba,1990,0.000000
7981,Africa Eastern and Southern,1990,1.570861
7982,Afghanistan,1990,
7983,Africa Western and Central,1990,13.086197
7984,Angola,1990,27.943410
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,CO2 Emission (kt)
7980,Aruba,1990,
7981,Africa Eastern and Southern,1990,309980.825443
7982,Afghanistan,1990,2960.000000
7983,Africa Western and Central,1990,90210.000000
7984,Angola,1990,6330.000000
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country,Time,GHG Emision (CO2 eqv)
7980,Aruba,1990,
7981,Africa Eastern and Southern,1990,838050.0
7982,Afghanistan,1990,15180.0
7983,Africa Western and Central,1990,398170.0
7984,Angola,1990,42180.0
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


In [None]:
# Index every indicator frame by (Country, Time) so they can be joined on it.
rest_indexes = [get_index(frame) for frame in all_dfs]
# The first frame is the left side of the join; the others are joined onto it.
first_index = rest_indexes.pop(0)

# Outer join keeps every (Country, Time) pair present in any indicator.
merged_df = (
    first_index
    .join(rest_indexes, how='outer')
    .sort_values(by=["Country", "Time"])
    .reset_index()
)

# Spot-check one country.
display(merged_df.loc[merged_df['Country'] == "Afghanistan"])

# NOTE: the default integer index is written as an unnamed first column.
merged_df.to_csv("final_data.csv")

Unnamed: 0,Country,Time,Population,Pop Growth (%),Urban Pop (%),Land Area (m2),Agriculture Land (%),Forest Land (%),GDP (US$),GDP Growth (%),Forest Rents (% GDP),Coal Rents (% GDP),Oil Rents (% GDP),CO2 Emission (kt),GHG Emision (CO2 eqv)
0,Afghanistan,1990,12412311.0,4.476954,21.177,652860.0,58.266703,1.850994,,,,,,2960.0,15180.0
1,Afghanistan,1991,13299016.0,6.900124,21.266,652860.0,58.251386,1.850994,,,,,,2740.0,15100.0
2,Afghanistan,1992,14485543.0,8.546107,21.355,652860.0,58.251386,1.850994,,,,,,1430.0,13630.0
3,Afghanistan,1993,15816601.0,8.790897,21.444,652860.0,58.104341,1.850994,,,,,,1360.0,13460.0
4,Afghanistan,1994,17075728.0,7.659796,21.534,652860.0,57.919003,1.850994,,,,,,1300.0,13270.0
5,Afghanistan,1995,18110662.0,5.884279,21.624,652860.0,57.827099,1.850994,,,,,,1250.0,13480.0
6,Afghanistan,1996,18853444.0,4.019478,21.714,652860.0,57.825568,1.850994,,,,,,1180.0,14440.0
7,Afghanistan,1997,19357126.0,2.636502,21.805,652860.0,57.883773,1.850994,,,,,,1100.0,15340.0
8,Afghanistan,1998,19737770.0,1.947344,21.895,652860.0,58.001716,1.850994,,,,,,1050.0,16100.0
9,Afghanistan,1999,20170847.0,2.170429,21.986,652860.0,57.827099,1.850994,,,,,,820.0,16910.0
