<a href="https://colab.research.google.com/github/mohannashahrad/Borealis_AI_Plant_Tree_Project/blob/main/preprocessing_scripts/First_5_DataPreprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [51]:
# Import all the required libraries
import pandas as pd
import requests
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

In [54]:
# Define all the functions

def load_DF(url, timeout=30):
  """Download a CSV file over HTTP and parse it into a DataFrame.

  Args:
    url: Address of the CSV file to fetch.
    timeout: Seconds to wait for the server before giving up, so a stalled
      connection cannot hang the notebook indefinitely.

  Returns:
    A pandas DataFrame parsed from the response body, with the first four
    lines of the file skipped.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If the server does not answer within `timeout` seconds.
  """
  response = requests.get(url, timeout=timeout)
  # Fail loudly on a bad status instead of handing an error page to the CSV
  # parser, which would yield a confusing parse error or a garbage frame.
  response.raise_for_status()
  # Lines 0-3 are skipped so the fifth line becomes the header row
  # (presumably a metadata preamble in these files -- confirm against the data).
  return pd.read_csv(StringIO(response.text), skiprows=[0, 1, 2, 3])

def clean_and_melt_df(df, value, start_year=1990, end_year=2020):
  """Reshape a wide per-year indicator table into long (country, year) form.

  Args:
    df: Wide DataFrame containing 'Country Name', 'Country Code',
      'Indicator Code' and 'Indicator Name' columns, one column per year
      (column labels must be convertible to int), and one trailing column
      that is discarded. The caller's frame is not modified.
    value: Name to give the column holding the indicator values.
    start_year: First year to keep (inclusive).
    end_year: Last year to keep (inclusive).

  Returns:
    A DataFrame with columns 'Country Name', 'Time' (int) and `value`,
    restricted to start_year <= Time <= end_year. Row labels are those
    produced by the melt; they are not renumbered after filtering.
  """
  # The last column is dropped unconditionally (presumably the empty column
  # created by a trailing comma in the source CSV -- confirm against the data).
  # `columns=` replaces the positional axis argument, which pandas 2.x rejects,
  # and the non-inplace drop avoids mutating a slice of the caller's frame.
  wide = df.iloc[:, :-1].drop(
      columns=['Country Code', 'Indicator Code', 'Indicator Name'])
  long_df = pd.melt(wide, id_vars='Country Name', var_name='Time',
                    value_name=value)
  # Year labels arrive as strings from the CSV header; compare them as numbers.
  long_df['Time'] = long_df['Time'].astype(int)
  # Series.between is inclusive on both ends by default.
  return long_df[long_df['Time'].between(start_year, end_year)]

In [55]:
# Loading Datasets
# Every indicator follows the same pipeline: download the raw CSV, reshape it
# into long form (Country Name, Time, <indicator>), then preview the result.

# Agricultural land
agricultural_land_df = clean_and_melt_df(
    load_DF('https://raw.githubusercontent.com/mohannashahrad/Borealis_AI_Plant_Tree_Project/main/Data/Agricultural_Land%20/API_AG.LND.AGRI.ZS_DS2_en_csv_v2_3052884.csv'),
    'Agricultural Land')
display(agricultural_land_df)

# CO2 emissions
CO2_emission_df = clean_and_melt_df(
    load_DF('https://raw.githubusercontent.com/mohannashahrad/Borealis_AI_Plant_Tree_Project/main/Data/CO2%20emissions%20(kt)/API_EN.ATM.CO2E.KT_DS2_en_csv_v2_3052647.csv'),
    'CO2 Emission')
display(CO2_emission_df)

# Coal rents
coal_rents_df = clean_and_melt_df(
    load_DF('https://raw.githubusercontent.com/mohannashahrad/Borealis_AI_Plant_Tree_Project/main/Data/Coal%20rents%20(%25%20of%20GDP)/API_NY.GDP.COAL.RT.ZS_DS2_en_csv_v2_3057787.csv'),
    'Coal Rents')
display(coal_rents_df)

# Forest rents
forest_rents_df = clean_and_melt_df(
    load_DF('https://raw.githubusercontent.com/mohannashahrad/Borealis_AI_Plant_Tree_Project/main/Data/Forest%20rents%20(%25%20of%20GDP)/API_NY.GDP.FRST.RT.ZS_DS2_en_csv_v2_3057795.csv'),
    'Forest Rents')
display(forest_rents_df)

# GDP growth
gdp_growth_df = clean_and_melt_df(
    load_DF('https://raw.githubusercontent.com/mohannashahrad/Borealis_AI_Plant_Tree_Project/main/Data/GDP%20growth%20(annual%20%25)/API_NY.GDP.MKTP.KD.ZG_DS2_en_csv_v2_3052565.csv'),
    'GDP Growth')
display(gdp_growth_df)

Unnamed: 0,Country Name,Time,Agricultural Land
7980,Aruba,1990,11.111111
7981,Africa Eastern and Southern,1990,43.570264
7982,Afghanistan,1990,58.266703
7983,Africa Western and Central,1990,35.237689
7984,Angola,1990,46.044758
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country Name,Time,CO2 Emission
7980,Aruba,1990,
7981,Africa Eastern and Southern,1990,309980.825443
7982,Afghanistan,1990,2960.000000
7983,Africa Western and Central,1990,90210.000000
7984,Angola,1990,6330.000000
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country Name,Time,Coal Rents
7980,Aruba,1990,0.000000
7981,Africa Eastern and Southern,1990,1.616155
7982,Afghanistan,1990,
7983,Africa Western and Central,1990,0.004070
7984,Angola,1990,0.000000
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country Name,Time,Forest Rents
7980,Aruba,1990,0.001552
7981,Africa Eastern and Southern,1990,3.153362
7982,Afghanistan,1990,
7983,Africa Western and Central,1990,3.783488
7984,Angola,1990,0.997414
...,...,...,...
16221,Kosovo,2020,
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,
16224,Zambia,2020,


Unnamed: 0,Country Name,Time,GDP Growth
7980,Aruba,1990,3.961402
7981,Africa Eastern and Southern,1990,0.052977
7982,Afghanistan,1990,
7983,Africa Western and Central,1990,6.437208
7984,Angola,1990,-3.450099
...,...,...,...
16221,Kosovo,2020,-6.888013
16222,"Yemen, Rep.",2020,
16223,South Africa,2020,-6.959604
16224,Zambia,2020,-3.016189


In [60]:
# Combine the five indicator tables into a single table and save it as CSV.
join_keys = ['Country Name', 'Time']

# Index every table on (country, year) so the join aligns rows on that pair.
agr_index = agricultural_land_df.set_index(join_keys)
co2_index = CO2_emission_df.set_index(join_keys)
coal_index = coal_rents_df.set_index(join_keys)
forest_index = forest_rents_df.set_index(join_keys)
gdp_index = gdp_growth_df.set_index(join_keys)

# The outer join keeps a (country, year) pair if any of the tables contains it;
# indicators missing for that pair are left as NaN. reset_index turns the two
# key levels back into ordinary columns.
merged_df = agr_index.join(
    [co2_index, coal_index, forest_index, gdp_index], how='outer'
).reset_index()

# Spot-check the merged rows for a single country.
display(merged_df.loc[merged_df['Country Name'] == "Afghanistan"])

# NOTE(review): to_csv also writes the integer row index as an unnamed first
# column; pass index=False if downstream readers do not expect it -- confirm.
merged_df.to_csv("agr_co2_coal_forest_gdp.csv")

Unnamed: 0,Country Name,Time,Agricultural Land,CO2 Emission,Coal Rents,Forest Rents,GDP Growth
2,Afghanistan,1990,58.266703,2960.0,,,
268,Afghanistan,1991,58.251386,2740.0,,,
534,Afghanistan,1992,58.251386,1430.0,,,
800,Afghanistan,1993,58.104341,1360.0,,,
1066,Afghanistan,1994,57.919003,1300.0,,,
1332,Afghanistan,1995,57.827099,1250.0,,,
1598,Afghanistan,1996,57.825568,1180.0,,,
1864,Afghanistan,1997,57.883773,1100.0,,,
2130,Afghanistan,1998,58.001716,1050.0,,,
2396,Afghanistan,1999,57.827099,820.0,,,
