# Prediction of CO2 Emission per Country
## Pappo, Katz, Pimenta Silva, Tuncel

### Importing and Downloading

In [20]:
import numpy as np
import pandas as pd

In [73]:
df_CO2_brut = pd.read_csv('data/GCB2022v27_MtCo2_flat.csv')
# df_CO2

In [74]:
df_GEI_brut = pd.read_csv('data/Global Economy Indicators.csv')
# df_GEI_brut

### Cleaning the datasets

In [97]:
# Drop rows where year is less than 1970
df_CO2 = df_CO2_brut.drop(df_CO2_brut[df_CO2_brut['Year'] < 1970].index)
df_CO2 = df_CO2.drop(df_CO2[df_CO2['Country'].isin(['Global', 'International Transport'])].index)
df_CO2 = df_CO2.drop(columns=['ISO 3166-1 alpha-3'])

# Replace NaN values with 0
df_CO2 = df_CO2.replace(np.nan, 0)

# Delete spaces in column names
df_CO2.columns = df_CO2.columns.str.strip()

df_CO2.reset_index(drop=True, inplace=True).head()

Unnamed: 0,Country,Year,Total,Coal,Oil,Gas,Cement,Flaring,Other,Per Capita
220,Afghanistan,1970,1.670397,0.436974,0.671986,0.216651,0.04735,0.297436,0.0,0.155343
221,Afghanistan,1971,1.893554,0.359072,0.747456,0.43968,0.043234,0.304112,0.0,0.171893
222,Afghanistan,1972,1.530347,0.190528,0.626544,0.300448,0.046427,0.3664,0.0,0.135588
223,Afghanistan,1973,1.635454,0.310745,0.701918,0.33268,0.067106,0.223005,0.0,0.141288
224,Afghanistan,1974,1.913152,0.304695,0.770914,0.400141,0.070301,0.367102,0.0,0.161177


In [99]:
# Drop columns that are not needed
df_GEI = df_GEI_brut.drop(columns=[
    ' CountryID ', 
    ' Currency ',
    ' AMA exchange rate ', 
    ' IMF based exchange rate ', 
    ' Changes in inventories ', 
    ' Other Activities (ISIC J-P) '])

df_GEI.columns = df_GEI.columns.str.strip()

df_GEI.head()

Unnamed: 0,Country,Year,Population,Per capita GNI,"Agriculture, hunting, forestry, fishing (ISIC A-B)",Construction (ISIC F),Exports of goods and services,Final consumption expenditure,General government final consumption expenditure,Gross capital formation,Gross fixed capital formation (including Acquisitions less disposals of valuables),Household consumption expenditure (including Non-profit institutions serving households),Imports of goods and services,Manufacturing (ISIC D),"Mining, Manufacturing, Utilities (ISIC C-E)",Total Value Added,"Transport, storage and communication (ISIC I)","Wholesale, retail trade, restaurants and hotels (ISIC G-H)",Gross National Income(GNI) in USD,Gross Domestic Product (GDP)
0,Afghanistan,1970,10752971,164,869917400.0,46793902.0,165618722.0,1663221000.0,112126986.0,94611818.0,94611818.0,1551094000.0,195277226.0,370146827.0,376690811.0,1731454000.0,83917200.0,226387091.0,1766528000.0,1731436000.0
1,Afghanistan,1971,11015857,168,910828100.0,48994113.0,193580300.0,1796541000.0,121114833.0,99012350.0,99012350.0,1675426000.0,276296480.0,387549502.0,394401164.0,1812857000.0,87860382.0,237019196.0,1850122000.0,1812838000.0
2,Afghanistan,1972,11286753,149,827945300.0,44535223.0,227654380.0,1607159000.0,108347543.0,103456794.0,103456794.0,1498812000.0,290370350.0,352284669.0,358512865.0,1647918000.0,79864525.0,215477287.0,1683948000.0,1647900000.0
3,Afghanistan,1973,11575305,150,855486900.0,46018542.0,226913554.0,1617037000.0,109013455.0,121728433.0,121728433.0,1508024000.0,262962880.0,364010279.0,370445793.0,1702735000.0,82528885.0,222624293.0,1739998000.0,1702716000.0
4,Afghanistan,1974,11869879,177,1035913000.0,55721659.0,284938449.0,1907408000.0,128588961.0,175061875.0,175061875.0,1778819000.0,305679151.0,440760406.0,448552790.0,2061752000.0,99918604.0,269525910.0,2106420000.0,2061729000.0


In [113]:
intersection_countries = set(df_CO2['Country']) & set(df_GEI['Country'])
intersection_countries


set()

### Merging Datasets

In [115]:
df = df_GEI.merge(df_CO2, on=['Country', 'Year'])
df

Unnamed: 0,Country_x,Year,Population,Per capita GNI,"Agriculture, hunting, forestry, fishing (ISIC A-B)",Construction (ISIC F),Exports of goods and services,Final consumption expenditure,General government final consumption expenditure,Gross capital formation,...,Gross Domestic Product (GDP),Country_y,Total,Coal,Oil,Gas,Cement,Flaring,Other,Per Capita
0,Afghanistan,1970,10752971,164,869917407.0,4.679390e+07,1.656187e+08,1.663221e+09,1.121270e+08,9.461182e+07,...,1.731436e+09,Afghanistan,1.670397,0.436974,0.671986,0.216651,0.047350,0.297436,0.0,0.155343
1,Afghanistan,1970,10752971,164,869917407.0,4.679390e+07,1.656187e+08,1.663221e+09,1.121270e+08,9.461182e+07,...,1.731436e+09,Albania,3.739486,0.891225,2.482960,0.187047,0.178255,0.000000,0.0,1.608567
2,Afghanistan,1970,10752971,164,869917407.0,4.679390e+07,1.656187e+08,1.663221e+09,1.121270e+08,9.461182e+07,...,1.731436e+09,Algeria,15.058960,0.970960,6.837024,0.135568,0.457920,6.657488,0.0,1.091552
3,Afghanistan,1970,10752971,164,869917407.0,4.679390e+07,1.656187e+08,1.663221e+09,1.121270e+08,9.461182e+07,...,1.731436e+09,Andorra,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
4,Afghanistan,1970,10752971,164,869917407.0,4.679390e+07,1.656187e+08,1.663221e+09,1.121270e+08,9.461182e+07,...,1.731436e+09,Angola,3.577917,0.047583,1.742281,0.084186,0.221465,1.482403,0.0,0.593382
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2417755,Zambia,2021,19473125,1062,628528683.0,3.252117e+09,1.150134e+10,1.144464e+10,3.021303e+09,6.102614e+09,...,2.131337e+10,Viet Nam,326.013668,196.518368,60.613903,13.679221,54.120668,1.081508,0.0,3.344827
2417756,Zambia,2021,19473125,1062,628528683.0,3.252117e+09,1.150134e+10,1.144464e+10,3.021303e+09,6.102614e+09,...,2.131337e+10,Wallis and Futuna Islands,0.027818,0.000000,0.027818,0.000000,0.000000,0.000000,0.0,2.392571
2417757,Zambia,2021,19473125,1062,628528683.0,3.252117e+09,1.150134e+10,1.144464e+10,3.021303e+09,6.102614e+09,...,2.131337e+10,Yemen,12.476597,0.245909,9.592510,0.206611,0.663596,1.767971,0.0,0.378289
2417758,Zambia,2021,19473125,1062,628528683.0,3.252117e+09,1.150134e+10,1.144464e+10,3.021303e+09,6.102614e+09,...,2.131337e+10,Zambia,7.676180,2.463629,4.251568,0.000000,0.960983,0.000000,0.0,0.394194
