In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf

# Read the CO2 dataset from the CSV file in the working directory into a
# DataFrame. The bare name on the last line makes the frame the cell's
# rendered output.
CO2_df = pd.read_csv(filepath_or_buffer='owid-co2-data.csv')
CO2_df

Unnamed: 0,iso_code,country,year,co2,co2_per_capita,trade_co2,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,...,ghg_excluding_lucf_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,primary_energy_consumption,energy_per_capita,energy_per_gdp
0,AFG,Afghanistan,1949,0.015,0.002,,,,0.015,0.002,...,,,,,,7624058.0,,,,
1,AFG,Afghanistan,1950,0.084,0.011,,,,0.021,0.003,...,,,,,,7752117.0,9.421400e+09,,,
2,AFG,Afghanistan,1951,0.092,0.012,,,,0.026,0.003,...,,,,,,7840151.0,9.692280e+09,,,
3,AFG,Afghanistan,1952,0.092,0.012,,,,0.032,0.004,...,,,,,,7935996.0,1.001732e+10,,,
4,AFG,Afghanistan,1953,0.106,0.013,,,,0.038,0.005,...,,,,,,8039684.0,1.063052e+10,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25186,ZWE,Zimbabwe,2016,10.738,0.765,1.415,0.639,0.046,6.959,0.496,...,2.076,11.50,0.820,6.21,0.443,14030338.0,2.096179e+10,47.5,3385.574,1.889
25187,ZWE,Zimbabwe,2017,9.582,0.673,1.666,0.678,0.048,5.665,0.398,...,2.023,11.62,0.816,6.35,0.446,14236599.0,2.194784e+10,,,
25188,ZWE,Zimbabwe,2018,11.854,0.821,1.308,0.697,0.048,7.101,0.492,...,2.173,11.96,0.828,6.59,0.456,14438812.0,2.271535e+10,,,
25189,ZWE,Zimbabwe,2019,10.949,0.748,1.473,0.697,0.048,6.020,0.411,...,,,,,,14645473.0,,,,


In [2]:
# Show the dtype pandas assigned to each column of CO2_df.
CO2_df.dtypes

iso_code                                object
country                                 object
year                                     int64
co2                                    float64
co2_per_capita                         float64
trade_co2                              float64
cement_co2                             float64
cement_co2_per_capita                  float64
coal_co2                               float64
coal_co2_per_capita                    float64
flaring_co2                            float64
flaring_co2_per_capita                 float64
gas_co2                                float64
gas_co2_per_capita                     float64
oil_co2                                float64
oil_co2_per_capita                     float64
other_industry_co2                     float64
other_co2_per_capita                   float64
co2_growth_prct                        float64
co2_growth_abs                         float64
co2_per_gdp                            float64
co2_per_unit_

In [3]:
# Keep only the columns in which at least 90% of the rows are non-null, i.e.
# drop every column that has more than ~10% NaN values.
# `thresh` is the minimum number of non-NaN entries a column needs in order to
# be kept -- it is not a limit on the number of NaNs.
# dropna returns a new frame, so the result is bound to its own name instead of
# being discarded; CO2_df itself is left unchanged.
min_non_null = len(CO2_df) * 0.9
CO2_dense_df = CO2_df.dropna(thresh=min_non_null, axis='columns')
CO2_dense_df

Unnamed: 0,country,year,co2,co2_per_capita,co2_growth_prct,co2_growth_abs,cumulative_co2,share_global_co2,share_global_cumulative_co2,population
0,Afghanistan,1949,0.015,0.002,,,0.015,0.00,0.00,7624058.0
1,Afghanistan,1950,0.084,0.011,475.00,0.070,0.099,0.00,0.00,7752117.0
2,Afghanistan,1951,0.092,0.012,8.70,0.007,0.191,0.00,0.00,7840151.0
3,Afghanistan,1952,0.092,0.012,0.00,0.000,0.282,0.00,0.00,7935996.0
4,Afghanistan,1953,0.106,0.013,16.00,0.015,0.388,0.00,0.00,8039684.0
...,...,...,...,...,...,...,...,...,...,...
25186,Zimbabwe,2016,10.738,0.765,-12.17,-1.488,736.467,0.03,0.05,14030338.0
25187,Zimbabwe,2017,9.582,0.673,-10.77,-1.156,746.049,0.03,0.05,14236599.0
25188,Zimbabwe,2018,11.854,0.821,23.72,2.273,757.903,0.03,0.05,14438812.0
25189,Zimbabwe,2019,10.949,0.748,-7.64,-0.905,768.852,0.03,0.05,14645473.0


In [4]:
# Collect the names of every float64 column in CO2_df.
# NOTE(review): the variable is called CO2_cat, but this selects float64
# (numeric) columns rather than object/categorical ones -- confirm that is
# the intended set.
CO2_cat = [
    column_name
    for column_name, column_dtype in CO2_df.dtypes.items()
    if column_dtype == "float64"
]

# Count the distinct non-null values in each of those columns
CO2_df.loc[:, CO2_cat].nunique()

co2                                    14126
co2_per_capita                          8641
trade_co2                               3603
cement_co2                              4853
cement_co2_per_capita                    696
coal_co2                                9710
coal_co2_per_capita                     4927
flaring_co2                             2794
flaring_co2_per_capita                   952
gas_co2                                 5787
gas_co2_per_capita                      2959
oil_co2                                10022
oil_co2_per_capita                      5863
other_industry_co2                      1472
other_co2_per_capita                     243
co2_growth_prct                         5779
co2_growth_abs                          8580
co2_per_gdp                             1756
co2_per_unit_energy                      625
consumption_co2                         3901
consumption_co2_per_capita              3331
consumption_co2_per_gdp                  830
cumulative