# Supervised Machine Learning Model to Predict CO2 Level

In [32]:
import numpy as np
import pandas as pd
import regex as re
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from matplotlib.colors import ListedColormap
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    mean_absolute_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [33]:
# Read the CO2 / temperature CSV into a DataFrame and display it.
file_path = "./Data file/usa_co2_temp_1800_2013.csv"
co2_temp_df = pd.read_csv(filepath_or_buffer=file_path)
co2_temp_df

Unnamed: 0,country,year,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature,Avg Temperature Uncertainty
0,United States,1800,0.253,0.042,,,0.253,0.042,,,...,,,,,,6000000,,,12.377868,3.205628
1,United States,1801,0.267,0.044,,,0.267,0.044,,,...,,,,,,6113782,,,12.681863,2.895417
2,United States,1802,0.289,0.046,,,0.289,0.046,,,...,,,,,,6229723,,,12.859703,3.065703
3,United States,1803,0.297,0.047,,,0.297,0.047,,,...,,,,,,6347862,,,12.692603,3.065032
4,United States,1804,0.333,0.052,,,0.333,0.052,,,...,,,,,,6468241,,,12.576432,3.379515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,United States,2009,5478.210,17.885,29.615,0.097,1905.943,6.222,39.071,1245.742,...,0.368,649.89,2.122,241.81,0.789,306307565,1.489429e+13,1.676,11.156327,0.220114
210,United States,2010,5675.786,18.368,31.449,0.102,2013.304,6.515,41.343,1308.832,...,0.372,650.38,2.105,247.17,0.800,309011469,1.527062e+13,1.688,11.732877,0.211351
211,United States,2011,5540.173,17.781,32.208,0.103,1903.555,6.109,45.542,1327.528,...,0.357,628.73,2.018,255.55,0.820,311584051,1.550930e+13,1.650,11.811093,0.232825
212,United States,2012,5338.698,17.000,35.270,0.112,1684.037,5.362,48.883,1388.255,...,0.337,618.71,1.970,246.42,0.785,314043885,1.584964e+13,1.567,12.761842,0.269340


In [37]:
# Keep only the rows in which every column holds a value; any row with at
# least one missing entry is discarded. The result is a new frame, so
# co2_temp_df itself is left untouched.
df = co2_temp_df.dropna(axis="index", how="any")
df

Unnamed: 0,country,year,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature,Avg Temperature Uncertainty
190,United States,1990,5113.455,20.282,33.484,0.133,1841.986,7.306,41.757,1033.673,...,0.553,767.47,3.044,250.21,0.992,252120309,9250378000000.0,2.432,11.990402,0.235575
191,United States,1991,5057.931,19.871,32.736,0.129,1827.639,7.18,41.137,1056.686,...,0.547,770.75,3.028,251.51,0.988,254539371,9243499000000.0,2.431,11.841255,0.194371
192,United States,1992,5167.481,20.108,32.993,0.128,1850.697,7.201,40.963,1091.238,...,0.54,769.59,2.995,257.86,1.003,256990608,9572148000000.0,2.383,11.091531,0.24725
193,United States,1993,5267.343,20.296,34.838,0.134,1908.247,7.353,40.82,1119.02,...,0.536,752.41,2.899,262.64,1.012,259532130,9834941000000.0,2.366,10.743381,0.215101
194,United States,1994,5358.824,20.435,36.31,0.138,1920.743,7.324,40.965,1145.894,...,0.524,759.44,2.896,270.67,1.032,262241204,10232020000000.0,2.325,11.319369,0.21419
195,United States,1995,5421.502,20.446,37.075,0.14,1942.302,7.325,39.41,1194.544,...,0.516,746.77,2.816,273.53,1.032,265163740,10510260000000.0,2.307,11.291533,0.19691
196,United States,1996,5610.582,20.909,37.309,0.139,2024.058,7.543,38.032,1215.071,...,0.514,740.53,2.76,281.25,1.048,268335007,10909170000000.0,2.297,10.781324,0.188526
197,United States,1997,5686.465,20.928,38.561,0.142,2067.227,7.608,38.183,1223.335,...,0.499,723.75,2.664,274.45,1.01,271713634,11398760000000.0,2.213,11.067458,0.21443
198,United States,1998,5731.05,20.827,39.461,0.143,2094.976,7.613,35.863,1200.762,...,0.481,704.8,2.561,271.12,0.985,275175309,11906030000000.0,2.134,12.423116,0.189361
199,United States,1999,5804.669,20.839,40.239,0.144,2100.317,7.54,35.834,1204.715,...,0.466,690.45,2.479,266.72,0.958,278548148,12463820000000.0,2.067,12.016275,0.195652


In [46]:
# Inspect the column labels of the null-free frame.
df.columns

Index(['country', 'year', 'co2', 'co2_per_capita', 'cement_co2',
       'cement_co2_per_capita', 'coal_co2', 'coal_co2_per_capita',
       'flaring_co2', 'gas_co2', 'gas_co2_per_capita', 'oil_co2',
       'oil_co2_per_capita', 'co2_growth_prct', 'co2_per_gdp', 'methane',
       'methane_per_capita', 'nitrous_oxide', 'nitrous_oxide_per_capita',
       'population', 'gdp', 'energy_per_gdp', 'Avg Temperature',
       'Avg Temperature Uncertainty'],
      dtype='object')

In [47]:
# Name of the column the model should predict.
target = ['co2']

# Every remaining column label (i.e. all labels except the target 'co2'),
# grouped by theme for readability.
columns = [
    # identifiers
    'country', 'year',
    # CO2 per capita and per-source breakdown
    'co2_per_capita',
    'cement_co2', 'cement_co2_per_capita',
    'coal_co2', 'coal_co2_per_capita',
    'flaring_co2',
    'gas_co2', 'gas_co2_per_capita',
    'oil_co2', 'oil_co2_per_capita',
    'co2_growth_prct', 'co2_per_gdp',
    # other greenhouse gases
    'methane', 'methane_per_capita',
    'nitrous_oxide', 'nitrous_oxide_per_capita',
    # demographics / economy
    'population', 'gdp', 'energy_per_gdp',
    # temperature
    'Avg Temperature', 'Avg Temperature Uncertainty',
]

In [48]:
# Features: everything except the target column, with the categorical
# 'country' column expanded into indicator (dummy) columns.
X = pd.get_dummies(df.drop(columns='co2'), columns=['country'])

# Target: the 'co2' column, kept as a one-column DataFrame.
y = df[['co2']]

In [49]:
# Summary statistics (count, mean, std, quartiles, min/max) for each column of X.
X.describe()

Unnamed: 0,year,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,gas_co2_per_capita,oil_co2,...,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature,Avg Temperature Uncertainty,country_United States
count,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,...,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,2001.5,19.946583,38.472667,0.135417,2015.415958,7.105917,39.903333,1217.710458,4.277917,2325.698,...,688.9625,2.4435,259.413333,0.916625,284763400.0,12949280000000.0,1.997875,11.688566,0.218899,1.0
std,7.071068,1.273585,5.117198,0.017742,157.92529,0.658,4.453662,91.849953,0.157986,171.557863,...,51.32097,0.358221,10.101674,0.087861,20272960.0,2334810000000.0,0.290745,0.580356,0.032669,0.0
min,1990.0,17.0,29.615,0.097,1684.037,5.362,35.834,1033.673,3.973,2076.071,...,618.71,1.956,241.81,0.782,252120300.0,9243499000000.0,1.567,10.743381,0.188526,1.0
25%,1995.75,19.775,34.4995,0.12875,1905.346,7.176,36.76175,1191.637,4.17175,2183.31,...,652.195,2.169,251.185,0.8615,267542200.0,10809440000000.0,1.742,11.267063,0.199246,1.0
50%,2001.5,20.415,37.935,0.1395,2045.6425,7.34,38.723,1213.093,4.2535,2279.325,...,666.57,2.331,258.9,0.8985,285943700.0,13217480000000.0,1.969,11.771985,0.21277,1.0
75%,2007.25,20.803,42.00075,0.14775,2162.515,7.496,41.008,1257.56225,4.38275,2477.92375,...,742.09,2.774,264.755,0.99475,301327800.0,15139570000000.0,2.2995,11.923896,0.229225,1.0
max,2013.0,21.336,46.851,0.157,2214.838,7.738,54.647,1427.506,4.528,2608.477,...,770.75,3.044,281.25,1.048,316400500.0,16108430000000.0,2.432,13.09842,0.338102,1.0


In [50]:
# Count how many times each distinct value occurs in the target column.
y['co2'].value_counts()

5113.455    1
5057.931    1
5338.698    1
5540.173    1
5675.786    1
5478.210    1
5914.078    1
6131.004    1
6051.668    1
6134.521    1
6114.007    1
6011.837    1
5946.771    1
5904.882    1
6010.508    1
5804.669    1
5731.050    1
5686.465    1
5610.582    1
5421.502    1
5358.824    1
5267.343    1
5167.481    1
5474.257    1
Name: co2, dtype: int64

In [51]:
# Split features and target into training and evaluation subsets.
# No test_size is passed, so the library's default proportion applies;
# random_state pins the shuffle so the split is reproducible.
# (train_test_split is already imported in the notebook's import cell.)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [52]:
# Fit a random forest *regressor* and evaluate it on the held-out rows.
#
# The target `co2` is a continuous quantity (every value in y occurs exactly
# once), so this is a regression problem. A classifier such as
# BalancedRandomForestClassifier rejects such a target with
# "ValueError: Unknown label type: 'continuous'".
ranforest_model = RandomForestRegressor(n_estimators=100, random_state=1)

# y_train / y_test are one-column DataFrames; select the 'co2' column so the
# estimator and the metrics receive a 1-D target.
ranforest_model = ranforest_model.fit(X_train, y_train["co2"])
y_pred = ranforest_model.predict(X_test)

# Score with regression metrics: balanced accuracy is only defined for class
# labels (and was never imported in this notebook).
regression_scores = pd.Series(
    {
        "r2": r2_score(y_test["co2"], y_pred),
        "mean_absolute_error": mean_absolute_error(y_test["co2"], y_pred),
    },
    name="random_forest_regressor",
)
regression_scores