In [1]:
import warnings
# Silence every warning for the rest of the session so cell outputs stay clean.
# NOTE(review): this blanket filter also hides deprecation/future warnings
# raised by pandas and scikit-learn in later cells, which can mask real
# problems -- consider narrowing it to specific warning categories.
warnings.filterwarnings('ignore')

In [2]:
# Import dependencies
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import balanced_accuracy_score
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import ClusterCentroids
from imblearn.combine import SMOTEENN
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [3]:
# Read the USA CO2 / temperature CSV into a DataFrame and display it.
# The path is relative to the notebook's working directory.
file_path = "./Data file/usa_co2_temp_1800_2013.csv"
predict_temp_df = pd.read_csv(filepath_or_buffer=file_path)
predict_temp_df

Unnamed: 0,country,year,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature,Avg Temperature Uncertainty
0,United States,1800,0.253,0.042,,,0.253,0.042,,,...,,,,,,6000000,,,12.377868,3.205628
1,United States,1801,0.267,0.044,,,0.267,0.044,,,...,,,,,,6113782,,,12.681863,2.895417
2,United States,1802,0.289,0.046,,,0.289,0.046,,,...,,,,,,6229723,,,12.859703,3.065703
3,United States,1803,0.297,0.047,,,0.297,0.047,,,...,,,,,,6347862,,,12.692603,3.065032
4,United States,1804,0.333,0.052,,,0.333,0.052,,,...,,,,,,6468241,,,12.576432,3.379515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,United States,2009,5478.210,17.885,29.615,0.097,1905.943,6.222,39.071,1245.742,...,0.368,649.89,2.122,241.81,0.789,306307565,1.489429e+13,1.676,11.156327,0.220114
210,United States,2010,5675.786,18.368,31.449,0.102,2013.304,6.515,41.343,1308.832,...,0.372,650.38,2.105,247.17,0.800,309011469,1.527062e+13,1.688,11.732877,0.211351
211,United States,2011,5540.173,17.781,32.208,0.103,1903.555,6.109,45.542,1327.528,...,0.357,628.73,2.018,255.55,0.820,311584051,1.550930e+13,1.650,11.811093,0.232825
212,United States,2012,5338.698,17.000,35.270,0.112,1684.037,5.362,48.883,1388.255,...,0.337,618.71,1.970,246.42,0.785,314043885,1.584964e+13,1.567,12.761842,0.269340


In [4]:
# Impute missing values: replace each NaN with the mean of its own column.
#
# The filled columns are assigned back to the frame instead of calling
# `fillna(..., inplace=True)` on a column selection. The chained in-place
# form operates on an intermediate object; under pandas Copy-on-Write it no
# longer updates `predict_temp_df`, so the NaNs would silently survive.
#
# NOTE(review): the means are computed over the whole dataset, before the
# train/test split performed later, so test-set information leaks into the
# imputed training values.
columns_to_impute = [
    'flaring_co2',
    'cement_co2',
    'cement_co2_per_capita',
    'gas_co2',
    'gas_co2_per_capita',
    'oil_co2',
    'oil_co2_per_capita',
    'co2_per_gdp',
    'methane',
    'methane_per_capita',
    'nitrous_oxide',
    'nitrous_oxide_per_capita',
    'gdp',
    'energy_per_gdp',
    'co2_growth_prct',
]

# `fillna` with a Series aligns on column names, so every column receives
# its own mean as the fill value.
predict_temp_df[columns_to_impute] = predict_temp_df[columns_to_impute].fillna(
    predict_temp_df[columns_to_impute].mean()
)

predict_temp_df

Unnamed: 0,country,year,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature,Avg Temperature Uncertainty
0,United States,1800,0.253,0.042,18.398612,0.085731,0.253,0.042,27.881063,497.198833,...,0.718742,688.9625,2.4435,259.413333,0.916625,6000000,3.055051e+12,2.602592,12.377868,3.205628
1,United States,1801,0.267,0.044,18.398612,0.085731,0.267,0.044,27.881063,497.198833,...,0.718742,688.9625,2.4435,259.413333,0.916625,6113782,3.055051e+12,2.602592,12.681863,2.895417
2,United States,1802,0.289,0.046,18.398612,0.085731,0.289,0.046,27.881063,497.198833,...,0.718742,688.9625,2.4435,259.413333,0.916625,6229723,3.055051e+12,2.602592,12.859703,3.065703
3,United States,1803,0.297,0.047,18.398612,0.085731,0.297,0.047,27.881063,497.198833,...,0.718742,688.9625,2.4435,259.413333,0.916625,6347862,3.055051e+12,2.602592,12.692603,3.065032
4,United States,1804,0.333,0.052,18.398612,0.085731,0.333,0.052,27.881063,497.198833,...,0.718742,688.9625,2.4435,259.413333,0.916625,6468241,3.055051e+12,2.602592,12.576432,3.379515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,United States,2009,5478.210,17.885,29.615000,0.097000,1905.943,6.222,39.071000,1245.742000,...,0.368000,649.8900,2.1220,241.810000,0.789000,306307565,1.489429e+13,1.676000,11.156327,0.220114
210,United States,2010,5675.786,18.368,31.449000,0.102000,2013.304,6.515,41.343000,1308.832000,...,0.372000,650.3800,2.1050,247.170000,0.800000,309011469,1.527062e+13,1.688000,11.732877,0.211351
211,United States,2011,5540.173,17.781,32.208000,0.103000,1903.555,6.109,45.542000,1327.528000,...,0.357000,628.7300,2.0180,255.550000,0.820000,311584051,1.550930e+13,1.650000,11.811093,0.232825
212,United States,2012,5338.698,17.000,35.270000,0.112000,1684.037,5.362,48.883000,1388.255000,...,0.337000,618.7100,1.9700,246.420000,0.785000,314043885,1.584964e+13,1.567000,12.761842,0.269340


In [5]:
# Names of the candidate feature columns, grouped by theme.
columns = [
    # identifiers
    'country',
    'year',
    # total CO2 emissions
    'co2',
    'co2_per_capita',
    # CO2 emissions by source
    'cement_co2',
    'cement_co2_per_capita',
    'coal_co2',
    'coal_co2_per_capita',
    'flaring_co2',
    'gas_co2',
    'gas_co2_per_capita',
    'oil_co2',
    'oil_co2_per_capita',
    # CO2 growth and intensity
    'co2_growth_prct',
    'co2_per_gdp',
    # other greenhouse gases
    'methane',
    'methane_per_capita',
    'nitrous_oxide',
    'nitrous_oxide_per_capita',
    # demographics and economy
    'population',
    'gdp',
    'energy_per_gdp',
    # temperature measurement uncertainty
    'Avg Temperature Uncertainty',
]

# Name of the column to predict, kept as a single-element list.
target = ['Avg Temperature']

In [13]:
# Feature matrix: every column except the target, with the categorical
# `country` column expanded into indicator (dummy) columns.
X = pd.get_dummies(
    predict_temp_df.drop(columns="Avg Temperature"),
    columns=["country"],
)

# Target: a single-column DataFrame. Casting to int drops the fractional part
# of each average temperature, turning it into a whole-degree class label.
y = predict_temp_df[['Avg Temperature']].astype('int')

In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric
# feature column -- a quick sanity check of value ranges after imputation.
X.describe()

Unnamed: 0,year,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,gas_co2_per_capita,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature Uncertainty,country_United States
count,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,...,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0
mean,1906.5,1775.834846,9.659154,18.398612,0.085731,777.669472,5.111668,27.881063,497.198833,2.186538,...,0.718742,688.9625,2.4435,259.413333,0.916625,111935100.0,3055051000000.0,2.602592,1.052999,1.0
std,61.920648,2020.307607,7.936626,12.609423,0.052663,713.472753,3.865967,8.113161,387.022883,1.459075,...,0.410101,16.864325,0.117713,3.31946,0.028872,93376970.0,4122802000000.0,0.337572,1.088695,0.0
min,1800.0,0.253,0.042,0.0,0.0,0.253,0.042,4.99,0.165,0.003,...,0.029,618.71,1.956,241.81,0.782,6000000.0,26689670000.0,1.567,0.169596,1.0
25%,1853.25,30.91125,1.18525,10.4095,0.0765,30.91125,1.18525,27.881063,99.79125,0.769,...,0.3995,688.9625,2.4435,259.413333,0.916625,26074100.0,219861000000.0,2.602592,0.230978,1.0
50%,1906.5,1044.142,10.775,18.398612,0.085731,887.8935,5.5865,27.881063,497.198833,2.186538,...,0.718742,688.9625,2.4435,259.413333,0.916625,89675700.0,1237754000000.0,2.602592,0.38955,1.0
75%,1959.75,2879.977,16.341,28.458,0.1305,1312.31575,7.463,27.881063,620.82125,3.32475,...,0.99325,688.9625,2.4435,259.413333,0.916625,185987000.0,3242571000000.0,2.602592,1.691417,1.0
max,2013.0,6134.521,22.236,46.851,0.171,2214.838,14.344,54.647,1427.506,5.231,...,1.648,770.75,3.044,281.25,1.048,316400500.0,16108430000000.0,3.746,3.706145,1.0


In [15]:
# Check the balance of our target values: count how many rows fall into each
# integer temperature class.
y['Avg Temperature'].value_counts()

10    114
11     57
9      27
12     11
8       4
13      1
Name: Avg Temperature, dtype: int64

In [16]:
# Hold out a test set. No test_size is given, so scikit-learn's default split
# proportion applies; the fixed random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [17]:
# Instantiate a BalancedRandomForestClassifier with 100 trees and a fixed
# random_state for reproducibility. The model is only constructed and
# displayed here; it is fitted on the training data in a later cell.
from imblearn.ensemble import BalancedRandomForestClassifier
ranforest_model = BalancedRandomForestClassifier(n_estimators=100, random_state = 1)
ranforest_model

BalancedRandomForestClassifier(random_state=1)

In [18]:
# Fit the forest on the training split, predict the held-out split, and
# calculate the balanced accuracy score (the mean of the per-class recalls).
#
# `y_train` is a single-column DataFrame. It is flattened to a 1-D array
# because scikit-learn estimators expect a 1-D target; passing the column
# vector makes `fit` emit a DataConversionWarning (hidden in this notebook by
# the global warnings filter). The fitted model is the same either way.
ranforest_model = ranforest_model.fit(X_train, y_train.to_numpy().ravel())
y_pred = ranforest_model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.43818681318681313

In [19]:
# Show the confusion matrix for the held-out predictions: rows are the true
# classes, columns are the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[ 3,  0,  0,  0,  0,  0],
       [ 3,  4,  0,  0,  0,  0],
       [ 5,  7, 13,  1,  0,  0],
       [ 3,  1,  5,  4,  0,  0],
       [ 1,  0,  0,  2,  1,  0],
       [ 0,  0,  0,  1,  0,  0]])

In [20]:
# Print imbalanced-learn's per-class report for the held-out predictions.
# The report is returned as a formatted string, so it is printed rather than
# left as the cell's last expression.
print(classification_report_imbalanced(y_true=y_test, y_pred=y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          8       0.20      1.00      0.76      0.33      0.87      0.78         3
          9       0.33      0.57      0.83      0.42      0.69      0.46         7
         10       0.72      0.50      0.82      0.59      0.64      0.40        26
         11       0.50      0.31      0.90      0.38      0.53      0.26        13
         12       1.00      0.25      1.00      0.40      0.50      0.23         4
         13       0.00      0.00      1.00      0.00      0.00      0.00         1

avg / total       0.60      0.46      0.86      0.48      0.61      0.37        54

