In [69]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import tensorflow as tf
import seaborn as sb
import statsmodels.api as sm

In [2]:
# Load data
file_path = "usa_co2_temp_1800_2013.csv"
predict_temp_df = pd.read_csv(file_path)
predict_temp_df

Unnamed: 0,country,year,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature,Avg Temperature Uncertainty
0,United States,1800,0.253,0.042,,,0.253,0.042,,,...,,,,,,6000000,,,12.377868,3.205628
1,United States,1801,0.267,0.044,,,0.267,0.044,,,...,,,,,,6113782,,,12.681863,2.895417
2,United States,1802,0.289,0.046,,,0.289,0.046,,,...,,,,,,6229723,,,12.859703,3.065703
3,United States,1803,0.297,0.047,,,0.297,0.047,,,...,,,,,,6347862,,,12.692603,3.065032
4,United States,1804,0.333,0.052,,,0.333,0.052,,,...,,,,,,6468241,,,12.576432,3.379515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,United States,2009,5478.210,17.885,29.615,0.097,1905.943,6.222,39.071,1245.742,...,0.368,649.89,2.122,241.81,0.789,306307565,1.489429e+13,1.676,11.156327,0.220114
210,United States,2010,5675.786,18.368,31.449,0.102,2013.304,6.515,41.343,1308.832,...,0.372,650.38,2.105,247.17,0.800,309011469,1.527062e+13,1.688,11.732877,0.211351
211,United States,2011,5540.173,17.781,32.208,0.103,1903.555,6.109,45.542,1327.528,...,0.357,628.73,2.018,255.55,0.820,311584051,1.550930e+13,1.650,11.811093,0.232825
212,United States,2012,5338.698,17.000,35.270,0.112,1684.037,5.362,48.883,1388.255,...,0.337,618.71,1.970,246.42,0.785,314043885,1.584964e+13,1.567,12.761842,0.269340


In [3]:
predict_temp_df.tail()

Unnamed: 0,country,year,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature,Avg Temperature Uncertainty
209,United States,2009,5478.21,17.885,29.615,0.097,1905.943,6.222,39.071,1245.742,...,0.368,649.89,2.122,241.81,0.789,306307565,14894290000000.0,1.676,11.156327,0.220114
210,United States,2010,5675.786,18.368,31.449,0.102,2013.304,6.515,41.343,1308.832,...,0.372,650.38,2.105,247.17,0.8,309011469,15270620000000.0,1.688,11.732877,0.211351
211,United States,2011,5540.173,17.781,32.208,0.103,1903.555,6.109,45.542,1327.528,...,0.357,628.73,2.018,255.55,0.82,311584051,15509300000000.0,1.65,11.811093,0.232825
212,United States,2012,5338.698,17.0,35.27,0.112,1684.037,5.362,48.883,1388.255,...,0.337,618.71,1.97,246.42,0.785,314043885,15849640000000.0,1.567,12.761842,0.26934
213,United States,2013,5474.257,17.302,36.369,0.115,1740.833,5.502,54.647,1427.506,...,0.34,619.02,1.956,247.39,0.782,316400539,16108430000000.0,1.589,13.09842,0.338102


In [4]:
predict_temp_df.nunique()

country                          1
year                           214
co2                            213
co2_per_capita                 209
cement_co2                     102
cement_co2_per_capita           56
coal_co2                       213
coal_co2_per_capita            208
flaring_co2                     64
gas_co2                        131
gas_co2_per_capita             131
oil_co2                        154
oil_co2_per_capita             150
co2_growth_prct                207
co2_per_gdp                    181
methane                         24
methane_per_capita              24
nitrous_oxide                   24
nitrous_oxide_per_capita        23
population                     214
gdp                            194
energy_per_gdp                  49
Avg Temperature                214
Avg Temperature Uncertainty    214
dtype: int64

In [5]:
# Removing the NaN values by replacing the NaN values with the mean average. 
predict_temp_df['flaring_co2'].fillna(predict_temp_df['flaring_co2'].mean(), inplace = True)
predict_temp_df['cement_co2'].fillna(predict_temp_df['cement_co2'].mean(), inplace = True)
predict_temp_df['cement_co2_per_capita'].fillna(predict_temp_df['cement_co2_per_capita'].mean(), inplace = True)
predict_temp_df['gas_co2'].fillna(predict_temp_df['gas_co2'].mean(), inplace = True)
predict_temp_df['gas_co2_per_capita'].fillna(predict_temp_df['gas_co2_per_capita'].mean(), inplace = True)
predict_temp_df['oil_co2'].fillna(predict_temp_df['oil_co2'].mean(), inplace = True)
predict_temp_df['oil_co2_per_capita'].fillna(predict_temp_df['oil_co2_per_capita'].mean(), inplace = True)
predict_temp_df['co2_per_gdp'].fillna(predict_temp_df['co2_per_gdp'].mean(), inplace = True)
predict_temp_df['methane'].fillna(predict_temp_df['methane'].mean(), inplace = True)
predict_temp_df['methane_per_capita'].fillna(predict_temp_df['methane_per_capita'].mean(), inplace = True)
predict_temp_df['nitrous_oxide'].fillna(predict_temp_df['nitrous_oxide'].mean(), inplace = True)
predict_temp_df['nitrous_oxide_per_capita'].fillna(predict_temp_df['nitrous_oxide_per_capita'].mean(), inplace = True)
predict_temp_df['gdp'].fillna(predict_temp_df['gdp'].mean(), inplace = True)
predict_temp_df['energy_per_gdp'].fillna(predict_temp_df['energy_per_gdp'].mean(), inplace = True)
predict_temp_df['co2_growth_prct'].fillna(predict_temp_df['co2_growth_prct'].mean(), inplace = True)

predict_temp_df.isnull().sum()

country                        0
year                           0
co2                            0
co2_per_capita                 0
cement_co2                     0
cement_co2_per_capita          0
coal_co2                       0
coal_co2_per_capita            0
flaring_co2                    0
gas_co2                        0
gas_co2_per_capita             0
oil_co2                        0
oil_co2_per_capita             0
co2_growth_prct                0
co2_per_gdp                    0
methane                        0
methane_per_capita             0
nitrous_oxide                  0
nitrous_oxide_per_capita       0
population                     0
gdp                            0
energy_per_gdp                 0
Avg Temperature                0
Avg Temperature Uncertainty    0
dtype: int64

In [6]:
predict_temp_df.dtypes

country                         object
year                             int64
co2                            float64
co2_per_capita                 float64
cement_co2                     float64
cement_co2_per_capita          float64
coal_co2                       float64
coal_co2_per_capita            float64
flaring_co2                    float64
gas_co2                        float64
gas_co2_per_capita             float64
oil_co2                        float64
oil_co2_per_capita             float64
co2_growth_prct                float64
co2_per_gdp                    float64
methane                        float64
methane_per_capita             float64
nitrous_oxide                  float64
nitrous_oxide_per_capita       float64
population                       int64
gdp                            float64
energy_per_gdp                 float64
Avg Temperature                float64
Avg Temperature Uncertainty    float64
dtype: object

In [7]:
# Drop the non-beneficial ID columns
predict_temp_df = predict_temp_df.drop(columns=["country", "year"], axis=1)

In [8]:
# Renaming the DataFrame to help fit column name in the Y and X features data
predict_temp_df.rename(columns={'Avg Temperature': 'AvgTemperature'}, inplace=True)

In [9]:
predict_temp_df['AvgTemperature'] = predict_temp_df['AvgTemperature'].astype('int64')

In [50]:
# Remove Avg Temperature outcome target from features data
y = predict_temp_df.AvgTemperature
X = predict_temp_df.drop(columns="AvgTemperature")

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
predict_temp_df

Unnamed: 0,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,gas_co2_per_capita,oil_co2,...,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,AvgTemperature,Avg Temperature Uncertainty
0,0.253,0.042,18.398612,0.085731,0.253,0.042,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6000000,3.055051e+12,2.602592,12,3.205628
1,0.267,0.044,18.398612,0.085731,0.267,0.044,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6113782,3.055051e+12,2.602592,12,2.895417
2,0.289,0.046,18.398612,0.085731,0.289,0.046,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6229723,3.055051e+12,2.602592,12,3.065703
3,0.297,0.047,18.398612,0.085731,0.297,0.047,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6347862,3.055051e+12,2.602592,12,3.065032
4,0.333,0.052,18.398612,0.085731,0.333,0.052,27.881063,497.198833,2.186538,925.551682,...,0.718742,688.9625,2.4435,259.413333,0.916625,6468241,3.055051e+12,2.602592,12,3.379515
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,5478.210,17.885,29.615000,0.097000,1905.943,6.222,39.071000,1245.742000,4.067000,2233.033000,...,0.368000,649.8900,2.1220,241.810000,0.789000,306307565,1.489429e+13,1.676000,11,0.220114
210,5675.786,18.368,31.449000,0.102000,2013.304,6.515,41.343000,1308.832000,4.236000,2250.504000,...,0.372000,650.3800,2.1050,247.170000,0.800000,309011469,1.527062e+13,1.688000,11,0.211351
211,5540.173,17.781,32.208000,0.103000,1903.555,6.109,45.542000,1327.528000,4.261000,2201.747000,...,0.357000,628.7300,2.0180,255.550000,0.820000,311584051,1.550930e+13,1.650000,11,0.232825
212,5338.698,17.000,35.270000,0.112000,1684.037,5.362,48.883000,1388.255000,4.421000,2152.124000,...,0.337000,618.7100,1.9700,246.420000,0.785000,314043885,1.584964e+13,1.567000,12,0.269340


# Using Logistic Regression Model

In [51]:
# Preprocess numerical data for neural network

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [52]:
# Define the logistic regression model
log_classifier = LogisticRegression(solver="lbfgs",max_iter=200)

# Train the model
log_classifier.fit(X_train,y_train)

# Evaluate the model
y_pred = log_classifier.predict(X_test)
print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Logistic regression model accuracy: 0.278


In [54]:
# Define the basic neural network model
nn_model = tf.keras.models.Sequential()

# First hidden layer
nn_model.add(tf.keras.layers.Dense(units=80, input_dim=21, activation="relu"))

# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=50, activation="sigmoid"))


# Output layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
2/2 - 0s - loss: -8.1543e+04 - accuracy: 0.0000e+00 - 129ms/epoch - 64ms/step
Loss: -81543.03125, Accuracy: 0.0


# Using Deep Learning Model 

In [46]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [47]:
X_train

Unnamed: 0,co2,co2_per_capita,cement_co2,cement_co2_per_capita,coal_co2,coal_co2_per_capita,flaring_co2,gas_co2,gas_co2_per_capita,oil_co2,...,co2_growth_prct,co2_per_gdp,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,population,gdp,energy_per_gdp,Avg Temperature Uncertainty
138,1515.693,11.136,9.182000,0.068000,906.794,6.662,27.881063,115.320000,0.847000,482.196000,...,-15.44,1.104,688.9625,2.4435,259.413333,0.916625,136108915,1.373408e+12,2.602592,0.227255
52,26.791,1.068,18.398612,0.085731,26.791,1.068,27.881063,497.198833,2.186538,925.551682,...,8.76,0.267,688.9625,2.4435,259.413333,0.916625,25092277,1.001978e+11,2.602592,1.821508
66,59.232,1.600,18.398612,0.085731,57.763,1.560,27.881063,497.198833,2.186538,1.469000,...,0.70,0.349,688.9625,2.4435,259.413333,0.916625,37017769,1.695929e+11,2.602592,1.238647
26,1.315,0.111,18.398612,0.085731,1.315,0.111,27.881063,497.198833,2.186538,925.551682,...,15.81,0.038,688.9625,2.4435,259.413333,0.916625,11834416,3.480963e+10,2.602592,2.601358
61,45.679,1.393,18.398612,0.085731,44.818,1.367,27.881063,497.198833,2.186538,0.861000,...,-3.71,0.324,688.9625,2.4435,259.413333,0.916625,32787298,1.409043e+11,2.602592,1.405408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,72.866,1.922,18.398612,0.085731,71.499,1.886,27.881063,497.198833,2.186538,1.367000,...,23.02,0.407,688.9625,2.4435,259.413333,0.916625,37917653,1.791725e+11,2.602592,1.467890
192,5167.481,20.108,32.993000,0.128000,1850.697,7.201,40.963000,1091.238000,4.246000,2128.090000,...,2.17,0.540,769.5900,2.9950,257.860000,1.003000,256990608,9.572148e+12,2.383000,0.247250
117,1670.971,15.673,0.008000,0.000000,1481.645,13.897,27.881063,38.882000,0.365000,147.941000,...,10.01,1.648,688.9625,2.4435,259.413333,0.916625,106614820,1.014196e+12,2.602592,0.381123
47,15.070,0.699,18.398612,0.085731,15.070,0.699,27.881063,497.198833,2.186538,925.551682,...,18.56,0.192,688.9625,2.4435,259.413333,0.916625,21560737,7.862561e+10,2.602592,2.324403


In [57]:
# Define the model - deep neural net
number_input_features = (X_train)
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 50

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=21, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_35 (Dense)            (None, 80)                1760      
                                                                 
 dense_36 (Dense)            (None, 50)                4050      
                                                                 
 dense_37 (Dense)            (None, 1)                 51        
                                                                 
Total params: 5,861
Trainable params: 5,861
Non-trainable params: 0
_________________________________________________________________


In [58]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [59]:
# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100


Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [60]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 0s - loss: -2.0447e+16 - accuracy: 0.0000e+00 - 205ms/epoch - 102ms/step
Loss: -2.044677378067661e+16, Accuracy: 0.0


In [62]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [63]:
# Importing Linear regression
from sklearn.linear_model import LinearRegression
lr= LinearRegression()

In [64]:
lr.fit(X_train, y_train)
pred = lr.predict(X_test)

In [65]:
from sklearn.metrics import r2_score
r2=(r2_score(y_test,pred))
print(r2*100)

-4.458480308185697


In [78]:
# Putting independent variables as x and dependent variable as y
x=X_train
y=y_train

In [79]:
# Finding out the linear regression using OLS method
x= sm.add_constant(x)
results = sm.OLS(y,x)

  return ptp(axis=axis, out=out, **kwargs)


In [80]:
# Fitting the model and summarizing
model=results.fit()
model.summary()

0,1,2,3
Dep. Variable:,AvgTemperature,R-squared:,0.393
Model:,OLS,Adj. R-squared:,0.325
Method:,Least Squares,F-statistic:,5.783
Date:,"Sat, 02 Apr 2022",Prob (F-statistic):,1.5e-09
Time:,12:36:06,Log-Likelihood:,-159.6
No. Observations:,160,AIC:,353.2
Df Residuals:,143,BIC:,405.5
Df Model:,16,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.8733,0.815,1.072,0.286,-0.737,2.484
co2,-0.0067,0.016,-0.427,0.670,-0.038,0.024
co2_per_capita,1.6327,3.400,0.480,0.632,-5.088,8.354
cement_co2,-0.0132,0.041,-0.323,0.747,-0.094,0.068
cement_co2_per_capita,2.6235,5.386,0.487,0.627,-8.024,13.271
coal_co2,0.0061,0.016,0.391,0.696,-0.025,0.037
coal_co2_per_capita,-1.6401,3.385,-0.484,0.629,-8.331,5.051
flaring_co2,-0.0024,0.011,-0.220,0.826,-0.024,0.020
gas_co2,0.0130,0.016,0.801,0.424,-0.019,0.045

0,1,2,3
Omnibus:,37.854,Durbin-Watson:,1.838
Prob(Omnibus):,0.0,Jarque-Bera (JB):,100.691
Skew:,-0.947,Prob(JB):,1.36e-22
Kurtosis:,6.394,Cond. No.,7640000000000000.0


# Using Random Forest Classifier 

In [81]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

 Random forest predictive accuracy: 0.630


In [82]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 24
hidden_nodes_layer2 = 12

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
2/2 - 3s - loss: -6.7187e+02 - accuracy: 0.0000e+00 - 3s/epoch - 1s/step
Loss: -671.8673095703125, Accuracy: 0.0


# Using Multiple Linear Regression Model

In [84]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [85]:
predictions = regressor.predict(X_test)

In [86]:
from sklearn.metrics import r2_score
r_squared = r2_score(y_test, predictions)

In [87]:
N = len(X_test)

In [90]:
k=20

adjusted_r_squared = 1 - (((1 - (r_squared ** 2)) * (N - 1)) / (N - k - 1))

print(f'The adjusted R score of our model is: {adjusted_r_squared}')

The adjusted R score of our model is: -0.6028680712942442
