In [9]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
# load the dataframe
df = pd.read_csv("merged_countries.csv", encoding ="unicode_escape")
df.head()

Unnamed: 0,country_name,Year,urbanization,gdp_per_cap,inflation,gdp_current,unemployment,literacy_adult,health_exp,literacy_youth,life_exp,re_consumption,ff_consumption,energy_use
0,Canada,1971,76.09,4520.162878,2.704918,99271960000.0,6.4,3042833000.0,3042833000.0,3042833000.0,73.029268,3042833000.0,84.15415,6436.226256
1,Algeria,1971,39.665,359.824582,2.626642,5077222000.0,3042833000.0,3042833000.0,3042833000.0,3042833000.0,43.67,3042833000.0,98.99982,245.527602
2,Australia,1971,84.16,3494.97331,6.138107,45214470000.0,3042833000.0,3042833000.0,3042833000.0,3042833000.0,71.068293,3042833000.0,91.17778,3989.629744
3,Austria,1971,65.301,2380.978458,4.704258,17858490000.0,1.5,3042833000.0,3042833000.0,3042833000.0,70.114634,3042833000.0,90.19969,2508.520919
4,Belgium,1971,93.976,3082.927989,4.343629,29821660000.0,2.1,3042833000.0,3042833000.0,3042833000.0,71.060488,3042833000.0,3042833000.0,4099.95532


In [14]:
df

Unnamed: 0,country_name,Year,urbanization,gdp_per_cap,inflation,gdp_current,unemployment,literacy_adult,health_exp,literacy_youth,life_exp,re_consumption,ff_consumption,energy_use
0,Canada,1971,76.090,4520.162878,2.704918,9.927196e+10,6.400000e+00,3.042833e+09,3.042833e+09,3.042833e+09,73.029268,3.042833e+09,8.415415e+01,6436.226256
1,Algeria,1971,39.665,359.824582,2.626642,5.077222e+09,3.042833e+09,3.042833e+09,3.042833e+09,3.042833e+09,43.670000,3.042833e+09,9.899982e+01,245.527602
2,Australia,1971,84.160,3494.973310,6.138107,4.521447e+10,3.042833e+09,3.042833e+09,3.042833e+09,3.042833e+09,71.068293,3.042833e+09,9.117778e+01,3989.629744
3,Austria,1971,65.301,2380.978458,4.704258,1.785849e+10,1.500000e+00,3.042833e+09,3.042833e+09,3.042833e+09,70.114634,3.042833e+09,9.019969e+01,2508.520919
4,Belgium,1971,93.976,3082.927989,4.343629,2.982166e+10,2.100000e+00,3.042833e+09,3.042833e+09,3.042833e+09,71.060488,3.042833e+09,3.042833e+09,4099.955320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3476,Iceland,2015,93.700,52951.681511,1.633056,1.751721e+10,3.980000e+00,4.182286e+10,8.069790e+00,4.182286e+10,82.468293,7.733000e+01,1.128887e+01,17478.893037
3477,New Zealand,2015,86.341,38630.726589,0.292705,1.780645e+11,5.410000e+00,4.182286e+10,9.283443e+00,4.182286e+10,81.456829,3.124000e+01,5.974917e+01,4431.514731
3478,Portugal,2015,63.514,19250.106538,0.487939,1.993941e+11,1.245000e+01,4.182286e+10,9.316458e+00,4.182286e+10,81.124390,2.721000e+01,7.702412e+01,2131.681888
3479,Slovak Republic,2015,53.889,16390.882175,-0.325220,8.890088e+10,1.149000e+01,4.182286e+10,6.792541e+00,4.182286e+10,76.563415,1.341000e+01,6.409150e+01,3003.664220


In [3]:
# Replace missing values (if any) with median of the column
df = df.fillna(df.median())

# Check to see if any NaN values remain
print(df.isna().sum())

country_name      0
Year              0
urbanization      0
gdp_per_cap       0
inflation         0
gdp_current       0
unemployment      0
literacy_adult    0
health_exp        0
literacy_youth    0
life_exp          0
re_consumption    0
ff_consumption    0
energy_use        0
dtype: int64


  


In [4]:
df.dtypes

country_name       object
Year                int64
urbanization      float64
gdp_per_cap       float64
inflation         float64
gdp_current       float64
unemployment      float64
literacy_adult    float64
health_exp        float64
literacy_youth    float64
life_exp          float64
re_consumption    float64
ff_consumption    float64
energy_use        float64
dtype: object

In [5]:
# extract input and output features
X = df.drop(["country_name", "Year"], axis=1).values
y = df["gdp_current"].values

In [6]:
# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### Using non-scaled data

In [7]:
# Define the model - deep neural net
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 24
hidden_nodes_layer2 = 12

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(1))

# Compile the Sequential model together
nn.compile(loss="binary_crossentropy", optimizer="adam")

# Train the model
fit_model = nn.fit(X_train, y_train, epochs=100)

# Evaluate the model using the scaled test data
model_loss = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
22/22 - 0s - loss: -5.5242e+11 - 109ms/epoch - 5ms/step
Loss: -552415068160.0


In [10]:
# Predict the scaled test data
y_pred = nn.predict(X_test)

# Calculate the accuracy  (R^2 score)
r2_score_ = r2_score(y_test, y_pred)

# Calculating the MSE of above nn model
mse = mean_squared_error(y_test, y_pred)

print(f"Accuracy: {np.round(r2_score_ * 100, 2)}%")
print("Mean Squared Error:", mse)

Accuracy: -20.68%
Mean Squared Error: 3.4339554904472226e+21


### Using Scaled Data

In [11]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 24
hidden_nodes_layer2 = 12

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(1))

# Compile the Sequential model together
nn.compile(loss="binary_crossentropy", optimizer="adam")

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=100)

# Evaluate the model using the scaled test data
model_loss = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
22/22 - 0s - loss: -6.0345e+11 - 83ms/epoch - 4ms/step
Loss: -603454570496.0


In [12]:
# Predict the scaled test data
y_pred = nn.predict(X_test_scaled)

# Calculate the accuracy  (R^2 score)
r2_score_ = r2_score(y_test, y_pred)

# Calculating the MSE of above nn model
mse = mean_squared_error(y_test, y_pred)

print(f"Accuracy: {np.round(r2_score_ * 100, 2)}%")
print("Mean Squared Error:", mse)

Accuracy: -55.03%
Mean Squared Error: 4.411569774733559e+21
