<font size="8">Applied Machine Learning - Semester Project</font>
<font size="6">Code by Alexander M. Pellegrino</font>
<font size="6">Under Dr. Shucheng Yu</font>
<font size="6">On October 28th, 2023</font>

<font size="6">Section 1 - Initial Setup</font>\
<font size="4">This section performs initial setup steps for TensorFlow and ensures that we're properly utilizing the GPU.</font>

In [10]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, LSTM

# TensorFlow is verbose by default and will print dozens of
# warnings on WSL2 environments despite being the official
# platform for Windows hosts. (These are expected and can
# be safely ignored, but tend to cover the actual target output.)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.get_logger().setLevel('ERROR')

# OneDNN Optimizations can cause variance in numbers during
# linux-to-windows floating-point representation conversion.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

# Verify GPU learning works - this amount of data would take
# an incredible amount of time to train on a consumer CPU.
print("GPUs:", len(tf.config.list_physical_devices('GPU')))

GPUs: 1


<font size="6">Section 2 - Data Processing</font>\
<font size="4">This section is responsible for reading the data from our csv file and properly formatting it to feed to Keras.</font>

In [11]:
# Read data file
full_frame = pd.read_csv("global_electricity_statistics.csv")

# Ensure file fully parsed correctly
full_frame

Unnamed: 0,Country,Features,Region,1980,1981,1982,1983,1984,1985,1986,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Algeria,net generation,Africa,6.683,7.65,8.824,9.615,10.537,11.569,12.214,...,53.9845,56.3134,60.39972,64.68244,66.75504,71.49546,72.10903,76.685,72.73591277,77.53072719
1,Angola,net generation,Africa,0.905,0.906,0.995,1.028,1.028,1.028,1.088,...,6.03408,7.97606,9.21666,9.30914,10.203511,10.67604,12.83194,15.4,16.6,16.429392
2,Benin,net generation,Africa,0.005,0.005,0.005,0.005,0.005,0.005,0.005,...,0.04612,0.08848,0.22666,0.31056,0.26004,0.3115,0.19028,0.2017,0.22608,0.24109728
3,Botswana,net generation,Africa,0.443,0.502,0.489,0.434,0.445,0.456,0.538,...,0.33,0.86868,2.17628,2.79104,2.52984,2.8438,2.97076,3.0469,2.05144,2.18234816
4,Burkina Faso,net generation,Africa,0.098,0.108,0.115,0.117,0.113,0.115,0.122,...,0.86834,0.98268,1.11808,1.43986,1.5509,1.64602,1.6464,1.72552,1.647133174,1.761209666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1605,Trinidad and Tobago,distribution losses,Central & South America,0.244,0.21,0.152,0.326,0.36,0.407,0.337,...,0.239,0.234,0.245,0.248,0.253,0.274864,0.425807,0.424101,0.422757,0.422757
1606,Turks and Caicos Islands,distribution losses,Central & South America,0.00035,0.00035,0.00035,0.00035,0.00035,0.00035,0.00035,...,0,0,0,0,0,0.014,0.0125,0.0125,0.0125,0.01277172
1607,U.S. Virgin Islands,distribution losses,Central & South America,0.05243,0.05537,0.05607,0.05761,0.05789,0.05922,0.06055,...,0.063,0.06,0.06,0.06,0.065,0.065,0.05,0.051,0.051,0.051
1608,Uruguay,distribution losses,Central & South America,0.55,0.426,0.627,0.662,0.78,0.702,0.911,...,1.292,1.282,1.253,1.49,1.608,1.694491,1.557257,1.322331,1.129273,1.129273


In [12]:
# Filtering to just usage statistics
pruned_frame = full_frame[full_frame['Features'] == 'net consumption']

pruned_frame

Unnamed: 0,Country,Features,Region,1980,1981,1982,1983,1984,1985,1986,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
230,Algeria,net consumption,Africa,5.915,6.713,7.733,8.207,8.959,9.686,10.343,...,42.8675,45.2014,49.20572,53.45544,55.82304,60.28346,62.11403,66.646,63.69490277,68.66337919
231,Angola,net consumption,Africa,0.736,0.737,0.815,0.832,0.86,0.826,0.886,...,5.33508,7.05006,8.14866,8.23114,9.036511,9.468667,11.384079,13.658591,14.754764,14.56049071
232,Benin,net consumption,Africa,0.101,0.115,0.155,0.155,0.137,0.142,0.14,...,0.93312,0.93648,0.93366,1.08156,1.06104,1.125821,1.187964,0.9297,0.52408,0.523382994
233,Botswana,net consumption,Africa,0.41199,0.47686,0.53677,0.56362,0.59985,0.64708,0.74034,...,3.209,3.34768,3.60528,3.73004,3.61584,3.5528,3.651212,4.162138,3.22213,3.35303816
234,Burkina Faso,net consumption,Africa,0.09114,0.10044,0.10695,0.10881,0.10509,0.10695,0.11346,...,1.24784,1.35648,1.431962,1.69966,1.9974,2.06792,2.2648,2.651386,2.467133174,2.642688237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,Trinidad and Tobago,net consumption,Central & South America,1.706,1.964,2.378,2.42,2.47,2.446,2.799,...,8.34532,8.70094,8.61292,8.7491,8.60216,8.490056,8.246993,8.213919,7.779031516,8.303561981
456,Turks and Caicos Islands,net consumption,Central & South America,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,...,0.197,0.196,0.21338,0.22466,0.235,0.212164,0.23008,0.232618,0.2435,0.25976828
457,U.S. Virgin Islands,net consumption,Central & South America,0.69657,0.73563,0.74493,0.76539,0.76911,0.78678,0.80445,...,0.715801,0.667248,0.640179,0.632274,0.62233,0.45824,0.56,0.568,0.5978,0.6415072
458,Uruguay,net consumption,Central & South America,2.745,3.119,2.888,2.999,3.144,3.145,3.282,...,9.86876,9.97776,10.33684,10.7926,11.39642,11.098889,11.768859,11.713604,11.780227,13.61377336


In [13]:
# Drop unused columns
pruned_frame = pruned_frame.drop(["Country", "Features", "Region"], axis='columns')

pruned_frame

Unnamed: 0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
230,5.915,6.713,7.733,8.207,8.959,9.686,10.343,11.119,11.954,12.19,...,42.8675,45.2014,49.20572,53.45544,55.82304,60.28346,62.11403,66.646,63.69490277,68.66337919
231,0.736,0.737,0.815,0.832,0.86,0.826,0.886,0.894,0.893,0.899,...,5.33508,7.05006,8.14866,8.23114,9.036511,9.468667,11.384079,13.658591,14.754764,14.56049071
232,0.101,0.115,0.155,0.155,0.137,0.142,0.14,0.135,0.131,0.15,...,0.93312,0.93648,0.93366,1.08156,1.06104,1.125821,1.187964,0.9297,0.52408,0.523382994
233,0.41199,0.47686,0.53677,0.56362,0.59985,0.64708,0.74034,0.85878,0.816,0.80621,...,3.209,3.34768,3.60528,3.73004,3.61584,3.5528,3.651212,4.162138,3.22213,3.35303816
234,0.09114,0.10044,0.10695,0.10881,0.10509,0.10695,0.11346,0.12369,0.13299,0.14229,...,1.24784,1.35648,1.431962,1.69966,1.9974,2.06792,2.2648,2.651386,2.467133174,2.642688237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,1.706,1.964,2.378,2.42,2.47,2.446,2.799,2.949,2.981,2.883,...,8.34532,8.70094,8.61292,8.7491,8.60216,8.490056,8.246993,8.213919,7.779031516,8.303561981
456,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.0093,...,0.197,0.196,0.21338,0.22466,0.235,0.212164,0.23008,0.232618,0.2435,0.25976828
457,0.69657,0.73563,0.74493,0.76539,0.76911,0.78678,0.80445,0.84165,0.84816,0.83049,...,0.715801,0.667248,0.640179,0.632274,0.62233,0.45824,0.56,0.568,0.5978,0.6415072
458,2.745,3.119,2.888,2.999,3.144,3.145,3.282,3.464,3.722,3.601,...,9.86876,9.97776,10.33684,10.7926,11.39642,11.098889,11.768859,11.713604,11.780227,13.61377336


In [14]:
# Countries without a power consumption listed typically
# didn't have an energy grid at that time or were incredibly
# remote. We can fairly safely assume their consumption to
# be 0 in those cases, at least relative to a global scale.

# Indicator for "surveyed, no power grid"
pruned_frame.replace("--", 0, inplace=True)
pruned_frame.replace("ie", 0, inplace=True)

pruned_frame

Unnamed: 0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
230,5.915,6.713,7.733,8.207,8.959,9.686,10.343,11.119,11.954,12.19,...,42.8675,45.2014,49.20572,53.45544,55.82304,60.28346,62.11403,66.646,63.69490277,68.66337919
231,0.736,0.737,0.815,0.832,0.86,0.826,0.886,0.894,0.893,0.899,...,5.33508,7.05006,8.14866,8.23114,9.036511,9.468667,11.384079,13.658591,14.754764,14.56049071
232,0.101,0.115,0.155,0.155,0.137,0.142,0.14,0.135,0.131,0.15,...,0.93312,0.93648,0.93366,1.08156,1.06104,1.125821,1.187964,0.9297,0.52408,0.523382994
233,0.41199,0.47686,0.53677,0.56362,0.59985,0.64708,0.74034,0.85878,0.816,0.80621,...,3.209,3.34768,3.60528,3.73004,3.61584,3.5528,3.651212,4.162138,3.22213,3.35303816
234,0.09114,0.10044,0.10695,0.10881,0.10509,0.10695,0.11346,0.12369,0.13299,0.14229,...,1.24784,1.35648,1.431962,1.69966,1.9974,2.06792,2.2648,2.651386,2.467133174,2.642688237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,1.706,1.964,2.378,2.42,2.47,2.446,2.799,2.949,2.981,2.883,...,8.34532,8.70094,8.61292,8.7491,8.60216,8.490056,8.246993,8.213919,7.779031516,8.303561981
456,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.0093,...,0.197,0.196,0.21338,0.22466,0.235,0.212164,0.23008,0.232618,0.2435,0.25976828
457,0.69657,0.73563,0.74493,0.76539,0.76911,0.78678,0.80445,0.84165,0.84816,0.83049,...,0.715801,0.667248,0.640179,0.632274,0.62233,0.45824,0.56,0.568,0.5978,0.6415072
458,2.745,3.119,2.888,2.999,3.144,3.145,3.282,3.464,3.722,3.601,...,9.86876,9.97776,10.33684,10.7926,11.39642,11.098889,11.768859,11.713604,11.780227,13.61377336


In [15]:
# Ensure that numeric data is actually processed as such
# (It currently seems to be getting string parsed because
# of the -- that appears in some rows even after correction.)
pruned_frame = pruned_frame.apply(pd.to_numeric, errors='coerce')

# Empty cell - missing data
pruned_frame = pruned_frame.fillna(0)

# Frame should match above but be forced into float64 columns
pruned_frame

Unnamed: 0,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
230,5.91500,6.71300,7.73300,8.20700,8.95900,9.68600,10.34300,11.11900,11.95400,12.19000,...,42.867500,45.201400,49.205720,53.455440,55.823040,60.283460,62.114030,66.646000,63.694903,68.663379
231,0.73600,0.73700,0.81500,0.83200,0.86000,0.82600,0.88600,0.89400,0.89300,0.89900,...,5.335080,7.050060,8.148660,8.231140,9.036511,9.468667,11.384079,13.658591,14.754764,14.560491
232,0.10100,0.11500,0.15500,0.15500,0.13700,0.14200,0.14000,0.13500,0.13100,0.15000,...,0.933120,0.936480,0.933660,1.081560,1.061040,1.125821,1.187964,0.929700,0.524080,0.523383
233,0.41199,0.47686,0.53677,0.56362,0.59985,0.64708,0.74034,0.85878,0.81600,0.80621,...,3.209000,3.347680,3.605280,3.730040,3.615840,3.552800,3.651212,4.162138,3.222130,3.353038
234,0.09114,0.10044,0.10695,0.10881,0.10509,0.10695,0.11346,0.12369,0.13299,0.14229,...,1.247840,1.356480,1.431962,1.699660,1.997400,2.067920,2.264800,2.651386,2.467133,2.642688
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,1.70600,1.96400,2.37800,2.42000,2.47000,2.44600,2.79900,2.94900,2.98100,2.88300,...,8.345320,8.700940,8.612920,8.749100,8.602160,8.490056,8.246993,8.213919,7.779032,8.303562
456,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00465,0.00930,...,0.197000,0.196000,0.213380,0.224660,0.235000,0.212164,0.230080,0.232618,0.243500,0.259768
457,0.69657,0.73563,0.74493,0.76539,0.76911,0.78678,0.80445,0.84165,0.84816,0.83049,...,0.715801,0.667248,0.640179,0.632274,0.622330,0.458240,0.560000,0.568000,0.597800,0.641507
458,2.74500,3.11900,2.88800,2.99900,3.14400,3.14500,3.28200,3.46400,3.72200,3.60100,...,9.868760,9.977760,10.336840,10.792600,11.396420,11.098889,11.768859,11.713604,11.780227,13.613773


In [16]:
global_frame = pd.DataFrame(pruned_frame.sum(axis='rows'))

global_frame

Unnamed: 0,0
1980,7323.372305
1981,7410.429118
1982,7569.413689
1983,7872.425977
1984,8361.73772
1985,8662.099349
1986,8888.361763
1987,9278.659622
1988,9699.463799
1989,10131.936808


In [17]:
global_frame_normalized = (global_frame - global_frame.min()) / (global_frame.max() - global_frame.min())

global_frame_normalized

Unnamed: 0,0
1980,0.0
1981,0.004831
1982,0.013654
1983,0.03047
1984,0.057625
1985,0.074294
1986,0.08685
1987,0.10851
1988,0.131863
1989,0.155863


<font size="6">Section 3 - Training and Prediction</font>\
<font size="4">This section is responsible for training the LSTM and rescaling the data for a user-readable output.</font>

In [18]:
# How many years to look back for training
look_back = 15

# Function to convert an array of values into a dataset matrix
def create_dataset(frame, look_back):
    consumption = frame[0].tolist()
    x = []
    y = []
    for i in range(len(consumption) - look_back - 1):
        x.append(consumption[i:(i + look_back)])
        y.append(consumption[i + look_back])
    return np.array(x), np.array(y)

# Function to rescale normalized data back to original scaling
def rescale(normalized_value, frame):
    return normalized_value.item() * (frame.max().item() - frame.min().item()) + frame.min().item()

# Prepare the data (create sequences, split into train and test)
training_x, training_y = create_dataset(global_frame_normalized, look_back)

training_x = np.reshape(training_x, (training_x.shape[0], training_x.shape[1], 1))

# Build the LSTM model
model = Sequential()
model.add(LSTM(100, activation='tanh'))
model.add(Dense(1))
adam = Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss='mean_squared_error')

# Train the model
model.fit(training_x, training_y, epochs=75, batch_size=16)

# Predict for 2022 using the last sequence from the training data
sequence_2021 = training_x[-1].reshape(1, training_x.shape[1], training_x.shape[2])
prediction_2022 = model.predict(sequence_2021).flatten()[0]

# Prepare a new sequence for 2023 prediction, including the predicted 2022 value
sequence_2022 = np.append(sequence_2021[0, 1:, :], prediction_2022).reshape(1, sequence_2021.shape[1], sequence_2021.shape[2])
prediction_2023 = model.predict(sequence_2022).flatten()[0]

print("Global Power Consumption Estimates:")
print(f"2022: {rescale(prediction_2022, global_frame)} billion kWh")
print(f"2023: {rescale(prediction_2023, global_frame)} billion kWh")

Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75
Global Power Consumption Estimates:
2022: 24928.962600186424 billion kWh
2023: 25654.94509751822 billion kWh
