<!-- <h1 style="background:green;">Your Name</h1> -->


# Demand Forecasting Project
1.	Download `electricityLoadData.csv` from the shared box and upload it to your own Google Drive.
2.	Instructions for coding (What you need to code):
    - Load data and preprocess data using the predefined functions.
    - Go to `build_model` section and define your own network.
    - Choose your parameters to set up the training routine.
    - Plot your result by using the `Analyze Result` section.
    - Print your notebook by clicking File > Save > PDF to upload your work.

# Import necessary libraries

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from sklearn import preprocessing
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Configure the Plotly renderer

In [2]:
# Select the default renderer Plotly uses when a figure is shown in this notebook.
import plotly.io as pio
pio.renderers.default = "notebook_connected"

# Define constants

In [3]:
# CSV file read by load_data; a relative path, so it is resolved against the working directory.
FILE_PATH = "electricityLoadData - Q3.csv"
# Number of consecutive rows in each model input window (passed to prepare_data as window_size).
WINDOW = 48

# Load and preprocess data

In [4]:
def load_data(file_path):
    """Read the load CSV and discard its third column.

    The first line of the file is skipped and the second line is used as the
    header row (``header=1``). Malformed rows are dropped with a warning
    instead of aborting the load.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        pandas.DataFrame holding every parsed column except the one at
        position 2.
    """
    # `error_bad_lines=False` is deprecated and was removed in pandas 2.0.
    # `on_bad_lines="warn"` keeps the same behaviour: skip bad rows, report them.
    df = pd.read_csv(file_path, header=1, on_bad_lines="warn")
    # Return a new frame rather than mutating in place.
    return df.drop(columns=df.columns[2])

In [5]:
def normalize_data(dataset):
    """Min-max scale every column of ``dataset`` to the range [0, 1].

    The last column is treated as the demand column: its raw minimum, maximum
    and range are returned so predictions can be mapped back to original units.

    Args:
        dataset: pandas.DataFrame containing only numeric columns.

    Returns:
        Tuple ``(minima_demand, maxima_demand, scaling_parameter_demand,
        normalized)`` where ``normalized`` is a new DataFrame with default
        integer column labels and a default index. The input frame is left
        untouched.
    """
    # Work on a float copy. `dataset.values` may be a view of the frame, so
    # writing into it would mutate the caller's data, and an integer array
    # would silently truncate the scaled values to 0 or 1.
    values = dataset.to_numpy(dtype=float, copy=True)

    # Capture the demand statistics from the raw (unscaled) last column.
    minima_demand = np.amin(values[:, -1])
    maxima_demand = np.amax(values[:, -1])
    scaling_parameter_demand = maxima_demand - minima_demand

    column_min = values.min(axis=0)
    column_span = values.max(axis=0) - column_min
    # A constant column has zero span; divide by 1 so it maps to 0.0 instead
    # of producing NaN from 0/0.
    safe_span = np.where(column_span == 0, 1.0, column_span)
    normalized = (values - column_min) / safe_span

    return minima_demand, maxima_demand, scaling_parameter_demand, pd.DataFrame(normalized)

In [6]:
def prepare_data(dataset, window_size):
    """Slice ``dataset`` into overlapping windows and split them 80/20.

    Each window holds ``window_size + 1`` consecutive rows: the first
    ``window_size`` rows form the model input and the last column of the final
    row is the target. Windows are kept in their original order; the first
    80% (rounded) become the training set and the remainder the test set.

    Note: the start index runs over ``range(len(data) - (window_size + 1))``,
    so the window ending on the very last row is not produced.

    Args:
        dataset: pandas.DataFrame of features, target in the last column.
        window_size: Number of input rows per sample.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``; the ``x`` arrays have
        shape ``(samples, window_size, n_features)`` and the ``y`` arrays have
        shape ``(samples,)``.
    """
    rows = dataset.values
    n_features = len(dataset.columns)
    span = window_size + 1  # input rows plus the single target row

    windows = np.array([rows[start:start + span] for start in range(len(rows) - span)])

    cut = int(round(0.8 * windows.shape[0]))
    head, tail = windows[:cut], windows[cut:]

    # Inputs: every row of the window except the last one.
    x_train = head[:, :-1]
    x_test = tail[:, :-1]
    # Targets: last column of the last row of each window.
    y_train = head[:, -1, -1]
    y_test = tail[:, -1, -1]

    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], n_features)
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], n_features)
    return x_train, y_train, x_test, y_test

In [7]:
# Load the raw CSV, scale every column, then build the windowed train/test arrays.
raw_dataset = load_data(FILE_PATH)
min_demand, max_demand, demand_scaling_param, dataset = normalize_data(raw_dataset)
# The rows are windowed in reversed order.
# NOTE(review): presumably the CSV is stored newest-first — confirm against the data.
reversed_dataset = dataset[::-1]
x_train, y_train, x_test, y_test = prepare_data(reversed_dataset, WINDOW)


The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.





# Define and train model

In [8]:
# LSTM layer followed by a stack of Dense layers ending in a single output unit.
model = Sequential()
# Take (timesteps, features) from the prepared training data instead of
# hard-coding (48, 5), so changing WINDOW or the CSV columns keeps the model
# consistent with its input.
model.add(LSTM(256, activation='relu', input_shape=x_train.shape[1:]))
# NOTE(review): these Dense layers pass no `activation`, so no non-linearity is
# requested between them — confirm that is intended.
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(32))
model.add(Dense(1))

In [9]:
# Configure training: Adam optimizer with mean squared error as the loss.
model.compile(optimizer='adam', loss='mse')

In [10]:
# Print each layer with its output shape and parameter count.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 256)               268288    
                                                                 
 dense (Dense)               (None, 128)               32896     
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                                 
Total params: 311,553
Trainable params: 311,553
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Keep the returned History object so the per-epoch loss / val_loss values can
# be inspected or plotted later, instead of letting its repr become the cell
# output. validation_split=0.2 reserves part of x_train/y_train for validation.
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=15,
    validation_split=0.2,
    verbose=2)

Epoch 1/15
351/351 - 76s - loss: 0.0035 - val_loss: 6.2090e-04 - 76s/epoch - 215ms/step
Epoch 2/15
351/351 - 70s - loss: 3.4117e-04 - val_loss: 2.2674e-04 - 70s/epoch - 199ms/step
Epoch 3/15
351/351 - 66s - loss: 1.9542e-04 - val_loss: 2.1595e-04 - 66s/epoch - 188ms/step
Epoch 4/15
351/351 - 65s - loss: 1.8404e-04 - val_loss: 3.5318e-04 - 65s/epoch - 185ms/step
Epoch 5/15
351/351 - 63s - loss: 1.4950e-04 - val_loss: 2.0138e-04 - 63s/epoch - 180ms/step
Epoch 6/15
351/351 - 42s - loss: 1.4269e-04 - val_loss: 1.6851e-04 - 42s/epoch - 120ms/step
Epoch 7/15
351/351 - 42s - loss: 1.3181e-04 - val_loss: 2.2624e-04 - 42s/epoch - 119ms/step
Epoch 8/15
351/351 - 42s - loss: 1.3317e-04 - val_loss: 1.2185e-04 - 42s/epoch - 118ms/step
Epoch 9/15
351/351 - 42s - loss: 1.1464e-04 - val_loss: 1.2695e-04 - 42s/epoch - 119ms/step
Epoch 10/15
351/351 - 42s - loss: 1.0982e-04 - val_loss: 1.9605e-04 - 42s/epoch - 119ms/step
Epoch 11/15
351/351 - 41s - loss: 1.1824e-04 - val_loss: 1.2329e-04 - 41s/epoch - 1

<keras.callbacks.History at 0x2888ffaf880>

# Get the predicted and actual data

In [12]:
def denormalize_data(data, scaling_parameter, minima):
    """Undo min-max scaling.

    Args:
        data: Scaled value(s) to convert back.
        scaling_parameter: Range (max - min) used when the data was scaled.
        minima: Minimum used when the data was scaled.

    Returns:
        ``data * scaling_parameter + minima``.
    """
    rescaled = data * scaling_parameter
    return rescaled + minima

In [13]:
# Run the model on the test windows, then map predictions and targets back
# to the original demand scale.
normalized_predictions = model.predict(x_test)
predicted_data = denormalize_data(normalized_predictions, demand_scaling_param, min_demand)
actual_data = denormalize_data(y_test, demand_scaling_param, min_demand)



# Calculate and print the mean absolute percentage error and mean absolute error

In [14]:
def calculate_mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Both inputs are flattened to 1-D before comparison. Without this, a
    ``(n,)`` target paired with a ``(n, 1)`` prediction would broadcast to an
    ``(n, n)`` matrix and silently give a wrong score.

    Args:
        y_true: Actual values (array-like). Zero entries give an infinite or
            NaN result, because each error is divided by the actual value.
        y_pred: Predicted values (array-like) with the same number of elements.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` as a float.

    Raises:
        ValueError: If the two inputs do not hold the same number of elements.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same number of elements, "
            f"got {y_true.size} and {y_pred.size}"
        )
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [15]:
# Flatten the (samples, 1) prediction output to 1-D so it lines up with
# actual_data. reshape(-1) adapts to any test-set size, whereas the previous
# hard-coded 14016 broke as soon as the data or WINDOW changed.
predicted_data = predicted_data.reshape(-1)
mape = calculate_mape(actual_data, predicted_data)
mae = mean_absolute_error(actual_data, predicted_data)

In [16]:
# MAPE is a percentage (calculate_mape multiplies by 100); MAE is in the
# de-normalized demand units.
print(f'Test MAPE: {mape:.3f}')
print(f'Test MAE: {mae:.3f}')

Test MAPE: 0.818
Test MAE: 433.947


# Plot the forecast using Plotly

The plot below overlays the first `time_window` actual and predicted test values.

In [17]:
def plot_forecast(actual, predicted, time_window=24, step_seconds=1800):
    """Plot the first ``time_window`` actual and predicted values as two lines.

    Args:
        actual: Actual values (array-like); flattened to 1-D before plotting.
        predicted: Predicted values (array-like); flattened to 1-D before
            plotting, so a ``(n, 1)`` model output is accepted.
        time_window: Maximum number of leading samples to draw.
        step_seconds: Seconds between consecutive samples, used only for the
            figure title. Defaults to 1800, the value previously hard-coded.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Uses the notebook-level `go` import; the former function-level re-import
    # was redundant.
    actual = np.asarray(actual).ravel()
    predicted = np.asarray(predicted).ravel()

    # Clip to the data actually available so x and y always have equal length.
    n_points = min(time_window, len(actual), len(predicted))
    steps = list(range(n_points))

    actual_trace = go.Scatter(x=steps,
                              y=actual[:n_points],
                              mode='lines',
                              name='Actual',
                              line=dict(color='blue'))

    predicted_trace = go.Scatter(x=steps,
                                 y=predicted[:n_points],
                                 mode='lines',
                                 name='Predicted',
                                 line=dict(color='red'))

    layout = go.Layout(title=f'Actual vs Predicted Results in the Next {n_points * step_seconds} Seconds',
                       xaxis=dict(title='Time Sequence'),
                       yaxis=dict(title='Load (MW)'),
                       legend=dict(x=0, y=1))

    fig = go.Figure(data=[actual_trace, predicted_trace], layout=layout)
    fig.show()

In [18]:
# Draw the de-normalized test series with plot_forecast's default time window.
plot_forecast(actual_data, predicted_data)