In [1]:
import sys, os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from keras.layers import Dense, Dropout, SimpleRNN, LSTM
from keras.models import Sequential
from tslearn.preprocessing import TimeSeriesScalerMinMax
# Make the project's src/ directory importable. The path is relative to the
# notebook's working directory. (os.path.dirname('Demand') evaluated to '' and
# contributed nothing to the joined path, so it has been dropped.)
sys.path.append(os.path.join('..', 'src'))
from Demand import Demand

In [2]:
# Location of the demand data, relative to the notebook's working directory.
path = '../data/demand_lower_48'

In [3]:
# Demand is the project class imported from ../src; its implementation is not shown in this notebook.
national_demand = Demand()

In [4]:
# Presumably fills national_demand.dataframe, which every later cell reads — verify against Demand.
national_demand.load_and_clean_data(path)

In [5]:
# Preview the first rows of the loaded frame.
national_demand.dataframe.head()

Unnamed: 0_level_0,Megawatthours,Year,Month,Hour,Day_of_week,Day_of_month,Day_of_year
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-07-01 02:00:00,335153,2015,7,2,2,1,182
2015-07-01 03:00:00,333837,2015,7,3,2,1,182
2015-07-01 04:00:00,398386,2015,7,4,2,1,182
2015-07-01 05:00:00,388954,2015,7,5,2,1,182
2015-07-01 06:00:00,392487,2015,7,6,2,1,182


In [6]:
def create_X_matrix(df, seq_len, column='Megawatthours'):
    """Build a matrix of sliding windows over one column of ``df``.

    Row ``k`` holds the ``seq_len`` consecutive values found at positions
    ``k .. k + seq_len - 1``. The value at position ``k + seq_len`` (the one a
    model would predict from that row) is not part of the row, so the result
    has ``len(df) - seq_len`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``; rows are used in their existing order.
    seq_len : int
        Window length (number of values per row). Must be >= 1.
    column : str, optional
        Name of the column to window. Defaults to ``'Megawatthours'``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(max(len(df) - seq_len, 0), seq_len)``.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(
            'seq_len must be a positive integer, got {!r}'.format(seq_len)
        )

    values = df[column].values
    n_windows = max(len(values) - seq_len, 0)
    # Broadcasting a column of window starts against a row of in-window
    # offsets produces every index at once, which replaces the per-row pandas
    # slicing of the loop-based version.
    starts = np.arange(n_windows)[:, None]
    offsets = np.arange(seq_len)[None, :]
    return values[starts + offsets]

In [7]:
mwh = national_demand.dataframe['Megawatthours'].values

In [8]:
seq_len=20

In [9]:
mwh = mwh[seq_len:].reshape(-1, 1)

In [10]:
mwh.shape

(50159, 1)

In [11]:
X_matrix = create_X_matrix(national_demand.dataframe, 20)

In [12]:
X_matrix.shape

(50159, 20)

In [13]:
matrix_X_y = np.concatenate((X_matrix, mwh), axis=1)

In [14]:
matrix_X_y.shape

(50159, 21)

In [15]:
# First sample: the lag window followed by its target in the last position.
matrix_X_y[0]

array([335153, 333837, 398386, 388954, 392487, 404647, 422227, 442131,
       464371, 491512, 518043, 540514, 560701, 576400, 587405, 594477,
       595242, 586317, 572656, 554682, 543332])

In [16]:
scaler = TimeSeriesScalerMinMax()

In [17]:
split_index = 40899

In [18]:
train = matrix_X_y[:split_index]
test = matrix_X_y[split_index:]

In [19]:
train[0]

array([335153, 333837, 398386, 388954, 392487, 404647, 422227, 442131,
       464371, 491512, 518043, 540514, 560701, 576400, 587405, 594477,
       595242, 586317, 572656, 554682, 543332])

In [20]:
# Number of held-out samples (the rows from split_index onwards).
len(test)

9260

In [21]:
# NOTE(review): in the output below, row 0 is mapped onto [0, 1] using that
# row's own extremes (333837 -> 0, 595242 -> 1). The 21st value — the target —
# is part of the row, so it takes part in the min/max applied to the 20 inputs.
# Confirm this is intended; those statistics are not available at forecast time.
train_scaled = scaler.fit_transform(train)
train_scaled[0]

array([[0.00503433],
       [0.        ],
       [0.24693101],
       [0.21084907],
       [0.22436449],
       [0.27088235],
       [0.33813431],
       [0.4142767 ],
       [0.49935541],
       [0.6031828 ],
       [0.70467665],
       [0.79063905],
       [0.86786404],
       [0.92792028],
       [0.9700197 ],
       [0.99707351],
       [1.        ],
       [0.96585758],
       [0.91359767],
       [0.84483847],
       [0.80141925]])

In [22]:
# NOTE(review): the first scaled test row printed below again contains an exact
# 0 and an exact 1, which suggests each test row is rescaled by its own min/max
# rather than by anything learned from `train` — confirm.
test_scaled = scaler.transform(test)
test_scaled[0]

array([[0.0837891 ],
       [0.11987388],
       [0.27550166],
       [0.55331945],
       [0.8478172 ],
       [1.        ],
       [0.96192997],
       [0.83403725],
       [0.67062418],
       [0.47383172],
       [0.28023122],
       [0.10772884],
       [0.        ],
       [0.01901555],
       [0.1667802 ],
       [0.47494112],
       [0.74987835],
       [0.86461395],
       [0.84828432],
       [0.69034041],
       [0.45894237]])

In [23]:
# The scaler output is 3-D (rows, values per row, 1), as the shape printed below shows.
train_scaled.shape

(40899, 21, 1)

In [24]:
X_train_scaled = train_scaled[:, :20]

In [25]:
X_train_scaled.shape

(40899, 20, 1)

In [26]:
y_train_scaled = train_scaled[:, 20]

In [27]:
y_train_scaled.shape

(40899, 1)

In [30]:
# Held-out inputs: the first seq_len positions of every scaled test row
# (seq_len instead of the literal 20, matching the window length used upstream).
X_test_scaled = test_scaled[:, :seq_len]
X_test_scaled.shape

(9260, 20, 1)

In [31]:
# Held-out targets: position seq_len of every scaled test row (previously the
# literal 20).
y_test_scaled = test_scaled[:, seq_len]
y_test_scaled.shape

(9260, 1)

## RNN

In [32]:
rnn_model = Sequential()

In [33]:
rnn_model.add(SimpleRNN(200, activation='tanh', return_sequences=True, input_shape=(X_train_scaled.shape[1], 1)))
rnn_model.add(Dropout(0.15))

rnn_model.add(SimpleRNN(200, activation='tanh', return_sequences=True))
rnn_model.add(Dropout(0.15))

rnn_model.add(SimpleRNN(200, activation='tanh', return_sequences=False))
rnn_model.add(Dropout(0.15))

rnn_model.add(Dense(1))

rnn_model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn (SimpleRNN)       (None, 20, 200)           40400     
_________________________________________________________________
dropout (Dropout)            (None, 20, 200)           0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 20, 200)           80200     
_________________________________________________________________
dropout_1 (Dropout)          (None, 20, 200)           0         
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 200)               80200     
_________________________________________________________________
dropout_2 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense (Dense)                (None, 1)                 201       

In [34]:
# Optimise mean squared error with Adam.
rnn_model.compile(loss='MSE', optimizer='adam')
# Kept as the cell's final expression so the returned History object is still
# displayed underneath the epoch log.
rnn_model.fit(
    x=X_train_scaled,
    y=y_train_scaled,
    batch_size=1000,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f956d009ef0>

In [36]:
# Predict the scaled target for every held-out window.
rnn_predictions = rnn_model.predict(X_test_scaled)

In [39]:
# Displayed truncated by NumPy; one prediction per test row.
rnn_predictions

array([[0.5512207 ],
       [0.27445024],
       [0.03562455],
       ...,
       [0.34028468],
       [0.6018811 ],
       [0.873911  ]], dtype=float32)

In [40]:
mae = mean_absolute_error(y_test_scaled, rnn_predictions)

In [41]:
mae

0.038066087026008964

In [None]:
# The scaler keeps no fitted state to invert: the scaled rows shown earlier each
# span exactly 0..1, i.e. every row was rescaled by its own min and max. The
# inverse is therefore rebuilt from the raw `test` rows:
#     value = scaled * (row_max - row_min) + row_min
# NOTE(review): each row's min/max include that row's own target, so this
# inversion (like the scaling itself) needs the true value to be known already.
# That is acceptable for offline evaluation but not for real forecasting.
window_min = test.min(axis=1, keepdims=True)
window_max = test.max(axis=1, keepdims=True)
pred_unscaled = rnn_predictions * (window_max - window_min) + window_min