In [3]:
import numpy as np
import pandas as pd
import requests
import json
import sys, os
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from src.Demand_class import Demand
import matplotlib.pyplot as plt
import tensorflow
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from keras.layers import Dense, Dropout, SimpleRNN
from keras.models import Sequential

In [4]:
def create_layers_SimpleRNN(model, input_shape, units=200, activation='tanh', dropout=0.15):
    """Append a three-layer SimpleRNN stack and a one-unit Dense head to ``model``.

    Every SimpleRNN layer is followed by a Dropout layer. The first two
    recurrent layers are created with ``return_sequences=True`` and the
    third with ``return_sequences=False``; only the first receives
    ``input_shape``. The model is modified in place; nothing is returned.

    Args:
        model (RNN obj): model exposing ``add``, e.g. ``Sequential()``.
        input_shape (tup): ``input_shape`` given to the first SimpleRNN layer.
        units (int, optional): Units in every SimpleRNN layer; Defaults to 200.
        activation (str, optional): Activation of every SimpleRNN layer;
        Defaults to 'tanh'.
        dropout (float, optional): Rate of every Dropout layer; Defaults to 0.15.
    """
    # Keyword arguments that differ between the recurrent layers, in stacking order.
    per_layer_kwargs = (
        {'return_sequences': True, 'input_shape': input_shape},
        {'return_sequences': True},
        {'return_sequences': False},
    )
    for extra_kwargs in per_layer_kwargs:
        model.add(SimpleRNN(units, activation=activation, **extra_kwargs))
        model.add(Dropout(dropout))

    # Single-unit output layer.
    model.add(Dense(1))

In [5]:
def compile_model(model, X_train, y_train, optimizer='adam',
                  loss='MSE', epochs=10, batch_size=1000):
    """Compiles the model passed in and fits it on X_train / y_train.

    The model is compiled and trained in place. This function does not
    predict anything and returns None; call ``model.predict`` afterwards.

    Args:
        model (RNN obj): Built model exposing ``compile`` and ``fit``.
        X_train (arr): Training feature matrix
        y_train (arr): Training target matrix
        optimizer (str, optional): Optimizer handed to ``model.compile``.
        Defaults to 'adam'.
        loss (str, optional): Loss handed to ``model.compile``.
        Defaults to 'MSE'.
        epochs (int, optional): Number of epochs handed to ``model.fit``.
        Defaults to 10.
        batch_size (int, optional): Batch size handed to ``model.fit``.
        Defaults to 1000.
    """
    compile_options = {'optimizer': optimizer, 'loss': loss}
    model.compile(**compile_options)

    fit_options = {'epochs': epochs, 'batch_size': batch_size}
    model.fit(X_train, y_train, **fit_options)

In [6]:
def unscale_y(y_array, scaler, n_lag_variables):
    """Inverse-scales a target vector using a scaler that expects
    the target plus lag columns.

    The target values are placed in column 0 and padded with
    ``n_lag_variables`` columns of zeros so the matrix has the width the
    scaler expects; only column 0 of the inverse-transformed result is
    returned. This assumes the target was the first column when the scaler
    was fit.

    Args:
        y_array (arr): target values; either predictions or y_test. May be
        a column matrix of shape (n, 1) or a flat 1-D array of length n.
        scaler (scaler obj): scaler exposing ``inverse_transform``,
        e.g. MinMaxScaler()
        n_lag_variables (int): number of lag variables used in the
        feature matrix. This many columns of zeros are appended so that
        the y matrix matches the shape of the scaler

    Returns:
        [arr]: 1-D array of unscaled target values
    """
    y_column = np.asarray(y_array)
    # A flat vector cannot be concatenated along axis=1; promote it to a column.
    if y_column.ndim == 1:
        y_column = y_column.reshape(-1, 1)

    padding = np.zeros((len(y_column), n_lag_variables))
    padded = np.concatenate([y_column, padding], axis=1)
    return scaler.inverse_transform(padded)[:, 0]

In [7]:
def mean_abs_percent_error(y_test, y_pred):
    """Calculates mean absolute percent error between y_test
    and predictions from a supervised learning model.

    When the two inputs hold the same number of values but have different
    shapes (e.g. a flat y_test of shape (n,) and model output of shape
    (n, 1)), both are flattened so values are compared pairwise instead of
    being broadcast into an (n, n) matrix. Other shape combinations follow
    normal numpy broadcasting.

    The result is undefined where y_test contains zeros: the division
    yields inf/nan, which propagates into the mean.

    Args:
        y_test (arr): actual target values
        y_pred (arr): predicted target values

    Returns:
        float: error metric, in percent
    """
    y_true, y_hat = np.asarray(y_test), np.asarray(y_pred)
    # Same element count, different layout: align pairwise rather than broadcast.
    if y_true.shape != y_hat.shape and y_true.size == y_hat.size:
        y_true, y_hat = y_true.ravel(), y_hat.ravel()
    return np.mean(np.abs((y_true - y_hat) / y_true)) * 100

In [8]:
def plot_predictions_vs_true(ax, y_test, predictions, date_index):
    """Plots both y_true and y_predicted values on the same axes.

    Both series are drawn against ``date_index``; the predicted series is
    drawn semi-transparent so the actual series stays visible underneath.

    Args:
        ax (axes): Axes on which to plot the predictions.
        y_test (arr): Actual target values.
        predictions (arr): Predicted target values.
        date_index (arr): x-axis values shared by y_test and predictions.
    """
    ax.plot(date_index, y_test, label='Actual')
    # Use the date_index argument here; the previous code read an undefined
    # name (preds_index) and only worked if a kernel global happened to exist.
    ax.plot(date_index, predictions, alpha=0.5, label='Predicted')
    ax.legend(fontsize=12)
    ax.set_xlabel('Date', size=16)
    ax.set_ylabel('Megawatthours', size=16)

In [9]:
# Build the project's Demand helper (imported from src.Demand_class) with the key 'NY'.
ny = Demand('NY')

In [10]:
# Presumably populates ny.dataframe, which the next cell inspects;
# Demand.load_data is defined outside this notebook — confirm.
ny.load_data()

In [11]:
# Last rows of the loaded frame: hourly Time / Megawatthours records.
ny.dataframe.tail()

Unnamed: 0,Time,Megawatthours
50865,2021-04-19 11:00:00,15171.0
50866,2021-04-19 12:00:00,15402.0
50867,2021-04-19 13:00:00,15585.0
50868,2021-04-19 14:00:00,15610.0
50869,2021-04-19 15:00:00,15434.0


In [12]:
# Timestamp later passed to scale_split; presumably the train/test boundary — confirm.
ny.split_idx

'2021-04-19 16:00:00'

In [13]:
# Presumably appends the rows to forecast: the tail shown in the next cell has
# later timestamps with NaN Megawatthours. NOTE(review): this appears to mutate
# ny.dataframe, so re-running the cell may extend it again — confirm.
ny.extend_time()

In [14]:
# Check the frame's tail after extend_time().
ny.dataframe.tail()

Unnamed: 0,Megawatthours,Time
50889,,2021-04-20 11:00:00
50890,,2021-04-20 12:00:00
50891,,2021-04-20 13:00:00
50892,,2021-04-20 14:00:00
50893,,2021-04-20 15:00:00


In [15]:
# Derive a frame of lagged features; the tail displayed in the next cell carries
# numbered columns 0-23. NOTE(review): the meaning of the two 24 arguments is
# defined by Demand.create_lag_variables, outside this notebook — confirm.
lag_24_ny = ny.create_lag_variables(ny.dataframe, 24, 24)

In [16]:
# Inspect the lag columns on the final rows of the frame.
lag_24_ny.tail()

Unnamed: 0,Megawatthours,Time,0,1,2,3,4,5,6,7,...,14,15,16,17,18,19,20,21,22,23
50889,,2021-04-20 11:00:00,13228.0,13273.0,13361.0,13366.0,13589.0,13895.0,14215.0,14566.0,...,12758.0,12403.0,12200.0,12165.0,12432.0,13174.0,14321.0,15166.0,15278.0,15163.0
50890,,2021-04-20 12:00:00,13273.0,13361.0,13366.0,13589.0,13895.0,14215.0,14566.0,14922.0,...,12403.0,12200.0,12165.0,12432.0,13174.0,14321.0,15166.0,15278.0,15163.0,15171.0
50891,,2021-04-20 13:00:00,13361.0,13366.0,13589.0,13895.0,14215.0,14566.0,14922.0,15245.0,...,12200.0,12165.0,12432.0,13174.0,14321.0,15166.0,15278.0,15163.0,15171.0,15402.0
50892,,2021-04-20 14:00:00,13366.0,13589.0,13895.0,14215.0,14566.0,14922.0,15245.0,15493.0,...,12165.0,12432.0,13174.0,14321.0,15166.0,15278.0,15163.0,15171.0,15402.0,15585.0
50893,,2021-04-20 15:00:00,13589.0,13895.0,14215.0,14566.0,14922.0,15245.0,15493.0,14986.0,...,12432.0,13174.0,14321.0,15166.0,15278.0,15163.0,15171.0,15402.0,15585.0,15610.0


In [17]:
# Scaler instance handed to scale_split and later to unscale_y to invert predictions.
sclr = MinMaxScaler()

In [18]:
# The fourth return value is bound to `_` as a throwaway. NOTE(review): `_` is
# also IPython's "last output" variable, so a descriptive name would be safer.
X_train, X_test, y_train, _ = ny.scale_split(lag_24_ny, ny.split_idx, sclr)

In [19]:
# Inspect the test feature matrix returned by scale_split.
X_test

array([[0.14894953, 0.16155518, 0.17512678, 0.18681478, 0.20241488,
        0.20864525, 0.18903646, 0.15744989, 0.1203574 , 0.08848104,
        0.0660227 , 0.05076069, 0.04322627, 0.04433712, 0.05409321,
        0.06549143, 0.07843516, 0.08751509, 0.08944699, 0.08968848,
        0.09186187, 0.09611205, 0.09635354, 0.10712388],
       [0.16155518, 0.17512678, 0.18681478, 0.20241488, 0.20864525,
        0.18903646, 0.15744989, 0.1203574 , 0.08848104, 0.0660227 ,
        0.05076069, 0.04322627, 0.04433712, 0.05409321, 0.06549143,
        0.07843516, 0.08751509, 0.08944699, 0.08968848, 0.09186187,
        0.09611205, 0.09635354, 0.10712388, 0.12190292],
       [0.17512678, 0.18681478, 0.20241488, 0.20864525, 0.18903646,
        0.15744989, 0.1203574 , 0.08848104, 0.0660227 , 0.05076069,
        0.04322627, 0.04433712, 0.05409321, 0.06549143, 0.07843516,
        0.08751509, 0.08944699, 0.08968848, 0.09186187, 0.09611205,
        0.09635354, 0.10712388, 0.12190292, 0.13735813],
       [0.186

In [20]:
# Shape of the test feature matrix before it is reshaped for the RNN.
X_test.shape

(24, 24)

In [23]:
# NOTE(review): inputs and outputs share names, so re-running this cell passes
# already-reshaped arrays back into reshape_for_rnn; it also depends on `_`
# still holding the value assigned by the scale_split cell.
X_train, X_test, y_train, _ = ny.reshape_for_rnn(X_train, X_test, y_train, _)

In [21]:
# Empty Keras Sequential container; layers are added by create_layers_SimpleRNN.
ny_model_24 = Sequential()

In [25]:
# input_shape is (X_train.shape[1], 1); assumes reshape_for_rnn produced a 3-D
# X_train whose last axis has length 1 — TODO confirm.
create_layers_SimpleRNN(ny_model_24, (X_train.shape[1], 1))

In [26]:
# Compile and fit with compile_model's defaults (adam, MSE, 10 epochs, batch size 1000).
compile_model(ny_model_24, X_train, y_train)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [27]:
# Predictions are still in scaled units here; unscale_y converts them back further down.
preds_ny_24 = ny_model_24.predict(X_test)

In [28]:
# Inspect the raw (still scaled) predictions.
preds_ny_24

array([[0.17409517],
       [0.19246529],
       [0.20834373],
       [0.22034006],
       [0.22662058],
       [0.22837465],
       [0.22686982],
       [0.20830522],
       [0.16987264],
       [0.12641469],
       [0.09161817],
       [0.07561885],
       [0.07355583],
       [0.0824132 ],
       [0.10134571],
       [0.13216017],
       [0.17622879],
       [0.21699664],
       [0.2312709 ],
       [0.21984039],
       [0.20440184],
       [0.20111482],
       [0.21155575],
       [0.22604968]], dtype=float32)

In [31]:
# 24 is the number of zero columns unscale_y pads on before inverse-transforming with sclr.
preds_ny_24_unscaled = unscale_y(preds_ny_24, sclr, 24)

In [32]:
# Predictions after inverse scaling.
preds_ny_24_unscaled

array([14975.64046814, 15355.9938383 , 15684.75691675, 15933.14090519,
       16063.17920676, 16099.49702708, 16068.33965516, 15683.95967899,
       14888.21304357, 13988.41608277, 13267.95426833, 12936.68825208,
       12893.97355521, 13077.36538912, 13469.362935  , 14107.37635849,
       15019.8171275 , 15863.91543052, 16159.46386872, 15922.7953261 ,
       15603.14001022, 15535.08231609, 15751.26178673, 16051.35855271])

In [37]:
# Positional slice of 24 rows counted from the end of ny.dataframe.
ny.dataframe.iloc[-192:-168]

Unnamed: 0,Megawatthours,Time
50702,16757.0,2021-04-12 16:00:00
50703,16901.0,2021-04-12 17:00:00
50704,17156.0,2021-04-12 18:00:00
50705,17143.0,2021-04-12 19:00:00
50706,17134.0,2021-04-12 20:00:00
50707,16910.0,2021-04-12 21:00:00
50708,16115.0,2021-04-12 22:00:00
50709,15093.0,2021-04-12 23:00:00
50710,14073.0,2021-04-13 00:00:00
50711,13323.0,2021-04-13 01:00:00
