In [1]:
import numpy as np
import pandas as pd
import requests
import json
import sys, os
# Put the parent of the current working directory on the import path so the
# `src` package imported in the next cell can be resolved.
# NOTE(review): the recorded output of the next cell is
# "ModuleNotFoundError: No module named 'config'". Presumably
# src/Demand_class.py imports `config` as a top-level module that this path
# entry does not make reachable -- confirm where `config` lives and whether
# its directory has to be added here as well.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
from src.Demand_class import Demand

ModuleNotFoundError: No module named 'config'

In [None]:
ny = Demand('NY')

In [None]:
ny.load_data()

In [None]:
ny.dataframe.head()

In [None]:
ny.dataframe.describe()

In [None]:
ny.dataframe.info()

In [None]:
ny.create_time_features()

In [None]:
df = ny.time_features_df

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
lag_df = ny.create_lag_variables(ny.dataframe, 24, 24)

In [None]:
lag_df.head()

In [None]:
lag_df.describe()

In [None]:
sclr = MinMaxScaler()

In [None]:
X_train, X_test, y_train, y_test = ny.scale_split(lag_df, '2020-03-01 00:00:00', sclr)

In [None]:
len(X_train)

In [None]:
len(X_test)

In [None]:
X_train, X_test, y_train, y_test = ny.reshape_for_rnn(X_train, X_test, y_train, y_test)

In [None]:
X_train.shape

In [None]:
import matplotlib.pyplot as plt
import tensorflow
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from keras.layers import Dense, Dropout, SimpleRNN
from keras.models import Sequential

In [None]:
model = Sequential()

In [None]:
def create_layers_SimpleRNN(model, input_shape, units=200, activation='tanh', dropout=0.15):
    """Append a three-layer SimpleRNN stack and a Dense(1) output to ``model``.

    Each SimpleRNN layer is immediately followed by a Dropout layer. The
    model is modified in place through ``model.add``; nothing is returned.

    Args:
        model (RNN obj): model instance the layers are added to.
        input_shape (tup): shape handed to the first SimpleRNN layer as
        ``input_shape``.
        units (int, optional): Units used by every SimpleRNN layer;
        Defaults to 200.
        activation (str, optional): Activation used by every SimpleRNN
        layer; Defaults to 'tanh'.
        dropout (float, optional): Rate given to every Dropout layer;
        Defaults to 0.15.
    """
    # Per-layer keyword arguments, in stacking order. Only the first layer
    # receives ``input_shape``; only the last one turns off
    # ``return_sequences`` before the Dense output.
    recurrent_layer_kwargs = (
        {'return_sequences': True, 'input_shape': input_shape},
        {'return_sequences': True},
        {'return_sequences': False},
    )

    for layer_kwargs in recurrent_layer_kwargs:
        model.add(SimpleRNN(units, activation=activation, **layer_kwargs))
        model.add(Dropout(dropout))

    model.add(Dense(1))

In [None]:
create_layers_SimpleRNN(model, (X_train.shape[1], 1))

In [None]:
model

In [None]:
model.summary()

In [None]:
def compile_and_predict(model, X_train, y_train, X_test, optimizer='adam', 
                            loss='MSE', epochs=10, batch_size=1000):
    """Compile ``model``, fit it on the training data and predict on ``X_test``.

    The model is compiled and trained in place, so the caller's ``model``
    object is the fitted one after this returns.

    Args:
        model (RNN obj): Built model exposing ``compile``, ``fit`` and
        ``predict``.
        X_train (arr): Training feature matrix
        y_train (arr): Training target matrix
        X_test (arr): Test feature matrix
        optimizer (str, optional): Optimizer passed to ``model.compile``.
        Defaults to 'adam'.
        loss (str, optional): Loss passed to ``model.compile``.
        Defaults to 'MSE'.
        epochs (int, optional): Number of training epochs passed to
        ``model.fit``. Defaults to 10.
        batch_size (int, optional): Batch size passed to ``model.fit``.
        Defaults to 1000.

    Returns:
        arr: Whatever ``model.predict(X_test)`` returns.
    """
    compile_options = {'optimizer': optimizer, 'loss': loss}
    model.compile(**compile_options)

    fit_options = {'epochs': epochs, 'batch_size': batch_size}
    model.fit(X_train, y_train, **fit_options)

    return model.predict(X_test)

In [None]:
preds = compile_and_predict(model, X_train, y_train, X_test, epochs=20)

In [None]:
preds

In [None]:
def unscale_y(y_array, scaler, n_lag_variables):
    """Inverse-scale a target column with a scaler fitted on a wider matrix.

    The target values are placed in column 0 and padded with
    ``n_lag_variables`` columns of zeros so the matrix has the width the
    scaler expects; after ``scaler.inverse_transform`` only column 0 is kept.

    Args:
        y_array (arr): target values; either predictions or y_test. May be
        2-D (one row per sample) or 1-D, in which case it is treated as a
        single column.
        scaler (scaler obj): fitted scaler instance, e.g. MinMaxScaler()
        n_lag_variables (int): number of lag variables used in the
        feature matrix. This many columns of zeros are appended so that
        the y matrix matches the shape of the scaler

    Returns:
        [arr]: 1-D array with the unscaled values of column 0
    """
    y_column = np.asarray(y_array, dtype=float)
    # A flat vector cannot be concatenated along axis=1; promote it to a
    # single column. Inputs that are already 2-D are passed through as-is.
    if y_column.ndim == 1:
        y_column = y_column.reshape(-1, 1)

    padding = np.zeros((len(y_column), n_lag_variables))
    padded = np.concatenate([y_column, padding], axis=1)
    # Only column 0 carries real data; the zero padding exists solely to
    # satisfy the scaler's expected width.
    return scaler.inverse_transform(padded)[:, 0]

In [None]:
unscaled_preds = unscale_y(preds, sclr, 24)

In [None]:
unscaled_preds

In [None]:
def mean_abs_percent_error(y_test, y_pred):
    """Calculates mean absolute percent error between y_test
    matrix and predictions from a supervised learning model.

    When the two inputs hold the same number of values but in different
    shapes (e.g. ``(n,)`` against ``(n, 1)``), both are flattened first, so
    the subtraction pairs values element by element instead of broadcasting
    into an ``(n, n)`` matrix.

    Zeros in ``y_test`` are not special-cased: the division follows numpy
    semantics and yields ``inf``/``nan`` for those entries.

    Args:
        y_test (arr): matrix of actual target values
        y_pred (arr): matrix of predicted target values

    Returns:
        float: error metric, in percent
    """
    actual = np.asarray(y_test, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # Same element count but different layout: align by flattening so that
    # numpy broadcasting cannot silently build an outer difference.
    if actual.shape != predicted.shape and actual.size == predicted.size:
        actual, predicted = actual.ravel(), predicted.ravel()

    return np.mean(np.abs((actual - predicted) / actual)) * 100

In [None]:
y_test_unscaled = unscale_y(y_test, sclr, 24)

In [None]:
y_test_unscaled

In [None]:
mean_abs_percent_error(y_test_unscaled, unscaled_preds)

In [None]:
def plot_predictions_vs_true(ax, y_test, predictions, date_index):
    """Plots both y_true and y_predicted values on the same axes.

    Both series are drawn against ``date_index``; the function does not
    read any notebook-level variable.

    Args:
        ax (axes): Axes on which to plot the predictions.
        y_test (arr): Matrix of actual target variables.
        predictions (arr): Matrix of predicted target variables.
        date_index (arr): x-axis values shared by ``y_test`` and
        ``predictions``
    """
    ax.plot(date_index, y_test, label='Actual')
    # Use the date_index argument here as well, so the function works for
    # any region and on a fresh kernel instead of relying on a global.
    ax.plot(date_index, predictions, alpha=0.5, label='Predicted')
    ax.legend(fontsize=12)
    ax.set_xlabel('Date', size=16)
    ax.set_ylabel('Megawatthours', size=16)

In [None]:
# Timestamps from the 'Time' column on or after the cutoff string that was
# also passed to ny.scale_split above, as a plain array for the x-axis.
preds_index = lag_df.loc[lag_df['Time'] >= '2020-03-01 00:00:00', 'Time'].values

In [None]:
fig, ax = plt.subplots(figsize=(12, 4))
plot_predictions_vs_true(ax, y_test_unscaled, unscaled_preds, preds_index)
plt.show()

## Try on another region

In [None]:
tex = Demand('TEX')

In [None]:
tex.load_data()

In [None]:
tex.dataframe.head()

In [None]:
tex_lag = tex.create_lag_variables(tex.dataframe, 24, 24)

In [None]:
tex_lag.head()

In [None]:
# NOTE(review): `sclr` is the same scaler object that was already passed to
# ny.scale_split earlier in this notebook. Presumably scale_split fits the
# scaler it receives (`sclr` is created unfitted) -- confirm in
# Demand.scale_split. If so, this call replaces the NY fit, and re-running the
# NY unscale_y cells afterwards would use TEX statistics. A dedicated scaler
# for TEX would avoid the shared state.
tex_X_train, tex_X_test, tex_y_train, tex_y_test = tex.scale_split(tex_lag, '2020-03-01 00:00:00', sclr)

In [None]:
tex_X_train

In [None]:
tex_X_train, tex_X_test, tex_y_train, tex_y_test = tex.reshape_for_rnn(tex_X_train, tex_X_test, tex_y_train, tex_y_test)

In [None]:
tex_X_train.shape

In [None]:
tex_model = Sequential()

In [None]:
create_layers_SimpleRNN(tex_model, (tex_X_train.shape[1], 1))

In [None]:
tex_preds = compile_and_predict(tex_model, tex_X_train, tex_y_train, tex_X_test)

In [None]:
unscaled_tex_preds = unscale_y(tex_preds, sclr, 24)

In [None]:
unscaled_tex_preds

In [None]:
unscaled_tex_y_test = unscale_y(tex_y_test, sclr, 24)

In [None]:
mean_abs_percent_error(unscaled_tex_y_test, unscaled_tex_preds)

In [None]:
# Build the x-axis from TEX's own lag frame rather than reusing `preds_index`,
# which was derived from the NY `lag_df`; the two regions are not guaranteed
# to have the same number of test rows.
# NOTE(review): assumes `tex_lag` has a 'Time' column like `lag_df` -- both
# come from create_lag_variables; confirm.
tex_preds_index = tex_lag.loc[tex_lag['Time'] >= '2020-03-01 00:00:00', 'Time'].values

fig, ax = plt.subplots(figsize=(12, 4))
plot_predictions_vs_true(ax, unscaled_tex_y_test, unscaled_tex_preds, tex_preds_index)
plt.show()