In [17]:
##### MODELING LIBRARIES #####
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from keras.callbacks import EarlyStopping
# from sklearn.linear_model import LassoCV

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from patsy import dmatrices, dmatrix

from sklearn.metrics import accuracy_score

##### FORMATTING AND GRAPHING LIBRARIES #####
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

##### TIMING AND UTILITY LIBRARIES #####
import sqlalchemy as db
from datetime import datetime
import timeit
from dateutil.relativedelta import relativedelta 
import random

In [18]:
##### IMPORT PREPARED DATA #####
print('Reading pre-built dataset...')
df_load = pd.read_csv(
    '../PJM_Weekly_Model/sample_base_data.csv',
    index_col = 0,
    parse_dates = [0],
)

# Drop every lagged ('Lag') and squared ('Sq') column - not used by the RNN
lag_sq_cols = [name for name in df_load.columns if 'Lag' in name or 'Sq' in name]
df_load = df_load.drop(columns = lag_sq_cols)

# Drop rows containing nulls and report how many rows were lost
# (should only be losing daylight savings in March)
rows_before = df_load.shape[0]
df_load = df_load.dropna()
dropped = rows_before - df_load.shape[0]
print('Dropping %s null values...'%(dropped))

# Convert the calendar columns to categorical variables in a single pass
df_load = df_load.astype({col: 'category' for col in ['Month','WeekDay','Day','Hour']})

Reading pre-built dataset...
Dropping 1 null values...


In [19]:
##### FINAL DATA PREPARATION #####
print("\nDefining testing and training set...")

# Seed every random number generator in play. Seeding only Python's `random`
# leaves NumPy and TensorFlow (weight initialisation) unseeded, so results
# would differ from run to run.
RANDOM_SEED = 238
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Use Patsy to build the target (y) and design matrix (X) with dummy variables
# for the categorical columns. The formula has main effects only - it contains
# no interaction terms.
y, X =  dmatrices('value~Light+WWP+THI+Month+Day+WeekDay+Hour+Holiday',df_load,return_type='dataframe')

# Chronological split (no shuffling) with a two-year training sample
# (8760 h/yr * 2). The absolute row count is passed as an int: going through
# a float fraction lets scikit-learn floor the product back to a count, which
# can come out one row short because of floating-point rounding.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 17520, shuffle = False)

# Standardize both datasets - the scaler is fitted on the training data only
# and kept in `ss` so the same fit can be reused on the backcast dataset
ss = StandardScaler()

X_train_ss = ss.fit_transform(X_train)
X_test_ss = ss.transform(X_test)


Defining testing and training set...


In [20]:
# Window and batch settings shared by the training and test generators
SEQUENCE_LENGTH = 48   # number of consecutive rows in each input window
BATCH_SIZE = 512

# Targets are handed over as plain NumPy arrays. The generator indexes targets
# by integer position, and doing that on a Series with a DatetimeIndex relies
# on a pandas positional fallback that is deprecated.

# Create training sequences
train_sequences = TimeseriesGenerator(
    X_train_ss,
    y_train['value'].to_numpy(),
    length = SEQUENCE_LENGTH,
    batch_size = BATCH_SIZE,
)

# Create test sequences
test_sequences = TimeseriesGenerator(
    X_test_ss,
    y_test['value'].to_numpy(),
    length = SEQUENCE_LENGTH,
    batch_size = BATCH_SIZE,
)

In [21]:
# Sanity check: shape of the first batch of inputs - (batch size, window length, features)
train_sequences[0][0].shape

(512, 48, 75)

In [23]:
##### RNN MODEL #####
tic = timeit.default_timer()
print("\nBuilding and training model...") 

# Take (window length, features) from the first training batch instead of
# hard-coding it, so the network keeps matching the generator if the window
# length or the feature set changes.
input_shape = train_sequences[0][0].shape[1:]

# Four stacked LSTM layers of decreasing width; only the last one collapses the
# sequence (return_sequences = False) before the single linear output unit.
model = Sequential()
model.add(LSTM(128, input_shape = input_shape, return_sequences = True, activation = 'relu'))
model.add(LSTM(64, return_sequences = True, activation = 'relu'))
model.add(LSTM(32, return_sequences = True, activation = 'relu'))
model.add(LSTM(16, return_sequences = False, activation = 'relu'))
model.add(Dense(1, activation = 'linear'))

# `metrics` is passed as a list: that is the documented form, and newer Keras
# versions reject a bare string. The history keys are unchanged.
model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = 'mean_squared_error',
    metrics = ['mean_absolute_percentage_error'],
)

# NOTE: the test sequences double as validation data, so the val_* curves are
# not an independent estimate of out-of-sample error.
history = model.fit(train_sequences, validation_data = test_sequences, epochs = 10)

toc = timeit.default_timer()
print('RNN fit created in %0.2f seconds' % (toc-tic))


Building and training model...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
RNN fit created in 150.40 seconds


In [None]:
# Per-epoch loss on the training and test sequences
loss_curves = {'Train Loss': 'loss', 'Test Loss': 'val_loss'}

fig, ax = plt.subplots()
for curve_label, history_key in loss_curves.items():
    ax.plot(history.history[history_key], label = curve_label)
ax.legend();

In [None]:
# Per-epoch mean absolute percentage error on the training and test sequences
mape_curves = {
    'Train MAPE': 'mean_absolute_percentage_error',
    'Test MAPE': 'val_mean_absolute_percentage_error',
}

fig, ax = plt.subplots()
for curve_label, history_key in mape_curves.items():
    ax.plot(history.history[history_key], label = curve_label)
ax.legend();

In [None]:
# Predict on the training sequences and align the result with y_train.
# The generator yields no sample for the leading look-back window, so those
# rows are padded with NaN. The pad length is derived from the data rather than
# hard-coded, and np.nan replaces the np.NaN alias that NumPy 2.0 removed.
train_pred = model.predict(train_sequences).ravel()
train_pad = np.full(len(y_train) - train_pred.size, np.nan)
y_train['predicted'] = np.concatenate([train_pad, train_pred])

In [None]:
# Predict on the test sequences and align the result with y_test.
# The generator yields no sample for the leading look-back window, so those
# rows are padded with NaN. The pad length is derived from the data rather than
# hard-coded, and np.nan replaces the np.NaN alias that NumPy 2.0 removed.
test_pred = model.predict(test_sequences).ravel()
test_pad = np.full(len(y_test) - test_pred.size, np.nan)
y_test['predicted'] = np.concatenate([test_pad, test_pred])

In [None]:
# Actual ('value') vs. predicted series over the whole training period.
# Legend labels are taken from the frame being plotted (y_train), not y_test,
# so they stay correct even if the test predictions have not been added yet.
plt.plot(y_train)
plt.legend(y_train.columns);

In [None]:
# Actual vs. predicted for June 2019.
# Rows are selected with .loc: partial-string row selection through
# DataFrame[...] was deprecated in pandas 1.2 and removed in pandas 2.0.
plt.plot(y_test.loc['2019-06'])
plt.legend(y_test.columns);

In [None]:
# Actual vs. predicted for the single day 2019-06-12.
# Rows are selected with .loc: partial-string row selection through
# DataFrame[...] was deprecated in pandas 1.2 and removed in pandas 2.0.
plt.plot(y_test.loc['20190612'])
plt.legend(y_test.columns);