# Gated Recurrent Unit (GRU)

In [1]:
import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm_notebook
from sklearn.preprocessing import MinMaxScaler

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

In [None]:
import warnings
warnings.filterwarnings('ignore')

## Load Datasets

In [7]:
data_dir = './datasets/hourly-energy-consumption/'

In [8]:
df_AEP = pd.read_csv(data_dir + 'AEP_hourly.csv')
df_AEP.head()

Unnamed: 0,Datetime,AEP_MW
0,2004-12-31 01:00:00,13478.0
1,2004-12-31 02:00:00,12865.0
2,2004-12-31 03:00:00,12577.0
3,2004-12-31 04:00:00,12517.0
4,2004-12-31 05:00:00,12670.0


In [29]:
df_COMED = pd.read_csv(data_dir + 'COMED_hourly.csv')
df_COMED.head()

Unnamed: 0,Datetime,COMED_MW
0,2011-12-31 01:00:00,9970.0
1,2011-12-31 02:00:00,9428.0
2,2011-12-31 03:00:00,9059.0
3,2011-12-31 04:00:00,8817.0
4,2011-12-31 05:00:00,8743.0


In [30]:
df_FE = pd.read_csv(data_dir + 'FE_hourly.csv')
df_FE.head()

Unnamed: 0,Datetime,FE_MW
0,2011-12-31 01:00:00,6222.0
1,2011-12-31 02:00:00,5973.0
2,2011-12-31 03:00:00,5778.0
3,2011-12-31 04:00:00,5707.0
4,2011-12-31 05:00:00,5691.0


## Pre-process Datasets

In [23]:
# the scaler objects will be stored in this dictionary 
# so that our output test data from the model can be re-scaled during evaluation
scalers_label = {}

In [25]:
train_x = []
test_x, test_y = {}, {}

for file in tqdm_notebook(os.listdir(data_dir)):
    
    # skip the files we're not using
    if file[-4:] != '.csv' or file == 'pjm_hourly_est.csv':
        continue
        
    df_data = pd.read_csv('{}/{}'.format(data_dir, file), parse_dates=[0])
    
    # pre-process the time data into suitable input formats
    df_data['hour'] = df_data.apply(lambda x: x['Datetime'].hour, axis=1)
    df_data['dayofweek'] = df_data.apply(lambda x: x['Datetime'].dayofweek, axis=1)
    df_data['month'] = df_data.apply(lambda x: x['Datetime'].month, axis=1)
    df_data['dayofyear'] = df_data.apply(lambda x: x['Datetime'].dayofyear, axis=1)
    df_data = df_data.sort_values('Datetime').drop('Datetime', axis=1)
    
    # scale the input data
    sc = MinMaxScaler()
    sc_label = MinMaxScaler()
    
    data = sc.fit_transform(df_data.values)
    
    # obtain the scale for the labels (usage data)
    # so that output can be re-schaled to actual value during evaluation
    sc_label.fit(df_data.iloc[:,0].values.reshape(-1,1))
    scalers_label[file] = sc_label
    
    # define look-back period and split for inputs and labels
    look_back = 90
    inputs = np.zeros((len(data)-look_back, look_back, df_data.shape[1]))
    labels = np.zeros(len(data)-look_back)
    
    for i in range(look_back, len(data)):
        inputs[i-look_back] = data[i-look_back:i]
        labels[i-look_back] = data[i,0]
    inputs = inputs.reshape(-1, look_back, df_data.shape[1])
    labels = labels.reshape(-1, 1)
    
    # split data into train/ test portions and combine all data from different files into a single array
    test_size = int(0.1*len(inputs))
    if len(train_x) == 0:
        train_x = inputs[:-test_size]
        train_y = labels[:-test_size]
    else:
        train_x = np.concatenate((train_x, inputs[:-test_size]))
        train_y = np.concatenate((train_y, labels[:-test_size]))
    
    test_x[file] = (inputs[-test_size:])
    test_y[file] = (labels[-test_size:])

HBox(children=(IntProgress(value=0, max=13), HTML(value='')))




In [26]:
print("Number of training data: " + str(len(train_x)))
print("Number of test data: " + str(len(test_x)))

Number of training data: 980185
Number of test data: 12


## Set Dataloader

In [31]:
BATCH_SIZE = 16

In [32]:
is_cuda = torch.cuda.is_available()

if is_cuda: device = torch.device('cuda')
else: device = torch.device('cpu')

In [33]:
train_dataset = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE, drop_last=True)

In [34]:
dataiter = iter(train_loader)
sample_x, sample_y = dataiter.next()

print('Input shape:' + str(sample_x.shape))
print('Output shape:' + str(sample_y.shape))

Input shape:torch.Size([16, 90, 5])
Output shape:torch.Size([16, 1])


## Set Configs

In [36]:
input_size = next(iter(train_loader))[0].shape[2]
output_size = 1
hidden_dim=256
n_layers = 2

LR = 0.001
N_EPOCHS = 5

## Build GRU Network

In [45]:
class GRU(nn.Module):
    
    def __init__(self, input_size, hidden_dim, output_size, n_layers, drop_prob=0.2):
        super(GRU, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        self.gru_layer = nn.GRU(input_size, hidden_dim, n_layers, dropout=drop_prob, batch_first=True)
        self.relu = nn.ReLU()
        self.fc_layer = nn.Linear(hidden_dim, output_size)

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device)
        
        return hidden
    
    def forward(self, x, hidden):
        gru_out, hidden = self.gru_layer(x, hidden)
        output = self.fc_layer(self.relu(gru_out[:,-1]))
        
        return output, hidden

#### Initialize GRU Network with hyper-parameters

In [46]:
gru = GRU(input_size, hidden_dim, output_size, n_layers)
gru.to(device)

GRU(
  (gru_layer): GRU(5, 256, num_layers=2, batch_first=True, dropout=0.2)
  (relu): ReLU()
  (fc_layer): Linear(in_features=256, out_features=1, bias=True)
)

## Set Loss Function

In [47]:
mse_loss = nn.MSELoss()

## Set Optimizers

In [48]:
optimizer = torch.optim.Adam(gru.parameters(), lr=LR)

## Train GRU Network

In [49]:
print_every = 200
counter = 0
epoch_times = []

gru.train()
for epoch in range(N_EPOCHS):
    
    tik = time.clock()
    hidden = gru.init_hidden(BATCH_SIZE)
    train_loss = 0.
    
    for inputs, labels in train_loader:
        counter += 1
        hidden = hidden.data
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        gru_out, hidden = gru(inputs.float(), hidden)
        loss = mse_loss(gru_out.squeeze(), labels.float())
        
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
        if counter%print_every == 0:
            print('Epoch: {}...'.format(epoch+1),
                  'Step: {}/{}...'.format(counter, len(train_loader)),
                  'Train Loss: {:.6f}...'.format(train_loss/counter))
    
    tok = time.clock()
    print('Epoch {}/{} Done, Total Loss: {}'.format(epoch, N_EPOCHS, train_loss/len(train_loader)))
    print('Total Time Elapsed: {} seconds'.format(str(tok-tik)))
    epoch_times.append(current_time-start_time)

print('Total Training Time: {} seconds'.format(str(sum(epoch_times))))

  
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 1... Step: 200/61261... Train Loss: 0.029870...


KeyboardInterrupt: 

In [None]:
outputs = []
targets = []
tik = time.clock()

gru.eval()
for i in test_x.keys():
    inputs = torch.from_numpy(np.array(test_x[i]))
    labels = torch.from_numpy(np.array(test_x[i]))
    inputs, labels = inputs.to(device), labels.to(device)
    
    hidden = gru.init_hidden(inputs.shape[0])
    gru_out, hidden = gru(inputs.float(), hidden)
    outputs.append(scalers_label[i].inverse_transform(gru_out.cpu().detach().numpy()).reshape(-1))
    targets.append(scalers_label[i].inverse_transform(labels.numpy()).reshape(-1))

tok = time.clock()
print('Evaluation Time: {}'.format(str(tok-tik)))

sMAPE = 0
for i in range(len(outputs)):
    sMAPE += np.mean(abs(outputs[i]-targets[i]) / (targets[i]+outputs[i]/2) / len(outputs))
print('sMAPE: {}%'.format(sMAPE*100))

  This is separate from the ipykernel package so we can avoid doing imports until


---