In [1]:
from __future__ import unicode_literals, print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from torch.utils import data
import os
from os import listdir
from os.path import join, abspath
import itertools
import re
import random
import time
from torch.autograd import Variable
from scipy.signal import tukey
from torch.utils import data
from tqdm.notebook import tqdm
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
import warnings
warnings.filterwarnings("ignore")

# Output directory for saved figures (used by plt.savefig further down).
IMG_DIR = '/home/eliza/mlhc/imgs'
# NOTE(review): DIR and CONTINOUS_DIR are not referenced in the visible cells.
DIR = abspath('RT_Cases_NU_HRPCI')
PREPROCESS_DIR = abspath('../data/data')
CONTINOUS_DIR = join(PREPROCESS_DIR, 'continuous')
# rt_to_at() splits each window of WINDOW_SIZE samples into chunks of STEP_SIZE
# samples and averages every chunk.
WINDOW_SIZE = 15000
STEP_SIZE = 250
# NOTE(review): presumably the accepted AOP value range; not used in the visible cells.
MIN_VALID_AOP = 50
MAX_VALID_AOP = 200
# Directory with the per-patient slice files loaded in the single-patient test case.
SLICES_DIR = join(PREPROCESS_DIR, 'slices')

In [None]:
def scale_data(data_x, data_y, out_pos = 0, return_current_avg_std = False):
    """Standardise every feature of ``data_x`` and scale ``data_y`` to match.

    Each feature ``data_x[:, :, i]`` is shifted and divided by its own mean and
    standard deviation. ``data_y`` is scaled with the statistics of feature
    ``out_pos`` so labels and that feature share one scale.

    Args:
        data_x: features, indexed as ``data_x[:, :, feature]`` (torch tensor or
            numpy array). It is not modified; a scaled copy is returned.
        data_y: labels.
        out_pos: index of the feature whose mean and standard deviation are
            used for ``data_y`` and returned.
        return_current_avg_std: when True, additionally return the mean and
            standard deviation of feature 1.
    Returns:
        ``(scaled_x, scaled_y, (avg, std))`` and, when
        ``return_current_avg_std`` is True, a trailing ``(c_avg, c_std)``.
    """
    # Work on a copy so the caller's data is not overwritten.
    data_x = data_x.clone() if hasattr(data_x, "clone") else data_x.copy()

    # Statistics must be taken before the features are normalised below.
    avg = data_x[:,:,out_pos].mean()
    std = data_x[:,:,out_pos].std()
    if return_current_avg_std:
        # Only touch feature 1 when asked for it, so single-feature inputs work.
        c_avg = data_x[:,:,1].mean()
        c_std = data_x[:,:,1].std()

    for i in range(data_x.shape[-1]):
        feature = data_x[:,:,i]
        data_x[:,:,i] = (feature - feature.mean())/feature.std()
    data_y = (data_y-avg)/std

    if return_current_avg_std:
        return data_x, data_y, (avg, std), (c_avg, c_std)
    return data_x, data_y, (avg, std)

class Dataset(data.Dataset):
    """In-memory dataset that pairs each input window with the head of its label sequence."""

    def __init__(self, X, Y, lst_index, output_steps, position_embedding = (False)):
        """
        Args:
            X: all input sequences.
            Y: all label sequences.
            lst_index: indexes of the observations that belong to this dataset.
            output_steps: forecasting horizon; labels are cut to this many steps.
            position_embedding: accepted but not used.
        """
        self.X, self.Y = X[lst_index], Y[lst_index]
        self.output_steps = output_steps

    def __len__(self):
        # Number of selected observations.
        return self.X.shape[0]

    def __getitem__(self, index):
        horizon = self.output_steps
        features = self.X[index]
        labels = self.Y[index][:horizon]
        return features, labels

In [None]:
# Training/validation data: TREND selects which pre-built files are loaded
# (the inline comment lists other available codes).
TREND = 'ids' # id, is, ds
X_train = torch.load(os.path.abspath('../data/data/at_train_10min/%s_x_train' % TREND))
Y_train = torch.load(os.path.abspath('../data/data/at_train_10min/%s_y_train' % TREND))
# NOTE(review): the second dimension of X_train is bound to `pred_length`; it looks
# like the input sequence length rather than a prediction length -- confirm.
sample_size, pred_length, feature_count = X_train.shape

# Held-out test data, loaded from the separate at_test_10min directory.
TEST_TREND = 'ids'
X_test = torch.load(os.path.abspath('../data/data/at_test_10min/%s_x_test' % TEST_TREND))
Y_test = torch.load(os.path.abspath('../data/data/at_test_10min/%s_y_test' % TEST_TREND))
test_input_size = X_test.shape[0]

# Split sizes keep a fixed 3:1:1 train/validation/test ratio derived from test_size.
# With TUNING enabled a fixed 1000-sample test set (4000 train+validation) is used.
TUNING = False  
test_size = 1000 if TUNING else min(sample_size // 4 * 1, test_input_size)
train_valid_size = test_size * 4
training_size = test_size * 3
validation_size = test_size * 1
print('Total: %d\nTraining: %d, Validation: %d, Test: %d' % (train_valid_size, training_size, validation_size, test_size))
print('Test set: %d ~ %d'% (test_input_size-test_size, test_input_size))

# Row layout of X_all/Y_all: the first train_valid_size rows come from the training
# files, followed by the last test_size rows of the test files.
X_all = torch.cat([X_train[:train_valid_size], X_test[test_input_size-test_size: test_input_size]])
Y_all = torch.cat([Y_train[:train_valid_size], Y_test[test_input_size-test_size: test_input_size]])
# NOTE(review): scale_data computes its statistics over all rows of X_all, i.e. over
# training, validation and test samples together -- confirm this is intended.
# (avg, std) belong to feature 0 and (c_avg, c_std) to feature 1; they are reused
# later to scale other test sets and to un-scale predictions.
X, Y, (avg, std), (c_avg, c_std) = scale_data(X_all, Y_all, out_pos = 0, return_current_avg_std = True)

In [None]:
class Encoder(nn.Module):
    """Bidirectional LSTM encoder."""

    def __init__(self, input_dim, hidden_dim, num_layers, dropout_rate):
        """
        Args:
            input_dim: the dimension of input sequences.
            hidden_dim: number of hidden units per direction.
            num_layers: number of stacked LSTM layers.
            dropout_rate: dropout applied between stacked LSTM layers.
        """
        super(Encoder, self).__init__()
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        # nn.LSTM applies dropout only between stacked layers. With a single
        # layer a non-zero rate does nothing and triggers a warning, so pass 0.
        inter_layer_dropout = dropout_rate if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers,
                            bidirectional = True, dropout = inter_layer_dropout, batch_first = True)

    def forward(self, source):
        """
        Args:
            source: input tensor, batch first: (batch_size, sequence_length, input_dim).
        Return:
            outputs: per-step LSTM outputs, (batch_size, sequence_length, 2 * hidden_dim).
            hidden: tuple (h_n, c_n) of final states, each
                (2 * num_layers, batch_size, hidden_dim).
        """
        outputs, hidden = self.lstm(source)
        return outputs, hidden
    
class Decoder(nn.Module):
    """Unidirectional LSTM decoder followed by a linear read-out."""

    def __init__(self, output_dim, hidden_dim, num_layers, dropout_rate):
        """
        Args:
            output_dim: the dimension of output sequences.
            hidden_dim: number of hidden units of ONE encoder direction; the
                decoder LSTM uses twice this size.
            num_layers: number of stacked LSTM layers.
            dropout_rate: dropout applied between stacked LSTM layers.
        """
        super(Decoder, self).__init__()

        # nn.LSTM applies dropout only between stacked layers. With a single
        # layer a non-zero rate does nothing and triggers a warning, so pass 0.
        inter_layer_dropout = dropout_rate if num_layers > 1 else 0.0

        # Since the encoder is bidirectional, decoder has double hidden size
        self.lstm = nn.LSTM(output_dim, hidden_dim*2, num_layers = num_layers,
                            dropout = inter_layer_dropout, batch_first = True)

        self.out = nn.Linear(hidden_dim*2, output_dim)

    def forward(self, x, hidden):
        """
        Args:
            x: previous prediction, (batch_size, 1, output_dim).
            hidden: tuple (h, c) of states from the previous step, each
                (num_layers, batch_size, 2 * hidden_dim).
        Returns:
            1. prediction for current step, (batch_size, 1, output_dim).
            2. hidden state to pass to the next step.
        """
        output, hidden = self.lstm(x, hidden)
        prediction = self.out(output.float())
        return prediction, hidden
    
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that rolls the decoder out one step at a time."""

    def __init__(self, encoder, decoder, device):
        """
        Args:
            encoder: Encoder object (bidirectional LSTM).
            decoder: Decoder object whose hidden size is twice the encoder's.
            device: device on which the output buffers are allocated.
        """
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    @staticmethod
    def _merge_directions(state):
        """Concatenate each layer's forward and backward state along the feature axis."""
        # nn.LSTM orders the first axis as (layer0_fwd, layer0_bwd, layer1_fwd, ...):
        # even rows are the forward direction, odd rows the backward direction.
        # Taking the first and last `num_layers` rows only pairs them correctly
        # when there is a single layer.
        return torch.cat([state[0::2], state[1::2]], dim=2)

    def forward(self, source, target_tensor):
        """
        Args:
            source: input tensor, (batch_size, input_length, input_dim).
            target_tensor: only its shape is used, to read the number of
                forecasting steps (dim 1) and the output dimension (last dim);
                its values are never fed to the decoder.
        Returns:
            predictions of shape (batch_size, target_length, output_dim).
        """
        batch_size = source.size(0)
        target_length = target_tensor.shape[1]
        output_dim = target_tensor.shape[-1]

        # The encoder starts from its default (zero) state.
        _, (h_n, c_n) = self.encoder(source)
        decoder_hidden = (self._merge_directions(h_n), self._merge_directions(c_n))

        outputs = torch.zeros(batch_size, target_length, output_dim).to(self.device)
        # The first decoder input is all zeros; afterwards each prediction is fed back.
        decoder_output = torch.zeros((batch_size, 1, output_dim), device = self.device)

        for t in range(target_length):
            decoder_output, decoder_hidden = self.decoder(decoder_output, decoder_hidden)
            outputs[:,t:t+1,:] = decoder_output
        return outputs

In [None]:
def run_epoch_train(model, data_generator, model_optimizer, criterion):
    """Run one optimisation pass over ``data_generator``.

    Args:
        model: model called as ``model(inputs, targets)``.
        data_generator: data.DataLoader object yielding ``(x, y)`` batches.
        model_optimizer: optimizer stepped once per batch.
        criterion: loss function applied to each sample of a batch.
    Returns:
        Root of the mean per-batch loss on the training data, rounded to 5 decimals.
    """
    MSE = []
    for x, y in data_generator:
        input_tensor = x.to(device).float()
        target_tensor = y.to(device).float()
        model_optimizer.zero_grad()

        output = model(input_tensor, target_tensor).reshape(target_tensor.shape)
        num_iter = output.size(0)

        # The loss that is back-propagated is the SUM of the per-sample losses;
        # only the logged value is divided by the batch size.
        loss = 0
        for sample_output, sample_target in zip(output, target_tensor):
            loss += criterion(sample_output, sample_target)
        MSE.append(loss.item()/num_iter)

        loss.backward()
        model_optimizer.step()

    epoch_rmse = np.sqrt(np.mean(MSE))
    return round(epoch_rmse, 5)
 

def run_epoch_eval(model, data_generator, criterion, return_pred = False):
    """Evaluate ``model`` on ``data_generator`` without tracking gradients.

    Args:
        model: model called as ``model(inputs, targets)``.
        data_generator: data.DataLoader object yielding ``(x, y)`` batches.
        criterion: loss function applied to each sample of a batch.
        return_pred: when True, also return the predictions.
    Returns:
        Root of the mean per-batch loss, rounded to 5 decimals; with
        ``return_pred`` also the concatenated predictions with their last
        axis squeezed.
    """
    MSE, preds = [], []
    with torch.no_grad():
        for x, y in data_generator:
            input_tensor = x.to(device).float()
            target_tensor = y.to(device).float()
            output = model(input_tensor, target_tensor).reshape(target_tensor.shape)
            preds.append(output.cpu().detach().numpy())

            num_iter = output.size(0)
            loss = 0
            for sample_output, sample_target in zip(output, target_tensor):
                loss += criterion(sample_output, sample_target)
            MSE.append(loss.item()/num_iter)

    if return_pred == True:
        stacked = np.concatenate(preds).squeeze(-1)
        return round(np.sqrt(np.mean(MSE)), 5), stacked
    return round(np.sqrt(np.mean(MSE)), 5)


def train_model(model, X, Y, learning_rate, output_steps, batch_size, train_idx, valid_idx, test_idx, test=False, return_pred=False):
    """Train ``model`` with RMSprop for up to 200 epochs with early stopping.

    Args:
        model: model called as ``model(inputs, targets)``.
        X, Y: scaled features and labels holding all splits.
        learning_rate: initial learning rate (multiplied by 0.8 every 5 epochs).
        output_steps: forecasting horizon passed to ``Dataset``.
        batch_size: batch size of all data loaders.
        train_idx, valid_idx, test_idx: row indexes of the three splits.
        test: when True, also evaluate on the test split after every epoch.
        return_pred: with ``test``, also return the last epoch's test predictions.
    Returns:
        ``(best_model, (train_rmse, valid_rmse), best_preds, min_valid_loss)``,
        followed by ``test_rmse`` when ``test`` is True and by
        ``test_predictions`` when ``return_pred`` is True as well.
        ``best_model`` is ``model`` with the weights of the epoch that had the
        lowest validation RMSE restored.
    """
    # Initialize the model and define optimizer, learning rate decay and criterion
    optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size= 5, gamma=0.8)
    criterion = nn.MSELoss()

    # Split dataset into training set, validation set and test set.
    # The loaders do not change between epochs, so build them once; the
    # shuffling loader still reshuffles every time it is iterated.
    train_rmse, train_set = [], Dataset(X, Y, train_idx, output_steps)
    valid_rmse, valid_set = [], Dataset(X, Y, valid_idx, output_steps)
    train_generator = data.DataLoader(train_set, batch_size = batch_size, shuffle = True)
    valid_generator = data.DataLoader(valid_set, batch_size = batch_size, shuffle = False)
    if test:
        test_rmse, test_set = [], Dataset(X, Y, test_idx, output_steps)
        test_generator = data.DataLoader(test_set, batch_size = batch_size, shuffle = False)

    best_state = None
    best_preds = 0
    min_valid_loss = 1000

    for i in tqdm(range(200)):
        start = time.time()

        model.train()
        train_rmse.append(run_epoch_train(model, train_generator, optimizer, criterion))

        model.eval()
        rmse, predictions = run_epoch_eval(model,  valid_generator, criterion, return_pred = True)
        valid_rmse.append(rmse)

        if test:
            if return_pred:
                t_rmse, test_predictions = run_epoch_eval(model, test_generator, criterion, return_pred = True)
            else:
                t_rmse = run_epoch_eval(model, test_generator, criterion, return_pred = False)
            test_rmse.append(t_rmse)

        if valid_rmse[-1] < min_valid_loss:
            min_valid_loss = valid_rmse[-1]
            best_preds = predictions
            # Keeping a reference to `model` would follow later updates, so
            # snapshot the weights themselves.
            best_state = {name: tensor.detach().clone()
                          for name, tensor in model.state_dict().items()}

        # Step the schedule after the epoch's optimizer updates; stepping it
        # first skips the first value of the learning-rate schedule.
        scheduler.step()

        # Stop once the mean validation RMSE of the last 5 epochs is no better
        # than that of the 5 epochs before.
        if (len(train_rmse) > 15 and np.mean(valid_rmse[-5:]) >= np.mean(valid_rmse[-10:-5])):
            break

    # Printed once, for the last epoch that ran.
    end = time.time()
    print(("Epoch %d:"%(i+1)), ("Loss: %f; "%train_rmse[-1]),("valid_loss: %f; "%valid_rmse[-1]),
          ("Time: %f; "%round(end - start,5)))

    # Restore the best weights so the returned model matches best_preds.
    if best_state is not None:
        model.load_state_dict(best_state)
    best_model = model

    if test:
        if return_pred:
            return best_model, (train_rmse,valid_rmse),  best_preds, min_valid_loss, test_rmse, test_predictions
        return best_model, (train_rmse,valid_rmse),  best_preds, min_valid_loss, test_rmse
    return best_model, (train_rmse,valid_rmse),  best_preds, min_valid_loss

## Best Param

In [None]:
# Hyper-parameters used for this training run.
learning_rate = 0.01
dropout_rate = 0.6
num_layers = 1
hidden_dim = 128

# NOTE(review): input_steps is assigned but not referenced in the visible cells.
input_steps = 60
output_steps = 60
input_size = 2
output_size = 1

# X/Y hold the training rows first, then validation, then test (see the torch.cat
# that builds X_all/Y_all), so the three splits are consecutive index ranges.
train_idx = list(range(training_size))
valid_idx = list(range(training_size, train_valid_size))
test_idx = list(range(train_valid_size, train_valid_size + test_size))

encoder = Encoder(input_size, hidden_dim, num_layers, dropout_rate)
decoder = Decoder(output_size, hidden_dim, num_layers, dropout_rate)
model = Seq2Seq(encoder, decoder, device).to(device)
# test=True makes train_model also return the per-epoch test RMSE list.
model, loss, preds, min_valid_loss, test_rmse = train_model(
    model, X, Y, learning_rate, output_steps = output_steps, batch_size = 64,
    train_idx = train_idx, valid_idx = valid_idx, test_idx = test_idx, test=True)

# Summary of the run; the bulky entries are left commented out.
print({
    'learning_rate': learning_rate,
    'dropout_rate': dropout_rate,
    'num_layers':num_layers,
    'hidden_dim': hidden_dim,
#     'model_state_dict': model.state_dict(),
#     'loss': loss,
    'min_valid_loss': min_valid_loss,
#     'preds':preds,
})

In [None]:
# Persist the hyper-parameters, weights, loss curves and validation predictions of
# the run above. NOTE(review): the destination is a hard-coded absolute path.
torch.save({
            'learning_rate': learning_rate,
            'dropout_rate': dropout_rate,
            'num_layers':num_layers,
            'hidden_dim': hidden_dim,
            'model_state_dict': model.state_dict(),
            'loss': loss,
            'min_valid_loss': min_valid_loss,
            'preds':preds,
#             'test_preds': test_preds,
            },"/home/eliza/mlhc/tune/horizon/lstms_at/aop_at_10min.pt")

In [None]:
# `loss` is the (train_rmse, valid_rmse) pair returned by train_model.
train_rmse, valid_rmse = loss
# Per-epoch validation and test RMSE curves (drawn without labels or a legend).
plt.plot(valid_rmse)
plt.plot(test_rmse)
# The reported test RMSE is the one of the last epoch that ran.
print('Train: %s, Test: %s, Test RMSE: %f' % (TREND, TEST_TREND, test_rmse[-1]) )

In [None]:
def scale_with_avg_std(data_x, data_y, avg, std, c_avg, c_std):
    """Scale features and labels with previously computed statistics.

    Args:
        data_x: features, indexed as ``data_x[:, :, feature]``; it is not
            modified, a scaled copy is returned.
        data_y: labels.
        avg, std: mean and standard deviation applied to feature 0 and to ``data_y``.
        c_avg, c_std: mean and standard deviation applied to feature 1.
    Returns:
        ``(scaled_x, scaled_y)``.
    """
    # Copy first so the caller's data is not overwritten (and is not scaled
    # twice if the same object is passed in again).
    data_x = data_x.clone() if hasattr(data_x, "clone") else data_x.copy()
    data_x[:,:,0] = (data_x[:,:,0] - avg)/std
    data_x[:,:,1] = (data_x[:,:,1] - c_avg)/c_std
    data_y = (data_y-avg)/std
    return data_x, data_y

# Maps a trend code to the test RMSE measured for it by test_trend().
t_rmse_trend_dict = {}
def test_trend(trend):
    """Evaluate the current ``model`` on the 10-minute test files of ``trend``.

    Uses the same trailing window of rows as the main test split, scales it
    with the training statistics and stores the RMSE in ``t_rmse_trend_dict``.
    """
    first_row, last_row = test_input_size-test_size, test_input_size
    x_path = os.path.abspath('../data/data/at_test_10min/%s_x_test' % trend)
    y_path = os.path.abspath('../data/data/at_test_10min/%s_y_test' % trend)
    X_test_trend = torch.load(x_path)[first_row: last_row]
    Y_test_trend = torch.load(y_path)[first_row: last_row]
    X_test_trend, Y_test_trend = scale_with_avg_std(X_test_trend, Y_test_trend, avg, std, c_avg, c_std)

    all_rows = np.array(range(Y_test_trend.shape[0]))
    criterion = nn.MSELoss()
    test_set = Dataset(X_test_trend, Y_test_trend, all_rows, output_steps)
    test_generator = data.DataLoader(test_set, batch_size = 64, shuffle = False)
    # Only the RMSE is kept; the predictions are discarded.
    t_rmse_trend, _ = run_epoch_eval(model, test_generator, criterion, return_pred = True)
    t_rmse_trend_dict[trend] = t_rmse_trend
    
# Evaluate the trained model separately on each single-trend test set and report
# the RMSE per trend code.
for trend in ['i', 'd', 's']:
    test_trend(trend)
print('Train: %s, test rmse: %s' % (TREND, t_rmse_trend_dict))

## Evaluation on I, D, S

In [None]:
def scale_with_avg_std(data_x, data_y, avg, std, c_avg, c_std):
    """Scale features and labels with previously computed statistics.

    Args:
        data_x: features, indexed as ``data_x[:, :, feature]``; it is not
            modified, a scaled copy is returned.
        data_y: labels.
        avg, std: mean and standard deviation applied to feature 0 and to ``data_y``.
        c_avg, c_std: mean and standard deviation applied to feature 1.
    Returns:
        ``(scaled_x, scaled_y)``.
    """
    # Copy first so the caller's data is not overwritten (and is not scaled
    # twice if the same object is passed in again).
    data_x = data_x.clone() if hasattr(data_x, "clone") else data_x.copy()
    data_x[:,:,0] = (data_x[:,:,0] - avg)/std
    data_x[:,:,1] = (data_x[:,:,1] - c_avg)/c_std
    data_y = (data_y-avg)/std
    return data_x, data_y

# Maps a trend code to the test RMSE measured for it by test_trend().
t_rmse_trend_dict = {}
def test_trend(trend):
    """Evaluate the current ``model`` on the first ``test_size`` rows of the
    ``at_test`` files of ``trend``.

    The data is scaled with the training statistics and the resulting RMSE is
    stored in ``t_rmse_trend_dict``.
    """
    x_path = os.path.abspath('../data/data/at_test/%s_x_test' % trend)
    y_path = os.path.abspath('../data/data/at_test/%s_y_test' % trend)
    X_test_trend = torch.load(x_path)[: test_size]
    Y_test_trend = torch.load(y_path)[: test_size]
    X_test_trend, Y_test_trend = scale_with_avg_std(X_test_trend, Y_test_trend, avg, std, c_avg, c_std)

    all_rows = np.array(range(Y_test_trend.shape[0]))
    criterion = nn.MSELoss()
    test_set = Dataset(X_test_trend, Y_test_trend, all_rows, output_steps)
    test_generator = data.DataLoader(test_set, batch_size = 64, shuffle = False)
    # Only the RMSE is kept; the predictions are discarded.
    t_rmse_trend, _ = run_epoch_eval(model, test_generator, criterion, return_pred = True)
    t_rmse_trend_dict[trend] = t_rmse_trend
    
# Evaluate the trained model separately on each single-trend test set and report
# the RMSE per trend code.
# NOTE(review): this cell repeats the earlier evaluation cell but reads from
# at_test instead of at_test_10min and replaces t_rmse_trend_dict -- confirm which
# of the two result sets is meant to be saved below.
for trend in ['i', 'd', 's']:
    test_trend(trend)
print('Train: %s, test rmse: %s' % (TREND, t_rmse_trend_dict))

In [None]:
lstm_rmse_dict = {}
# Every run is stored in its own numbered sub-directory ("cycle") of this root.
LSTM_RMSE_ROOT = "/home/eliza/mlhc/tune/rmse/lstm_at"
if not os.path.exists(LSTM_RMSE_ROOT):
    os.makedirs(LSTM_RMSE_ROOT)
# Only purely numeric entries are cycles; skip anything else (hidden files,
# checkpoint folders, ...) instead of crashing in int().
existing_cycles = [int(cycle_count) for cycle_count in os.listdir(LSTM_RMSE_ROOT)
                   if cycle_count.isdigit()]
CYCLE = 0 if len(existing_cycles) == 0 else max(existing_cycles) + 1
LSTM_RMSE_DIR = "/home/eliza/mlhc/tune/rmse/lstm_at/{}".format(CYCLE)
if not os.path.exists(LSTM_RMSE_DIR):
    os.makedirs(LSTM_RMSE_DIR)
print('Save cycle as: {}'.format(CYCLE))

# Merge with results already stored for this cycle, if any.
if os.path.exists(join(LSTM_RMSE_DIR, 'aop.pt')):
    lstm_rmse_dict = torch.load(join(LSTM_RMSE_DIR, 'aop.pt'))
# Stored per training trend: [last-epoch test RMSE, RMSE on 'i', on 'd', on 's'].
lstm_rmse_dict[TREND] = [test_rmse[-1], t_rmse_trend_dict['i'], t_rmse_trend_dict['d'], t_rmse_trend_dict['s']]
torch.save(lstm_rmse_dict, join(LSTM_RMSE_DIR, 'aop.pt'))
# Read the file back so the cell displays what was actually written.
torch.load(join(LSTM_RMSE_DIR, 'aop.pt'))

## Random Search

In [10]:
# Search space for the random hyper-parameter search. Note that the learning-rate
# key contains a space ('learning rate').
param_grid = {
    'learning rate': [0.1,0.01, 0.001, 0.0001],
    'dropout_rate': list(np.linspace(0.2, 0.8, 4)),
#     'num_layers': list(range(1,4)),
    'hidden_dim': list(range(64, 512, 64)),
}
# com = total number of combinations in the grid.
com = 1
for x in param_grid.values():
    com *= len(x)
# Only use 20 percent of total number of combinations
max_evals = int(com*0.2)
# Draw one value per hyper-parameter, independently of the others.
# NOTE(review): no random seed is set in the visible cells, so the draw differs
# between runs.
random_params = {k: random.sample(v, 1)[0] for k, v in param_grid.items()}
learning_rate = random_params["learning rate"]
dropout_rate = random_params["dropout_rate"]
# num_layers = random_params["num_layers"]
hidden_dim = random_params["hidden_dim"]

In [15]:
# Display the hidden_dim candidates produced by the range used in param_grid.
list(range(64, 512, 64))

[64, 128, 192, 256, 320, 384, 448]

In [13]:
# Pre-sample max_evals configurations, keyed by trial number. Each value is drawn
# independently, so the same configuration can appear more than once.
random_params_dict = {}
for i in range(0, max_evals):
    random_params_dict[i] = {k: random.sample(v, 1)[0] for k, v in param_grid.items()}
# print(random_params_dict)

In [14]:
# Display the sampled configurations.
random_params_dict

{0: {'learning rate': 0.01, 'dropout_rate': 0.8, 'hidden_dim': 384},
 1: {'learning rate': 0.001, 'dropout_rate': 0.4, 'hidden_dim': 192},
 2: {'learning rate': 0.01, 'dropout_rate': 0.4, 'hidden_dim': 192},
 3: {'learning rate': 0.01, 'dropout_rate': 0.2, 'hidden_dim': 128},
 4: {'learning rate': 0.1, 'dropout_rate': 0.4, 'hidden_dim': 384},
 5: {'learning rate': 0.001, 'dropout_rate': 0.8, 'hidden_dim': 448},
 6: {'learning rate': 0.0001, 'dropout_rate': 0.2, 'hidden_dim': 64},
 7: {'learning rate': 0.0001, 'dropout_rate': 0.4, 'hidden_dim': 384},
 8: {'learning rate': 0.001,
  'dropout_rate': 0.6000000000000001,
  'hidden_dim': 384},
 9: {'learning rate': 0.01,
  'dropout_rate': 0.6000000000000001,
  'hidden_dim': 256},
 10: {'learning rate': 0.01, 'dropout_rate': 0.4, 'hidden_dim': 256},
 11: {'learning rate': 0.01, 'dropout_rate': 0.4, 'hidden_dim': 320},
 12: {'learning rate': 0.001,
  'dropout_rate': 0.6000000000000001,
  'hidden_dim': 256},
 13: {'learning rate': 0.0001, 'dropo

In [None]:
random_params_dict = {0: {u'learning rate': 0.001, u'dropout_rate': 0.2, u'hidden_dim': 128, u'num_layers': 1}, 1: {u'learning rate': 0.001, u'dropout_rate': 0.4, u'hidden_dim': 320, u'num_layers': 3}, 2: {u'learning rate': 0.1, u'dropout_rate': 0.8, u'hidden_dim': 384, u'num_layers': 2}, 3: {u'learning rate': 0.01, u'dropout_rate': 0.2, u'hidden_dim': 448, u'num_layers': 3}, 4: {u'learning rate': 0.001, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 192, u'num_layers': 2}, 5: {u'learning rate': 0.01, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 256, u'num_layers': 3}, 6: {u'learning rate': 0.1, u'dropout_rate': 0.4, u'hidden_dim': 192, u'num_layers': 1}, 7: {u'learning rate': 0.01, u'dropout_rate': 0.8, u'hidden_dim': 448, u'num_layers': 3}, 8: {u'learning rate': 0.1, u'dropout_rate': 0.8, u'hidden_dim': 128, u'num_layers': 1}, 9: {u'learning rate': 0.001, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 192, u'num_layers': 3}, 10: {u'learning rate': 0.0001, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 256, u'num_layers': 2}, 11: {u'learning rate': 0.0001, u'dropout_rate': 0.2, u'hidden_dim': 64, u'num_layers': 3}, 12: {u'learning rate': 0.0001, u'dropout_rate': 0.8, u'hidden_dim': 128, u'num_layers': 1}, 13: {u'learning rate': 0.1, u'dropout_rate': 0.8, u'hidden_dim': 448, u'num_layers': 1}, 14: {u'learning rate': 0.001, u'dropout_rate': 0.8, u'hidden_dim': 192, u'num_layers': 1}, 15: {u'learning rate': 0.001, u'dropout_rate': 0.8, u'hidden_dim': 448, u'num_layers': 3}, 16: {u'learning rate': 0.1, u'dropout_rate': 0.8, u'hidden_dim': 64, u'num_layers': 3}, 17: {u'learning rate': 0.1, u'dropout_rate': 0.8, u'hidden_dim': 128, u'num_layers': 1}, 18: {u'learning rate': 0.0001, u'dropout_rate': 0.4, u'hidden_dim': 448, u'num_layers': 1}, 19: {u'learning rate': 0.0001, u'dropout_rate': 0.8, u'hidden_dim': 64, u'num_layers': 1}, 20: {u'learning rate': 0.0001, u'dropout_rate': 0.4, u'hidden_dim': 128, u'num_layers': 1}, 21: {u'learning rate': 0.01, 
u'dropout_rate': 0.8, u'hidden_dim': 256, u'num_layers': 1}, 22: {u'learning rate': 0.0001, u'dropout_rate': 0.2, u'hidden_dim': 256, u'num_layers': 1}, 23: {u'learning rate': 0.1, u'dropout_rate': 0.4, u'hidden_dim': 192, u'num_layers': 2}, 24: {u'learning rate': 0.0001, u'dropout_rate': 0.2, u'hidden_dim': 128, u'num_layers': 2}, 25: {u'learning rate': 0.0001, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 320, u'num_layers': 2}, 26: {u'learning rate': 0.01, u'dropout_rate': 0.8, u'hidden_dim': 448, u'num_layers': 3}, 27: {u'learning rate': 0.0001, u'dropout_rate': 0.2, u'hidden_dim': 384, u'num_layers': 1}, 28: {u'learning rate': 0.01, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 320, u'num_layers': 1}, 29: {u'learning rate': 0.1, u'dropout_rate': 0.2, u'hidden_dim': 320, u'num_layers': 2}, 30: {u'learning rate': 0.0001, u'dropout_rate': 0.4, u'hidden_dim': 192, u'num_layers': 3}, 31: {u'learning rate': 0.01, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 448, u'num_layers': 1}, 32: {u'learning rate': 0.01, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 64, u'num_layers': 1}, 33: {u'learning rate': 0.0001, u'dropout_rate': 0.4, u'hidden_dim': 64, u'num_layers': 3}, 34: {u'learning rate': 0.1, u'dropout_rate': 0.2, u'hidden_dim': 64, u'num_layers': 1}, 35: {u'learning rate': 0.001, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 320, u'num_layers': 2}, 36: {u'learning rate': 0.0001, u'dropout_rate': 0.2, u'hidden_dim': 448, u'num_layers': 1}, 37: {u'learning rate': 0.0001, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 320, u'num_layers': 2}, 38: {u'learning rate': 0.0001, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 64, u'num_layers': 2}, 39: {u'learning rate': 0.0001, u'dropout_rate': 0.8, u'hidden_dim': 128, u'num_layers': 3}, 40: {u'learning rate': 0.001, u'dropout_rate': 0.4, u'hidden_dim': 256, u'num_layers': 3}, 41: {u'learning rate': 0.0001, u'dropout_rate': 0.8, u'hidden_dim': 64, u'num_layers': 1}, 42: {u'learning 
rate': 0.01, u'dropout_rate': 0.8, u'hidden_dim': 128, u'num_layers': 3}, 43: {u'learning rate': 0.0001, u'dropout_rate': 0.8, u'hidden_dim': 64, u'num_layers': 3}, 44: {u'learning rate': 0.01, u'dropout_rate': 0.4, u'hidden_dim': 64, u'num_layers': 3}, 45: {u'learning rate': 0.01, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 448, u'num_layers': 3}, 46: {u'learning rate': 0.0001, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 256, u'num_layers': 1}, 47: {u'learning rate': 0.01, u'dropout_rate': 0.2, u'hidden_dim': 448, u'num_layers': 3}, 48: {u'learning rate': 0.01, u'dropout_rate': 0.2, u'hidden_dim': 320, u'num_layers': 3}, 49: {u'learning rate': 0.01, u'dropout_rate': 0.4, u'hidden_dim': 64, u'num_layers': 1}, 50: {u'learning rate': 0.01, u'dropout_rate': 0.2, u'hidden_dim': 256, u'num_layers': 3}, 51: {u'learning rate': 0.01, u'dropout_rate': 0.2, u'hidden_dim': 384, u'num_layers': 3}, 52: {u'learning rate': 0.01, u'dropout_rate': 0.4, u'hidden_dim': 192, u'num_layers': 1}, 53: {u'learning rate': 0.0001, u'dropout_rate': 0.8, u'hidden_dim': 64, u'num_layers': 3}, 54: {u'learning rate': 0.0001, u'dropout_rate': 0.2, u'hidden_dim': 320, u'num_layers': 3}, 55: {u'learning rate': 0.1, u'dropout_rate': 0.8, u'hidden_dim': 192, u'num_layers': 1}, 56: {u'learning rate': 0.1, u'dropout_rate': 0.2, u'hidden_dim': 192, u'num_layers': 1}, 57: {u'learning rate': 0.0001, u'dropout_rate': 0.2, u'hidden_dim': 64, u'num_layers': 1}, 58: {u'learning rate': 0.1, u'dropout_rate': 0.4, u'hidden_dim': 384, u'num_layers': 2}, 59: {u'learning rate': 0.001, u'dropout_rate': 0.8, u'hidden_dim': 384, u'num_layers': 3}, 60: {u'learning rate': 0.0001, u'dropout_rate': 0.6000000000000001, u'hidden_dim': 64, u'num_layers': 3}, 61: {u'learning rate': 0.01, u'dropout_rate': 0.8, u'hidden_dim': 64, u'num_layers': 1}, 62: {u'learning rate': 0.001, u'dropout_rate': 0.4, u'hidden_dim': 64, u'num_layers': 2}, 63: {u'learning rate': 0.1, u'dropout_rate': 0.8, u'hidden_dim': 256, 
u'num_layers': 1}, 64: {u'learning rate': 0.0001, u'dropout_rate': 0.2, u'hidden_dim': 256, u'num_layers': 1}, 65: {u'learning rate': 0.1, u'dropout_rate': 0.2, u'hidden_dim': 384, u'num_layers': 3}, 66: {u'learning rate': 0.1, u'dropout_rate': 0.8, u'hidden_dim': 384, u'num_layers': 3}}

In [None]:
input_steps = 30
output_steps = 60
input_size = 2
output_size = 1

train_idx = list(range(3000))
valid_idx = list(range(3000,4000))
test_idx = list(range(4000,5000))
# Resume the search at trial 31 and run every remaining pre-sampled configuration.
# The bound comes from random_params_dict itself so it cannot disagree with a
# max_evals computed from a different grid.
for i in tqdm(range(31, len(random_params_dict))):
    # Use the configuration sampled for this trial; otherwise every trial would
    # train with whatever values the notebook globals currently hold.
    random_params = random_params_dict[i]
    learning_rate = random_params["learning rate"]
    dropout_rate = random_params["dropout_rate"]
    # 'num_layers' is missing when the grid was sampled without it; keep the
    # current value in that case.
    num_layers = random_params.get("num_layers", num_layers)
    hidden_dim = random_params["hidden_dim"]

    torch.cuda.empty_cache()
    encoder = Encoder(input_size, hidden_dim, num_layers, dropout_rate)
    decoder = Decoder(output_size, hidden_dim, num_layers, dropout_rate)
    model = Seq2Seq(encoder, decoder, device).to(device)
    model, loss, preds, min_valid_loss = train_model(model, X, Y,
                                                     learning_rate,
                                                     output_steps = output_steps,
                                                     batch_size = 64,
                                                     train_idx = train_idx,
                                                     valid_idx = valid_idx,
                                                     test_idx = test_idx)
    try:
        torch.save({
                'learning_rate': learning_rate,
                'dropout_rate': dropout_rate,
                'num_layers':num_layers,
                'hidden_dim': hidden_dim,
                'model_state_dict': model.state_dict(),
                'loss': loss,
                'min_valid_loss': min_valid_loss,
                'preds':preds,
                },"/home/eliza/mlhc/tune/final/lstms_at/aop-" + str(i) +".pt")
    except Exception as err:
        # A failed save must not stop the search, but it must not go unnoticed.
        print("Could not save trial %d: %s" % (i, err))
        continue

## RMSE for varying forecast horizon

In [None]:
# Best parameters selected by random search.
best_params = {
    "learning_rate": 0.01,
    "dropout_rate": 0.6,
    "num_layers": 1,
    "hidden_dim": 128,
}

input_steps = 30
output_steps = 60
input_size = 2
output_size = 1


learning_rate = best_params["learning_rate"]
dropout_rate = best_params["dropout_rate"]
num_layers = best_params["num_layers"]
hidden_dim = best_params["hidden_dim"]

torch.cuda.empty_cache()
encoder = Encoder(input_size, hidden_dim, num_layers, dropout_rate)
decoder = Decoder(output_size, hidden_dim, num_layers, dropout_rate)
model = Seq2Seq(encoder, decoder, device).to(device)
# train_model needs the data and the split indexes as well; train_idx, valid_idx
# and test_idx are the ones defined by the earlier training cells.
model, loss, preds, min_valid_loss = train_model(
    model, X, Y, learning_rate, output_steps = output_steps, batch_size = 64,
    train_idx = train_idx, valid_idx = valid_idx, test_idx = test_idx)

In [None]:
# Persist the horizon run; the file name encodes output_steps and `k`.
# NOTE(review): `k` is not defined at module level in the visible cells, so this
# cell raises NameError on a fresh kernel -- confirm what index it should be.
# NOTE(review): the destination is a hard-coded absolute path under a different
# home directory than the other save cells.
torch.save({
            'learning_rate': learning_rate,
            'dropout_rate': dropout_rate,
            'num_layers':num_layers,
            'hidden_dim': hidden_dim,
            'model_state_dict': model.state_dict(),
            'loss': loss,
            'min_valid_loss': min_valid_loss,
            'preds':preds,
            },"/home/rui/KDD/Tune/Horizon/LSTMs/101AOP-"+ str(output_steps) + "-" + str(k) + ".pt")    

## Best I-D-S Model

In [None]:
def get_model_state(model_name):
    """Return the ``model_state_dict`` stored in ``../tune/horizon/<model_name>/aop_at.pt``."""
    checkpoint_path = os.path.abspath('../tune/horizon/%s/aop_at.pt' % model_name)
    checkpoint = torch.load(checkpoint_path)
    return checkpoint['model_state_dict']

In [None]:
# Best parameters selected by random search.
# They must match the architecture of the checkpoint loaded below, otherwise
# load_state_dict fails on mismatching parameter shapes.
learning_rate = 0.01
dropout_rate = 0.6
num_layers = 1
hidden_dim = 128

input_steps = 30
output_steps = 60
input_size = 2
output_size = 1

torch.cuda.empty_cache()
# Rebuild an untrained model and then overwrite its weights from the checkpoint.
encoder = Encoder(input_size, hidden_dim, num_layers, dropout_rate)
decoder = Decoder(output_size, hidden_dim, num_layers, dropout_rate)
model = Seq2Seq(encoder, decoder, device).to(device)
        
model.load_state_dict(get_model_state('lstms_at'))
# Switch to evaluation mode; as the last expression this also displays the model.
model.eval()

## Test Case with One Patient

In [None]:
# Sanity check of the RT -> AT conversion.
def rt_to_at(rt):
    """Average every run of STEP_SIZE consecutive samples within each window.

    ``rt`` is reshaped to (windows, WINDOW_SIZE / STEP_SIZE, STEP_SIZE, 2) and
    the STEP_SIZE axis is reduced by its mean.
    """
    steps_per_window = int(WINDOW_SIZE/STEP_SIZE)
    chunked = rt.reshape(rt.shape[0], steps_per_window, STEP_SIZE, 2)
    return chunked.mean(2)
# Pick a random slice file and a random window from it.
# NOTE(review): no random seed is set in the visible cells, so a different
# patient/window is shown on every run.
random_patient_index = np.random.randint(0, len(listdir(SLICES_DIR)))
test_rt_data_file = listdir(SLICES_DIR)[random_patient_index]
test_rt_data = torch.load(join(SLICES_DIR, test_rt_data_file))
test_at_data = rt_to_at(test_rt_data)
plt.figure(figsize=(15,2.5))
random_data_index = np.random.randint(0, len(test_rt_data))
# Channel 0 of the chosen window at its original resolution ...
rt_data_sample = test_rt_data[random_data_index][:, 0]
# ... and its averaged version, each value repeated STEP_SIZE times so both curves
# have WINDOW_SIZE points and can be overlaid.
at_data_sample = test_at_data[random_data_index][:, 0].view(-1, 1).repeat(1,  STEP_SIZE).reshape(WINDOW_SIZE)
plt.plot(rt_data_sample, label="RT", linewidth=0.3)
plt.plot(at_data_sample, label="AT", linewidth=3)
plt.show()

In [None]:
PATIENT_ID = '1715047' # 171504 data points, 17150 seconds, 43.06667 minutes
x = torch.load(os.path.abspath('../data/data/patient/%s_at_x' % PATIENT_ID))
y = torch.load(os.path.abspath('../data/data/patient/%s_at_y' % PATIENT_ID))
# Note: this rebinds sample_size/pred_length/feature_count, which the data
# loading cell also sets from the training data.
sample_size, pred_length, feature_count = x.shape

def scale_with_avg_std(data_x, data_y, avg, std, c_avg, c_std):
    """Scale features and labels with previously computed statistics.

    Args:
        data_x: features, indexed as ``data_x[:, :, feature]``; it is not
            modified, a scaled copy is returned.
        data_y: labels.
        avg, std: mean and standard deviation applied to feature 0 and to ``data_y``.
        c_avg, c_std: mean and standard deviation applied to feature 1.
    Returns:
        ``(scaled_x, scaled_y)``.
    """
    # Copy first so the caller's data is not overwritten.
    data_x = data_x.clone() if hasattr(data_x, "clone") else data_x.copy()
    data_x[:,:,0] = (data_x[:,:,0] - avg)/std
    data_x[:,:,1] = (data_x[:,:,1] - c_avg)/c_std
    data_y = (data_y-avg)/std
    return data_x, data_y

# Scale the patient data with the statistics of the training run.
x, y = scale_with_avg_std(x, y, avg, std, c_avg, c_std)

criterion = nn.MSELoss()
# Note: test_rmse is rebound to an empty list here, replacing the per-epoch test
# RMSE list returned by train_model.
test_rmse, test_set = [], Dataset(x, y, np.array(range(sample_size)), output_steps)
test_generator = data.DataLoader(test_set, batch_size = 64, shuffle = False)
t_rmse, test_predictions = run_epoch_eval(model, test_generator, criterion, return_pred = True)

In [None]:
# Model output (numpy array from run_epoch_eval) as a tensor, still in scaled units.
test_predictions_scaled = torch.from_numpy(test_predictions)
# Scaled ground-truth labels with the trailing axis removed.
true_predictions_scaled = y.squeeze(-1)

# Undo the standardisation: scaled * std + avg gives back the original values.
def unscale_data(scaled_data, avg, std):
    """Map standardised values back to their original scale."""
    return scaled_data * std + avg
# Back to original units, using the (avg, std) of feature 0 that also scaled the
# labels. Note: test_predictions is rebound from the scaled numpy array to the
# un-scaled tensor, so this cell is not safe to run twice.
test_predictions = unscale_data(test_predictions_scaled, avg, std)
true_predictions = unscale_data(true_predictions_scaled, avg, std)

In [None]:
# Reload the unscaled patient files: feature 0 of the inputs and the labels with
# their trailing axis removed.
aop_input = torch.load(os.path.abspath('../data/data/patient/%s_at_x' % PATIENT_ID))[..., 0]
actual = torch.load(os.path.abspath('../data/data/patient/%s_at_y' % PATIENT_ID)).squeeze(-1)

In [None]:
# Store the patient-level RMSE, predictions, unscaled inputs and unscaled labels.
# NOTE(review): the destination is a hard-coded absolute path.
torch.save({
            'patient_rmse': t_rmse,
            'patient_preds': test_predictions,
            'aop_input': aop_input,
            'actual': actual,
            },"/home/eliza/mlhc/tune/horizon/lstm_at_%s.pt" % PATIENT_ID)    

In [None]:
# Reload the results written by the previous cell.
# NOTE(review): the variable is named transformer_at_patient although the file it
# reads is the lstm_at_* result file -- the name looks like a copy/paste leftover.
transformer_at_patient = torch.load("/home/eliza/mlhc/tune/horizon/lstm_at_%s.pt" % PATIENT_ID)
aop_input = transformer_at_patient['aop_input']
test_predictions = transformer_at_patient['patient_preds']
print(transformer_at_patient['patient_rmse'])

In [None]:
# Display the shape of the reloaded predictions.
test_predictions.shape

In [None]:
# Overlay inputs and predictions for the first 24 * 360 + 1 rows.
# NOTE(review): the tick labels assume 360 rows per hour -- confirm against the
# sampling of the patient file.
# NOTE(review): labels are passed to plot() but no legend is drawn, and the input
# series is labelled "Ground Truth" although `actual` holds the labels -- confirm.
plt.rcParams["figure.figsize"] = (20,3)
plt.plot(aop_input[:24 * 360 + 1], label = "Ground Truth", linewidth = 0.1, color = "blue")
plt.plot(test_predictions[:24 * 360 + 1], label = "LSTMs(AT)",linewidth = 0.1, color = "orange")


plt.xlabel('Time (hours)')
# One tick every 360 rows, labelled 0..24.
locs = range(0, 24 * 360 + 1, 360)
labels = [str(label) for label in range(0, 25, 1)]
plt.xticks(locs, labels)

plt.savefig(join(IMG_DIR, "lstm_%s_long.png" % PATIENT_ID), dpi = 400, bbox_inches = "tight")
plt.show()

In [None]:
# Plot SUBPLOT_COUNT panels, each showing DATA_COUNT consecutive rows flattened
# into one series.
SUBPLOT_COUNT = 2
DATA_COUNT = 100
# NOTE(review): the reshape below requires every row to hold exactly OUTPUT_STEP
# values; output_steps is 60 in the cells above, so confirm that 30 matches the
# stored predictions.
OUTPUT_STEP = 30
SUBPLOT_HEIGHT = 3
fig, axes = plt.subplots(SUBPLOT_COUNT, 1, sharex=True, sharey=True, figsize=(15, SUBPLOT_HEIGHT * SUBPLOT_COUNT))

for i in range(SUBPLOT_COUNT):
    # Rows i*DATA_COUNT .. (i+1)*DATA_COUNT - 1, flattened to a single 1-D series.
    plot_aop_input = aop_input[i * DATA_COUNT : (i + 1) * DATA_COUNT].reshape(OUTPUT_STEP * DATA_COUNT)
    plot_test_predictions = test_predictions[i * DATA_COUNT : (i + 1) * DATA_COUNT].reshape(OUTPUT_STEP * DATA_COUNT)
    axes[i].plot(plot_aop_input, label = "Ground Truth", linewidth = 1, color = "blue")
    axes[i].plot(plot_test_predictions, label = "LSTMs(AT)",linewidth = 1, color = "orange")
    axes[i].grid(axis='y', alpha=0.5)
    
# plt.savefig(join(IMG_DIR, "lstm_%s.png" % PATIENT_ID), dpi = 400, bbox_inches = "tight")