In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import text, sequence
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
import numpy as np
import matplotlib.pyplot as plt
import random
import glob

In [2]:
# --- Sequence geometry ---
OBSERVE_LENGTH = 10   # time steps in each observed input sequence
PREDICT_LENGTH = 5    # time steps in each predicted output sequence
dim_input = 15        # features per observed time step

# --- Network size and optimisation settings ---
n_hidden = 128
learning_rate = 1e-4
batch_size = 64
epochs = 1000

# Run identifier; reused for the checkpoint file and the saved loss-curve figure.
model_name = 'sslstm_epoch_{}_hidden_{}_observe_{}_predict_{}'.format(
    epochs,
    n_hidden,
    OBSERVE_LENGTH,
    PREDICT_LENGTH,
)

In [3]:
# Glob patterns for the per-sample folders. The trailing '/' makes the pattern match
# directories only and leaves a separator at the end of every returned path.
TRAIN_FOLDERS = '/home/dataset/training_observe_{}_predict_{}/*/'.format(OBSERVE_LENGTH, PREDICT_LENGTH)
TEST_FOLDERS = '/home/dataset/validation_observe_{}_predict_{}/*/'.format(OBSERVE_LENGTH, PREDICT_LENGTH)

# glob returns matches in arbitrary, filesystem-dependent order. Sort them so the sample
# order -- and therefore the tail slice that `validation_split` holds out during fit --
# is identical on every run and every machine.
# NOTE(review): if folder names encode recording/scene order, the sorted tail may not be
# a representative validation set; consider a seeded shuffle instead -- confirm.
train_folders = sorted(glob.glob(TRAIN_FOLDERS))
val_ratio = 0.1  # fraction of the training samples held out for validation during fit

print('train folder num:', len(train_folders))

train folder num: 20000


In [4]:
# Resolve the held-out sample folders. Note that TEST_FOLDERS points at the dataset's
# `validation_*` directory, so the "test" set here is that validation split.
test_folders = glob.glob(TEST_FOLDERS)
test_num = len(test_folders)
print('test folder num:', test_num)

test folder num: 4000


In [5]:
def _load_split(folders):
    """Load the `X.npy` / `y.npy` pair of every sample folder into two arrays.

    Args:
        folders: list of folder paths, each ending with a path separator (as produced
            by the `.../*/` glob patterns) and containing `X.npy` and `y.npy`.

    Returns:
        Tuple `(X, y)` of NumPy arrays whose first axis follows the order of `folders`.

    Raises:
        FileNotFoundError: if `folders` is empty, so that a wrong dataset path is
            reported here instead of surfacing later as a shape error in training.
    """
    if not folders:
        raise FileNotFoundError('no sample folders found; check the dataset path')

    inputs, targets = [], []
    for folder in folders:
        # Plain concatenation is intentional: each folder string already ends with '/'.
        inputs.append(np.load(folder + 'X.npy'))
        targets.append(np.load(folder + 'y.npy'))
    return np.array(inputs), np.array(targets)


train_X, train_y = _load_split(train_folders)
test_X, test_y = _load_split(test_folders)

print("train X: ", train_X.shape)
print("train y: ", train_y.shape)

print("test X: ", test_X.shape)
print("test y: ", test_y.shape)

train X:  (20000, 10, 15)
train y:  (20000, 5, 3)
test X:  (4000, 10, 15)
test y:  (4000, 5, 3)


In [6]:
def build_model():
    """Build and compile the GRU encoder-decoder used for sequence prediction.

    Architecture, in order:
      1. Encoder GRU over an input of shape `(OBSERVE_LENGTH, dim_input)` that returns
         only its last output (`return_sequences=False`).
      2. `RepeatVector(PREDICT_LENGTH)` to copy that vector once per predicted step.
      3. Decoder GRU that returns the full sequence (`return_sequences=True`).
      4. A `Dense(3)` applied independently at every time step, followed by a linear
         activation, giving an output of shape `(PREDICT_LENGTH, 3)`.

    Reads the notebook-level settings `learning_rate`, `n_hidden`, `OBSERVE_LENGTH`,
    `PREDICT_LENGTH` and `dim_input`. Prints the model summary as a side effect.

    Returns:
        The compiled `tf.keras.Sequential` model (MSE loss, RMSprop optimizer).
    """
    # `learning_rate` is the documented keyword; `lr` is only a deprecated alias.
    opt = optimizers.RMSprop(learning_rate=learning_rate)
    model = tf.keras.Sequential()
    # GRU encoder
    model.add(layers.GRU(n_hidden,
                  input_shape=(OBSERVE_LENGTH, dim_input),
                  return_sequences=False,
                  stateful=False,
                  dropout=0.2))
    model.add(layers.RepeatVector(PREDICT_LENGTH))
    # GRU decoder
    model.add(layers.GRU(n_hidden,
                  return_sequences=True,
                  stateful=False,
                  dropout=0.2))
    # No input_shape on this layer: Sequential infers it from the previous layer's
    # output, and only the first layer's input_shape is ever used.
    model.add(layers.TimeDistributed(layers.Dense(3)))
    model.add(layers.Activation('linear'))
    model.compile(loss='mse', optimizer=opt)

    # summary() prints the table itself and returns None, so wrapping it in print()
    # would emit a stray "None" line after the table.
    model.summary()
    return model

In [None]:
# Train the encoder-decoder, tracking training and validation loss per epoch
model = build_model()

# save_best_only=True: the .h5 file is rewritten only when the monitored validation
# loss improves, so it always holds the best model seen so far.
checkpointer = ModelCheckpoint(
    filepath="/home/zg2309/model/{}.h5".format(model_name),
    save_best_only=True,
    verbose=1,
)

history = model.fit(
    train_X,
    train_y,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=val_ratio,
    callbacks=[checkpointer],
    verbose=1,
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 128)               55296     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 5, 128)            0         
_________________________________________________________________
gru_1 (GRU)                  (None, 5, 128)            98688     
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 3)              387       
_________________________________________________________________
activation (Activation)      (None, 5, 3)              0         
Total params: 154,371
Trainable params: 154,371
Non-trainable params: 0
_________________________________________________________________
None
Instructions for updating:
Use

Epoch 23/1000
Epoch 00023: val_loss improved from 0.02075 to 0.01872, saving model to /home/zg2309/model/sslstm_epoch_1000_hidden_128_observe_10_predict_5.h5
Epoch 24/1000
Epoch 00024: val_loss did not improve from 0.01872
Epoch 25/1000
Epoch 00025: val_loss improved from 0.01872 to 0.01836, saving model to /home/zg2309/model/sslstm_epoch_1000_hidden_128_observe_10_predict_5.h5
Epoch 26/1000
Epoch 00026: val_loss did not improve from 0.01836
Epoch 27/1000
Epoch 00027: val_loss did not improve from 0.01836
Epoch 28/1000
Epoch 00028: val_loss did not improve from 0.01836
Epoch 29/1000
Epoch 00029: val_loss improved from 0.01836 to 0.01711, saving model to /home/zg2309/model/sslstm_epoch_1000_hidden_128_observe_10_predict_5.h5
Epoch 30/1000
Epoch 00030: val_loss did not improve from 0.01711
Epoch 31/1000
Epoch 00031: val_loss improved from 0.01711 to 0.01560, saving model to /home/zg2309/model/sslstm_epoch_1000_hidden_128_observe_10_predict_5.h5
Epoch 32/1000
Epoch 00032: val_loss did not

Epoch 56/1000
Epoch 00056: val_loss did not improve from 0.01334
Epoch 57/1000
Epoch 00057: val_loss improved from 0.01334 to 0.01317, saving model to /home/zg2309/model/sslstm_epoch_1000_hidden_128_observe_10_predict_5.h5
Epoch 58/1000
Epoch 00058: val_loss did not improve from 0.01317
Epoch 59/1000
Epoch 00059: val_loss did not improve from 0.01317
Epoch 60/1000
Epoch 00060: val_loss did not improve from 0.01317
Epoch 61/1000
Epoch 00061: val_loss improved from 0.01317 to 0.01218, saving model to /home/zg2309/model/sslstm_epoch_1000_hidden_128_observe_10_predict_5.h5
Epoch 62/1000
Epoch 00062: val_loss did not improve from 0.01218
Epoch 63/1000
Epoch 00063: val_loss did not improve from 0.01218
Epoch 64/1000
Epoch 00064: val_loss did not improve from 0.01218
Epoch 65/1000
Epoch 00065: val_loss did not improve from 0.01218
Epoch 66/1000
Epoch 00066: val_loss did not improve from 0.01218
Epoch 67/1000
Epoch 00067: val_loss did not improve from 0.01218
Epoch 68/1000
Epoch 00068: val_los

In [None]:
# Plot training & validation loss values
# Use a fresh, explicitly owned figure instead of whatever pyplot considers "current",
# so artists left over from an earlier cell can never end up in the saved image.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('Training vs. validation loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Validation'], loc='upper left')
# Save before show(), so the written file does not depend on how the active backend
# treats a figure once it has been displayed.
fig.savefig('/home/zg2309/history/{}'.format(model_name))
plt.show()
plt.close(fig)

# Loss on the held-out set.
# NOTE(review): `model` carries the weights from the last epoch run by fit(), which is
# not necessarily the best checkpoint written by ModelCheckpoint -- confirm which of
# the two is meant to be reported.
score = model.evaluate(test_X, test_y, batch_size=batch_size)
print('test loss (mse):', score)