# Sequence prediction

We'll create a seq2seq LSTM model in Keras which will predict one sequence based on another. In this case, the input sequence is a random permutation of numbers from 1 to 9. The output sequence is the difference between every successive value in the input sequence (with 0 to pad the output sequence at the start).

In [1]:
n_timesteps_in = 8   # Number of values in the input sequence
n_timesteps_out = 8  # Number of values in the output sequence 

n_hidden_units = 200   # Number of hidden units in the LSTM

int_low = 1   # Minimum value possible in a legal input sequence
int_high = 9   # Maxmum value possible in a legal input sequence
pad_value = -9999   # Value of pad (end of the sequence)
unknown_value = 9999  # Value of anything not in the expected vocabulary

batch_size = 256

In [2]:
from random import randint
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import RepeatVector
from keras.preprocessing.sequence import pad_sequences

import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
legal_values = np.arange(int_low,int_high+1,1)
all_values = np.append(np.insert(legal_values, 0, pad_value), unknown_value)
combined_values = np.arange(int_high+1, int_high*2)

# This won't predict -18 and +18. So they'll be our unknown values
all_values = np.append(all_values, combined_values)
all_values = np.append(all_values, -combined_values)
n_features = len(all_values)

In [4]:
def generate_sequence(length):
    return np.random.permutation(legal_values)[:length]

def one_hot_encode(sequence):
    encoding = []
    for value in sequence:
        vector = np.zeros_like(all_values)
        if value in all_values:
            vector[np.where(all_values==value)[0]] = 1
        else:
            vector[np.where(all_values==unknown_value)[0]] = 1
        encoding.append(vector)
        
    encoding = np.array(encoding)
    encoding = encoding.reshape(1, encoding.shape[0], encoding.shape[1])
    return np.array(encoding)

def one_hot_decode(encoded_seq):
    return [all_values[np.argmax(vector)] for vector in encoded_seq]

In [5]:
sequence_in = generate_sequence(8)
sequence_transform = np.insert(np.diff(sequence_in), 0, 0)
sequence_out = np.ones_like(sequence_in)*pad_value
sequence_out[:8] = sequence_transform[:8]

In [6]:
def get_pair(n_in, n_out):
    # generate random sequence from possible values
    sequence_in = generate_sequence(n_in)
    sequence_transform = np.insert(np.diff(sequence_in), 0, 0)
    sequence_out = np.ones_like(sequence_in)*pad_value
    sequence_out[:n_out] = sequence_transform[:n_out]
    
    # One Hot Encode the input values
    X = one_hot_encode(sequence_in)
    y = one_hot_encode(sequence_out)
    
    return X,y

In [7]:
def get_batch(n_in, n_out, batch_size):
    
    while True:
        
        X_batch = []
        y_batch = []
        
        for idx in range(batch_size):
            
            X,y = get_pair(n_in, n_out)
            X_batch.append(X[0])
            y_batch.append(y[0])
            
        yield np.array(X_batch), np.array(y_batch)

In [None]:
# define model
model = Sequential()
model.add(LSTM(n_hidden_units, input_shape=(n_timesteps_in, n_features)))
model.add(RepeatVector(n_timesteps_in))
model.add(LSTM(n_hidden_units, return_sequences=True))
model.add(TimeDistributed(Dense(n_features, activation="softmax")))
model.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=["accuracy"])

In [None]:
num_epochs = 15
training_steps = 1000
validation_steps = 100

train_generator = get_batch(n_timesteps_in, n_timesteps_out, batch_size)
validate_generator = get_batch(n_timesteps_in, n_timesteps_out, batch_size)

history = model.fit_generator(train_generator, steps_per_epoch=training_steps, epochs=num_epochs, 
                              validation_data=validate_generator, validation_steps=validation_steps,
                              verbose=2)

Epoch 1/15
 - 81s - loss: 2.8934 - acc: 0.1738 - val_loss: 2.7325 - val_acc: 0.1835
Epoch 2/15
 - 76s - loss: 2.5357 - acc: 0.2037 - val_loss: 2.3190 - val_acc: 0.2324
Epoch 3/15
 - 71s - loss: 1.9871 - acc: 0.3192 - val_loss: 1.5351 - val_acc: 0.4506
Epoch 4/15
 - 71s - loss: 1.1586 - acc: 0.5737 - val_loss: 0.8839 - val_acc: 0.6603
Epoch 5/15
 - 71s - loss: 0.6639 - acc: 0.7655 - val_loss: 0.5222 - val_acc: 0.8196
Epoch 6/15
 - 71s - loss: 0.4263 - acc: 0.8629 - val_loss: 0.3134 - val_acc: 0.9180
Epoch 7/15
 - 72s - loss: 0.2863 - acc: 0.9159 - val_loss: 0.2064 - val_acc: 0.9479
Epoch 8/15
 - 72s - loss: 0.1972 - acc: 0.9451 - val_loss: 0.1096 - val_acc: 0.9840
Epoch 9/15
 - 72s - loss: 0.1394 - acc: 0.9624 - val_loss: 0.0604 - val_acc: 0.9940
Epoch 10/15
 - 72s - loss: 0.1049 - acc: 0.9718 - val_loss: 0.0394 - val_acc: 0.9967
Epoch 11/15
 - 74s - loss: 0.0828 - acc: 0.9777 - val_loss: 0.0252 - val_acc: 0.9983
Epoch 12/15
 - 72s - loss: 0.0681 - acc: 0.9817 - val_loss: 0.0182 - val_a

In [None]:
# Check a few example predictions to sanity check trained model
num_examples=10
for idx in range(num_examples):
    X,y = get_pair(n_timesteps_in, n_timesteps_out)
    yhat = model.predict(X, verbose=0)
    print("*"*20)
    print("Test case #{}".format(idx+1))
    print("Input = ", one_hot_decode(X[0]))
    print("Expected Output:", one_hot_decode(y[0]))
    print("Model Predicted Output:", one_hot_decode(yhat[0]))
    print("*"*20)
    print("\n")

In [None]:
X = one_hot_encode([3,2, 4,5,-3,2,1,4])
yhat = model.predict(X, verbose=0)
print("*"*20)
print("Manual Test Case".format(idx+1))
print("Input = ", one_hot_decode(X[0]))
print("Expected Output:", one_hot_decode(y[0]))
print("Model Predicted Output:", one_hot_decode(yhat[0]))
print("*"*20)
print("\n")