# Sequence prediction

We'll create a seq2seq LSTM model in Keras that predicts one sequence from another. In this case, the input sequence is a random sequence of integers from 1 to 9 (values may repeat). The output sequence is the difference between each pair of successive values in the input sequence, with a 0 padded at the start of the output sequence.

In [1]:
from random import randint
import keras
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import RepeatVector

import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Model parameters

We assume that the only valid sequences contain numbers from 1 to 9. Since each output value is the difference between two input values, the differences between legal values can only range from -8 to +8. Our dictionary is deliberately wider than that: it goes from -17 to +17. We'll use the "unknown" class when a value is outside of our known dictionary (e.g. -18 and +18). We'll also use a pad class in case we want the input and output sequences to vary in length.

In [2]:
# Sequence lengths
n_timesteps_in = 10   # How many values the input sequence holds
n_timesteps_out = 8   # How many values the output sequence holds

n_hidden_units = 200  # How many hidden units each LSTM has

# Inclusive bounds of the values allowed in a legal input sequence
int_low = 1           # Minimum legal value
int_high = 9          # Maximum legal value

# Sentinels that get their own classes in the vocabulary
pad_value = -9999     # Marks padding (end of the sequence)
unknown_value = 9999  # Stands in for anything outside the vocabulary

legal_values = np.arange(int_low, int_high + 1)  # [1-9]

# The vocabulary: the integers -17 to +17 (treated as "classes", not
# "numbers"), followed by the pad and unknown classes. -18 and +18 are
# skipped on purpose to show how the model can predict "unknown".
all_values = np.concatenate(
    (np.arange(1 - int_high * 2, int_high * 2), [pad_value, unknown_value])
)

n_features = all_values.size

## Generating the data
Here we generate the data for the model. Obviously, in a real world scenario we'd be given the input and output data. We need to one hot encode the input and output sequences in order to use categorical cross-entropy as the loss function. We also add padding randomly to the input sequence so that the model can handle variable length inputs.

In [3]:
def generate_sequence(length):
    '''
    Draw `length` values at random (with replacement) from legal_values,
    e.g.  [2, 3, 1, 4, 1, 9, 5, 2]
    Zero, one or two trailing entries are then overwritten with pad_value
    so that the model gets to see sequences of differing effective length.
    '''
    sequence = np.random.choice(legal_values, length)
    n_pad = np.random.randint(0, 3)  # 0, 1 or 2 trailing pad entries
    if n_pad == 0:
        return sequence
    sequence[-n_pad:] = pad_value
    return sequence

def one_hot_encode(sequence):
    '''
    Convert a vector into a one hot encoded matrix using all possible classes
    in our vocabulary (all_values).

    Every value found in all_values gets a 1 in the column of its class;
    every other value gets a 1 in the column of the unknown_value class.

    Returns an array of shape (1, len(sequence), len(all_values)) with the
    same dtype as all_values. An empty sequence yields an array of shape
    (1, 0, len(all_values)) instead of raising.
    '''
    values = list(sequence)  # Accept any iterable, not only sized sequences

    # Allocate the final array up front; this also makes the empty case
    # well defined and avoids the extra copies of building a list first.
    encoding = np.zeros((1, len(values), len(all_values)), dtype=all_values.dtype)
    unknown_mask = all_values == unknown_value

    for row, value in zip(encoding[0], values):
        # `row` is a view into `encoding`, so writing to it fills the result.
        value_mask = all_values == value
        row[value_mask if value_mask.any() else unknown_mask] = 1

    return encoding

def one_hot_decode(encoded_seq):
    '''
    Map every encoded vector back to a vocabulary value by picking the
    entry of all_values at the position of the vector's largest element.
    Returns the decoded values as a list.
    '''
    decoded = []
    for vector in encoded_seq:
        best_class = np.argmax(vector)
        decoded.append(all_values[best_class])
    return decoded

In [4]:
def transform_sequence(sequence):
    '''
    Turn an input sequence into its output sequence by taking the
    difference between each pair of successive values, with a leading 0.
    For example, if the input is [3,  2, 5, 5,  2, 6]
    then the output is:          [0, -1, 3, 0, -3, 4]
    '''
    deltas = np.diff(sequence)
    # One extra slot in front for the leading 0; keep the dtype of the diffs.
    result = np.empty(deltas.size + 1, dtype=deltas.dtype)
    result[0] = 0
    result[1:] = deltas.ravel()
    return result

In [5]:
def get_data(n_in, n_out):
    '''
    Build one random (input, output) training pair.
    The input is a generated sequence of n_in values. The output has the
    same length: its first n_out entries are the transformed input and
    the remaining entries are pad_value.
    Both sequences are returned one hot encoded, as (X, y).
    '''
    source = generate_sequence(n_in)

    # Start from an all-pad target, then fill in the first n_out differences
    target = np.full_like(source, pad_value)
    target[:n_out] = transform_sequence(source)[:n_out]

    return one_hot_encode(source), one_hot_encode(target)

## Data generator
This is a simple data generator to keep pulling random batches for training the model.

In [6]:
def get_batch(n_in, n_out, batch_size):
    '''
    Endless generator of random batches.
    Each iteration draws batch_size fresh pairs from get_data and yields
    them stacked as (X_batch, y_batch).
    '''
    while True:
        samples = [get_data(n_in, n_out) for _ in range(batch_size)]

        # get_data returns arrays with a leading axis of size 1; drop it
        # before stacking the samples into a batch.
        X_batch = np.array([X[0] for X, _ in samples])
        y_batch = np.array([y[0] for _, y in samples])

        yield X_batch, y_batch

## seq2seq model

Here's the entire sequence to sequence model using an encoder of LSTM and a decoder of LSTM. We only pass the last hidden state of the encoder to the decoder.

In [None]:
# define model: encoder LSTM -> encoder output repeated once per output
# timestep -> decoder LSTM -> softmax over the vocabulary at every timestep
model = Sequential([
    LSTM(n_hidden_units, input_shape=(n_timesteps_in, n_features)),
    RepeatVector(n_timesteps_in),
    LSTM(n_hidden_units, return_sequences=True),
    TimeDistributed(Dense(n_features, activation="softmax")),
])
model.compile(
    loss="categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

## Train the model

In [None]:
batch_size = 256         # Batch size for training
num_epochs = 15          # Number of epochs
training_steps = 1000    # Number of training steps per epoch
validation_steps = 100   # Number of validation steps

# Create a batch generator for the training data
train_generator = get_batch(n_timesteps_in, n_timesteps_out, batch_size)

# Create a batch generator for the validation data
validate_generator = get_batch(n_timesteps_in, n_timesteps_out, batch_size)

# Create callbacks for model saving, TensorBoard and early stopping
checkpoint = keras.callbacks.ModelCheckpoint("seq2seq_model.h5", monitor="val_loss",
                                verbose=0, save_best_only=True)
tensorboard = keras.callbacks.TensorBoard(log_dir="./tb_logs", write_graph=True)
early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=4,
                                               verbose=0, mode="auto")

# The callbacks only take effect when they are handed to the training call;
# without `callbacks=` no checkpoint is written, nothing is logged for
# TensorBoard and training never stops early.
history = model.fit_generator(train_generator, steps_per_epoch=training_steps,
                              epochs=num_epochs,
                              validation_data=validate_generator,
                              validation_steps=validation_steps,
                              callbacks=[checkpoint, tensorboard, early_stopping],
                              verbose=2)

Epoch 1/15


## Print some test cases.

In [None]:
# Check a few example predictions to sanity check trained model
num_examples=10
for idx in range(num_examples):
    # Draw a fresh random pair with get_data, the generator function defined
    # above (the previously used name `get_pair` does not exist).
    X,y = get_data(n_timesteps_in, n_timesteps_out)
    yhat = model.predict(X, verbose=0)
    print("*"*20)
    print("Test case #{}".format(idx+1))
    print("Input = ", one_hot_decode(X[0]))
    print("Expected Output:", one_hot_decode(y[0]))
    print("Model Predicted Output:", one_hot_decode(yhat[0]))
    print("*"*20)
    print("\n")

## Try a manual test case

Enter in your own sequence to test the model.

In [None]:
# Values to feed to the model; edit this list to try your own sequence.
manual_sequence = [3,2, 4,5,-3,2,1,4]

# The model was built for inputs of exactly n_timesteps_in values, so fill
# the remaining positions with the pad value (extra values are cut off).
sequence_in = np.full(n_timesteps_in, pad_value)
sequence_in[:len(manual_sequence)] = manual_sequence[:n_timesteps_in]

# Build the expected output the same way get_data does, so that it belongs
# to this input rather than to a leftover `y` from an earlier cell.
sequence_out = np.full_like(sequence_in, pad_value)
sequence_out[:n_timesteps_out] = transform_sequence(sequence_in)[:n_timesteps_out]

X = one_hot_encode(sequence_in)
y = one_hot_encode(sequence_out)
yhat = model.predict(X, verbose=0)
print("*"*20)
print("Manual Test Case")
print("Input = ", one_hot_decode(X[0]))
print("Expected Output:", one_hot_decode(y[0]))
print("Model Predicted Output:", one_hot_decode(yhat[0]))
print("*"*20)
print("\n")