For background on the encoder-decoder (sequence-to-sequence) approach used in this notebook, take a look at https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html

In [1]:
%matplotlib inline
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import os
import tensorflow as tf

In [2]:
# --- Experiment configuration ---
root = "../data/"   # directory containing the .npy splits
noise = 0.0         # noise level; it is interpolated into the data file names
history = 100       # encoder window length (see `split` / `seq2seq`)
future = 12         # number of steps to predict
train_batch = 200
test_batch = 200
num_epochs = 5

# --- Load the train / validation / test splits for the chosen noise level ---
train = np.load(os.path.join(root, "noisy%s_train.npy" % noise))
vad = np.load(os.path.join(root, "noisy%s_vad.npy" % noise))
test = np.load(os.path.join(root, "noisy%s_test.npy" % noise))

# Number of whole batches per pass over each split.
# The validation split is deliberately counted with the training batch size.
train_steps, vad_steps, test_steps = (
    train.shape[0] // train_batch,
    vad.shape[0] // train_batch,
    test.shape[0] // test_batch,
)

In [3]:
# Shapes of the train / validation / test arrays, in that order.
tuple(split_array.shape for split_array in (train, vad, test))

((70350, 224), (9365, 224), (8781, 224))

In [4]:
def make_dataset(data):
    """Wrap `data` in a `tf.data.Dataset` via `from_tensor_slices`.

    Args:
        data: value accepted by `tf.data.Dataset.from_tensor_slices`
            (e.g. an array or a nested structure of arrays).

    Returns:
        The resulting `tf.data.Dataset`.
    """
    dataset = tf.data.Dataset.from_tensor_slices(data)
    return dataset

def make_iterator(dataset, batch_size, num_epochs):
    """Batch and repeat `dataset`, then return its one-shot iterator's next-element op.

    Batching is applied before repeating, i.e. each pass over the data is
    batched on its own and the batched sequence is repeated `num_epochs` times.

    Args:
        dataset: a `tf.data.Dataset`.
        batch_size: number of consecutive elements per batch.
        num_epochs: number of times the batched dataset is repeated.

    Returns:
        The result of `get_next()` on a one-shot iterator over the pipeline.
    """
    batched = dataset.batch(batch_size)
    repeated = batched.repeat(num_epochs)
    iterator = repeated.make_one_shot_iterator()
    return iterator.get_next()

def split(data, history, future, batch_size, num_epochs=1):
    """Build encoder/decoder arrays for seq2seq training from windowed rows.

    Each row of `data` is read as consecutive column groups:
    `history` + `future` columns that are not used here, followed by
    `history` columns (encoder input) and `future` columns (labels).

    Args:
        data: 2-D array with at least ``2 * (history + future)`` columns.
        history: number of encoder time steps.
        future: number of label steps to predict.
        batch_size: unused; kept so existing callers keep working.
        num_epochs: unused; kept so existing callers keep working.

    Returns:
        ``((yf, yl_input), yl_output)`` where

        * ``yf`` has shape ``(n, history, 1)`` and the dtype of `data`;
        * ``yl_input`` is float32 of shape ``(n, future + 1, 1)``: a leading 0
          ("start of sentence") followed by the labels (decoder input);
        * ``yl_output`` is float32 of shape ``(n, future + 1, 1)``: the labels
          followed by a trailing 0 ("end of sentence") (decoder target).
    """
    label_start = 2 * history + future
    yf = data[:, history + future:label_start]
    # Bound the label slice to `future` columns. An open-ended slice would let
    # any extra trailing columns silently lengthen the decoder arrays beyond
    # `future + 1`.
    yl = data[:, label_start:label_start + future]

    n_samples, n_steps = yl.shape
    padded_shape = (n_samples, n_steps + 1)

    # add start of sentence to labels (input to the decoder)
    yl_input = np.zeros(padded_shape, dtype=np.float32)
    yl_input[:, 1:] = yl
    # add end of sentence to labels (output of the decoder)
    yl_output = np.zeros(padded_shape, dtype=np.float32)
    yl_output[:, :-1] = yl

    # add a trailing feature dimension of size 1 to every array
    return (yf[:, :, np.newaxis], yl_input[:, :, np.newaxis]), yl_output[:, :, np.newaxis]

# Despite the `_gen` suffix these are plain in-memory tuples of the form
# ((encoder_input, decoder_input), decoder_target), as returned by `split`.
train_gen = split(train, history, future, batch_size=train_batch, num_epochs=num_epochs)
vad_gen = split(vad, history, future, batch_size=train_batch, num_epochs=1)
test_gen = split(test, history, future, batch_size=test_batch, num_epochs=1)

In [5]:
# Shapes of the encoder input, decoder input and decoder target for the training split.
tuple(array.shape for array in (train_gen[0][0], train_gen[0][1], train_gen[1]))

((70350, 100, 1), (70350, 13, 1), (70350, 13, 1))

In [6]:
from tensorflow.python.keras.layers import Dense, LSTM, Input
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.losses import MAE
from tensorflow.python.keras.metrics import MAPE
from tensorflow.python.keras.backend import clear_session

In [7]:
def seq2seq(history, future, latent_dim):
    """Build an uncompiled LSTM encoder-decoder model.

    Args:
        history: number of time steps fed to the encoder; the encoder input
            has shape ``(history, 1)``.
        future: number of steps to predict; the decoder input has shape
            ``(future + 1, 1)``.
        latent_dim: number of units in both the encoder and decoder LSTM.

    Returns:
        A Keras `Model` mapping ``[encoder_inputs, decoder_inputs]`` to the
        decoder's per-step output of size 1.
    """
    # NOTE(review): clear_session() runs on every call and resets global Keras
    # state, so models built by earlier calls may stop being usable afterwards.
    # Confirm this is intended before reusing an older model.
    clear_session()

    # --- Encoder: consume the input sequence, keep only the final states ---
    enc_in = Input(shape=(history, 1))
    enc_lstm = LSTM(latent_dim, return_state=True)
    _, enc_h, enc_c = enc_lstm(enc_in)  # the encoder's output itself is discarded
    enc_states = [enc_h, enc_c]

    # --- Decoder: starts from the encoder states ---
    dec_in = Input(shape=(future + 1, 1))
    # The decoder returns the full output sequence plus its internal states.
    # The states are ignored in this training model; they are only needed
    # when running step-by-step inference.
    dec_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    dec_seq, _, _ = dec_lstm(dec_in, initial_state=enc_states)
    dec_out = Dense(1, activation='selu')(dec_seq)

    # Training model: (encoder input, decoder input) -> decoder target.
    return Model([enc_in, dec_in], dec_out)

In [8]:
# Model 2: 50 latent units, trained on MAE.
m2 = seq2seq(history, future, latent_dim=50)
m2.compile(optimizer='rmsprop', loss=MAE, metrics=[MAPE, MAE])
h2 = m2.fit(
    x=train_gen[0],
    y=train_gen[1],
    batch_size=train_batch,
    epochs=num_epochs,
    validation_data=vad_gen,
)

Train on 70350 samples, validate on 9365 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [9]:
# Model 3: 50 latent units, trained on MAPE instead of MAE.
# NOTE(review): every target sequence produced by `split` ends in a padded 0,
# and MAPE scales the error by |y_true|. Confirm the loss behaves as intended
# on that final step.
m3 = seq2seq(history, future, latent_dim=50)
m3.compile(optimizer='rmsprop', loss=MAPE, metrics=[MAPE, MAE])
h3 = m3.fit(
    x=train_gen[0],
    y=train_gen[1],
    batch_size=train_batch,
    epochs=num_epochs,
    validation_data=vad_gen,
)

Train on 70350 samples, validate on 9365 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [10]:
# Model 4: 100 latent units, trained on MAE.
m4 = seq2seq(history, future, latent_dim=100)
m4.compile(optimizer='rmsprop', loss=MAE, metrics=[MAPE, MAE])
h4 = m4.fit(
    x=train_gen[0],
    y=train_gen[1],
    batch_size=train_batch,
    epochs=num_epochs,
    validation_data=vad_gen,
)

Train on 70350 samples, validate on 9365 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
