### Different methods to represent input in Sequence-based models

In [1]:
from keras.layers import Input, Embedding, LSTM, GRU, RepeatVector, TimeDistributed, Dense
from keras.models import Model

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Hyperparameters shared by the input-representation examples below
n_features = 100        # vocabulary size, e.g. number of unique characters/words in the corpus
embedding_dim = 256     # size of each embedding vector
rnn_hidden_nodes = 128  # hidden units per recurrent layer
batch_size = 1          # number of examples per batch
n_timesteps = 32        # sequence length fed to the RNN layer

#### 1. Explicitly defining batch shape

Notice that only the batch size is fixed here; the number of timesteps (sequence length) is left unspecified (`None`).

In [3]:
# `batch_shape` already contains the per-example shape, so `shape` is not passed
# as well (supplying both is redundant and may be rejected by newer Keras versions).
# Batch size comes from the `batch_size` constant; the time axis stays open (None).
input_layer = Input(batch_shape=(batch_size, None), name='encoder_inputs')

In [4]:
input_layer.shape  # (batch, timesteps): batch axis is fixed, time axis is left open

TensorShape([Dimension(1), Dimension(None)])

#### 2. Variable length (undefined time steps / sequence length)

We specify neither the number of examples per batch nor the sequence length here.

In [5]:
# Only the per-example shape is given, and its single axis (time) is None,
# so both the batch size and the sequence length stay unspecified.
input_layer = Input(shape=(None,), name='encoder_inputs')

In [6]:
input_layer.shape  # (batch, timesteps): both axes unspecified

TensorShape([Dimension(None), Dimension(None)])

#### 3. Fixed length sequence

Notice that we define a fixed sequence length of 32 (`n_timesteps`).

In [7]:
# Sequence length is pinned to n_timesteps; the batch size is still unspecified.
input_layer = Input(shape=(n_timesteps,), name='encoder_inputs')

In [8]:
input_layer.shape  # (batch, n_timesteps)

TensorShape([Dimension(None), Dimension(32)])

#### 4. 3-D shape input tensor (batch size, timesteps, features)

Compare this example with the ones in Practical 5, in which we use a Lambda layer as a one-hot projection layer.

In [9]:
# Each example is a sequence of n_timesteps vectors, each of size n_features.
input_layer = Input(shape=(n_timesteps, n_features), name='encoder_inputs')

In [10]:
input_layer.shape  # (batch, n_timesteps, n_features)

TensorShape([Dimension(None), Dimension(32), Dimension(100)])

### Example of simple architecture of sequence to sequence

Note that we do not give running examples for this one. Keep in mind that different architectures may exist, depending on how we define the learning mechanism of our model.

In [11]:
# Integer token ids with a fixed batch size and an open (None) sequence length.
# The batch size is fixed because the GRU layers below are stateful.
# Only `batch_shape` is passed: it already includes the per-example shape, so a
# separate `shape` argument would be redundant.
input_layer = Input(batch_shape=(batch_size, None), name='encoder_inputs')

# Look up an embedding_dim-sized vector for every token id.
# No `batch_input_shape` is given here: the layer is called on `input_layer`,
# which already determines the input shape.
# NOTE(review): with mask_zero=True, id 0 is reserved for padding — confirm that
# n_features accounts for that extra index.
embedding_layer = Embedding(input_dim=n_features,
                            output_dim=embedding_dim,
                            mask_zero=True,
                            name='embedding_encoder')(input_layer)

# Two stacked stateful GRUs; return_sequences=True keeps one output per timestep
# so that the next layer still receives a sequence.
recurrent_layer1 = GRU(units=rnn_hidden_nodes,
                       return_sequences=True,
                       stateful=True)(embedding_layer)
recurrent_layer2 = GRU(units=rnn_hidden_nodes,
                       return_sequences=True,
                       stateful=True)(recurrent_layer1)

# Apply the same softmax classifier over n_features classes at every timestep.
pred_layer = TimeDistributed(Dense(n_features,
                                   activation="softmax"))(recurrent_layer2)

In [12]:
# Tie the input and output tensors into a model, then print its layer table.
model = Model(inputs=input_layer, outputs=pred_layer)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_inputs (InputLayer)  (1, None)                 0         
_________________________________________________________________
embedding_encoder (Embedding (1, None, 256)            25600     
_________________________________________________________________
gru_1 (GRU)                  (1, None, 128)            147840    
_________________________________________________________________
gru_2 (GRU)                  (1, None, 128)            98688     
_________________________________________________________________
time_distributed_1 (TimeDist (1, None, 100)            12900     
Total params: 285,028
Trainable params: 285,028
Non-trainable params: 0
_________________________________________________________________


### Different architectures of Sequence-to-Sequence model

### Model 1: Sequence-to-Sequence with RepeatVector

![Image](./sts_1.png?raw=true)

In [13]:
# Settings for the RepeatVector sequence autoencoder
timesteps = 50    # fixed sequence length
n_features = 100  # features per timestep
rnn_dim = 128     # units in each LSTM layer

The input to an LSTM layer needs to be a 3D tensor, so we define our input as a 3D tensor and feed it directly to the LSTM layer. Compare this with the examples in which we use a Lambda one-hot projection layer and/or an embedding layer in the word-level model.

In [14]:
# 3-D input (batch, timesteps, n_features); the batch size is left unspecified.
inputs = Input(shape=(timesteps, n_features))

In [15]:
inputs.shape  # (batch, timesteps, n_features)

TensorShape([Dimension(None), Dimension(50), Dimension(100)])

In [16]:
# Encoder: return_sequences is not set, so the LSTM yields a single
# rnn_dim-sized vector per example instead of one per timestep.
encoded = LSTM(rnn_dim)(inputs)

In [17]:
encoded.shape  # (batch, rnn_dim): the time axis has been collapsed

TensorShape([Dimension(None), Dimension(128)])

In [18]:
# Repeat the encoded vector `timesteps` times so the decoder LSTM receives a sequence.
decoded = RepeatVector(timesteps)(encoded)

In [19]:
decoded.shape  # (batch, timesteps, rnn_dim)

TensorShape([Dimension(None), Dimension(50), Dimension(128)])

In [20]:
# Decoder: return_sequences=True keeps one rnn_dim-sized output per timestep.
# NOTE: this rebinds `decoded` to its own transformation, so re-running this cell
# on its own stacks another LSTM on top of the previous result.
decoded = LSTM(rnn_dim, return_sequences=True)(decoded)

In [21]:
decoded.shape  # time axis is reported as None here, although the model summary lists it as 50

TensorShape([Dimension(None), Dimension(None), Dimension(128)])

In [22]:
# Softmax over n_features classes, applied to the decoder output.
prediction = Dense(n_features, activation='softmax')(decoded)

In [23]:
# Full autoencoder: raw input sequence in, per-timestep class distribution out.
sequence_autoencoder = Model(inputs=inputs, outputs=prediction)

In [24]:
sequence_autoencoder.output_shape  # (batch, timesteps, n_features)

(None, 50, 100)

In [25]:
# Print the layer-by-layer table with output shapes and parameter counts.
sequence_autoencoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 50, 100)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               117248    
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 50, 128)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 50, 128)           131584    
_________________________________________________________________
dense_2 (Dense)              (None, 50, 100)           12900     
Total params: 261,732
Trainable params: 261,732
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Encoder-only model: same input tensor, but it stops at the encoded vector.
encoder = Model(inputs=inputs, outputs=encoded)

In [27]:
encoder.output_shape  # (batch, rnn_dim)

(None, 128)

### Model 2: Sequence-to-sequence with teacher forcing

![Image](./sts_2.png?raw=true)

In [28]:
######### Encoder model
# Define an input sequence and process it.
# The time axis is None (variable length); the last axis is the per-timestep feature size.
# NOTE(review): the feature axis is sized with `timesteps` (50) rather than
# `n_features` — confirm this is intended.
encoder_inputs = Input(shape=(None, timesteps), name='encoder-input')
# NOTE(review): `encoder` was bound to a Model in an earlier cell; it is rebound
# to an LSTM layer here.
# The encoder and decoder LSTMs share `rnn_dim` because the encoder states are
# used as the decoder's initial state below, so the two sizes have to match.
encoder = LSTM(rnn_dim, name='lstm-encoder', return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]


######### Decoder model
# NOTE(review): the decoder feature axis is `timesteps+1` (51) while the prediction
# layer emits `n_features` classes; with teacher forcing the decoder input is
# usually the shifted target sequence — confirm these sizes.
decoder_inputs = Input(shape=(None, timesteps+1), name='decoder-input')

# return_sequences=True yields one output per decoder timestep; the two returned
# states are not needed here and are discarded.
decoder_lstm = LSTM(rnn_dim, name='lstm-decoder', return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
# Softmax over n_features classes for every decoder timestep.
decoder_dense = Dense(n_features, activation='softmax', name='prediction-layer')
decoder_outputs = decoder_dense(decoder_outputs)

In [29]:
# Training model: takes the encoder and decoder inputs and returns the decoder predictions.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)

In [30]:
# Print the layer table; the "Connected to" column shows how the two inputs are wired.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder-input (InputLayer)      (None, None, 50)     0                                            
__________________________________________________________________________________________________
decoder-input (InputLayer)      (None, None, 51)     0                                            
__________________________________________________________________________________________________
lstm-encoder (LSTM)             [(None, 128), (None, 91648       encoder-input[0][0]              
__________________________________________________________________________________________________
lstm-decoder (LSTM)             [(None, None, 128),  92160       decoder-input[0][0]              
                                                                 lstm-encoder[0][1]               
          