# Chapter 16 Natual Language Processing with RNNs and Attention

## Generating Shakespearean Text Using a Character RNN

In [1]:
## basic import
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [2]:
## basic setting for tensorflow
print(tf.config.list_physical_devices('GPU'))
tf.debugging.set_log_device_placement(False)
# set GPU memory growth
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### Creating the Training Dataset

In [1]:
from tensorflow.keras.callbacks import ModelCheckpoint

In [5]:
shakespeare_url = "https://homl.info/shakespeare"
file_path = keras.utils.get_file("/tf/notebooks/shakespeare.txt", shakespeare_url)
with open('shakespeare.txt', 'r') as f:
    shakespeare_text = f.read()

In [6]:
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)

In [7]:
tokenizer.fit_on_texts([shakespeare_text])

In [8]:
tokenizer.texts_to_sequences(['first'])

[[20, 6, 9, 8, 3]]

In [9]:
tokenizer.sequences_to_texts([[20, 6, 9, 8, 3]])

['f i r s t']

In [10]:
max_id = len(tokenizer.word_index) # number of distinct character

In [11]:
tokenizer.document_count

1

In [12]:
dataset_size = len(shakespeare_text)

In [13]:
tokenizer.index_word.keys()

dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39])

In [14]:
[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1

### How to Split a Sequential Dataset

In [15]:
# 90% as training set
train_size = dataset_size * 90 // 100

In [35]:
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

Executing op TensorSliceDataset in device /job:localhost/replica:0/task:0/device:CPU:0


### Chopping the Sequential Dataset into Multiple Windows

In [36]:
n_steps = 100

In [37]:
window_length = n_steps + 1 # target = input shifted 1 character ahead

In [38]:
dataset = dataset.window(window_length, shift=1, stride=1, drop_remainder=True)

Executing op WindowDataset in device /job:localhost/replica:0/task:0/device:CPU:0


In [39]:
# dataset is now a nested dataset, we need to flat it, and before flatting, we apply batch() with the same length of each windwo
dataset = dataset.flat_map(lambda window: window.batch(window_length))
# now dataset looks like {[0,1,...,100], [1,2,...101], ...}
# for each window/element in it, first 100 characters are training input, last 100 characters are target

Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0


In [40]:
batch_size = 1024
dataset = dataset.shuffle(10000).batch(batch_size)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))

Executing op ShuffleDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op BatchDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0


In [41]:
# ont-hot encoding
dataset = dataset.map(lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))

Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0


In [42]:
dataset = dataset.prefetch(1)

Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0


In [43]:
dataset

<PrefetchDataset shapes: ((None, None, 39), (None, None)), types: (tf.float32, tf.int64)>

### Building and Training the Char-RNN Model

the conventional way is to have the target outputs converted to the one-hot encoded array to match with the output shape, however, with the help of the `sparse_categorical_crossentropy` loss function, we can skip that step and keep the integers as targets.

In [44]:
# creat echeckpoint, save model weights every 256 steps
checkpoint = ModelCheckpoint("/tf/notebooks/stateless_model_weights.hdf5", monitor='loss', \
                             save_weights_only=True, save_best_only=True, save_freq=256)
# save_freq: triggered after seen that number of steps(batch of samples)

In [45]:
# the 'None' is first GRU input_shape hyperparameter is for n_steps(length of sequence, it's doesn't matter)
model = keras.models.Sequential([
    keras.layers.GRU(128, return_sequences=True, input_shape=(None, max_id), \
                     dropout=0.2, recurrent_dropout=0.2),
    keras.layers.GRU(128, return_sequences=True, \
                     dropout=0.2, recurrent_dropout=0.2),
    keras.layers.TimeDistributed(keras.layers.Dense(max_id, \
                                                    input_shape=(100, 128), activation='softmax'))
])
model.compile(loss='sparse_categorical_crossentropy', optimizer=keras.optimizers.Adam(0.001))



In [49]:
model.load_weights('stateless_model_weights.hdf5')

In [50]:
history = model.fit(dataset, epochs=50, callbacks=[checkpoint])

Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 1/50
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 2/50
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 3/50
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 4/50
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localh

Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 33/50
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 34/50
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 35/50
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 36/50
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 37/50
Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Epoch 38/50
Executing op OptimizeD

### Using the Char-RNN Models

In [28]:
def preprocess(texts):
    X = np.array(tokenizer.texts_to_sequences(texts)) - 1
    return tf.one_hot(X, depth=max_id)

In [77]:
X_new = preprocess(['How are yo'])
Y_pred = model.predict_classes(X_new)
tokenizer.sequences_to_texts(Y_pred+1)[0][-1]

Executing op RangeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op TensorDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ZipDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ParallelMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0


'u'

### Generating Fake Shakespearean Text

In [78]:
def next_text(text, temperature=1):
    X_new = preprocess([text])
    y_prob = model.predict(X_new)[0, -1:, :]
    rescaled_logits = tf.math.log(y_prob) / temperature
    char_id = tf.random.categorical(rescaled_logits, 1) + 1
    return tokenizer.sequences_to_texts(char_id.numpy())[0]

In [79]:
def complete_text(text, n_chars=50, temperature=1):
    for _ in range(int(n_chars)):
        text += next_text(text, temperature)
    return text

In [80]:
print(complete_text('t', 0.2))

t


In [81]:
print(complete_text('w', 1))

Executing op RangeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op TensorDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ZipDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ParallelMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
we


In [82]:
print(complete_text('w', 2))

Executing op RangeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op TensorDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ZipDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ParallelMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RangeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapD

### Stateful RNN

In [97]:
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])
dataset = dataset.window(window_length, shift=n_steps, stride=1, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(window_length))
dataset = dataset.batch(1)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))
dataset = dataset.map(lambda X_batch, Y_batch: (tf.one_hot(X_batch, depth=max_id), Y_batch))
dataset = dataset.prefetch(1)

Executing op TensorSliceDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op WindowDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op BatchDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0


In [107]:
# because it's stateful model, we need to specify the batch size in 'batch_input_shape'
model = keras.models.Sequential([
    keras.layers.GRU(128, return_sequences=True, stateful=True, \
                     dropout=0.2, recurrent_dropout=0.2, \
                     batch_input_shape=(1, None, max_id)),
    keras.layers.GRU(128, return_sequences=True, stateful=True, \
                     dropout=0.2, recurrent_dropout=0.2),
    keras.layers.TimeDistributed(keras.layers.Dense(max_id, activation='softmax'))
])

Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0


In [108]:
# we need to reset the hidden state before we go back to the begin (i.e. start a epoch)
class ResetStatesCallback(keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs):
        self.model.reset_states()

In [109]:
# creat echeckpoint
checkpoint = ModelCheckpoint("/tf/notebooks/stateful_model_weights.hdf5", monitor='loss', \
                             save_weights_only=True, save_best_only=True, save_freq=256)

In [110]:
model.load_weights('stateful_model_weights.hdf5')

In [None]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
model.fit(dataset, epochs=10, callbacks=[ResetStatesCallback(), checkpoint])

## Sentiment Analysis

Import data from keras datasets

In [16]:
from tensorflow import keras

In [2]:
(X_train, y_train), (X_test, y_test) = keras.datasets.imdb.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [9]:
X_train[0][:10]

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65]

In [11]:
word_index = keras.datasets.imdb.get_word_index()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [15]:
## shift 3 position, save for <pad>, <sos>, <unk>
id_to_word = {idx+3: word for word, idx in word_index.items()}

In [16]:
for idx, word in enumerate(["<pad>", "<sos>", "<unk>"]):
    id_to_word[idx] = word

In [19]:
" ".join([id_to_word[idx] for idx in X_train[0][:20]])

"<sos> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you"

Preprocess the raw imdb dataset, we can learn something from this: how to preprocess the dataset

In [6]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
datasets, info = tfds.load('imdb_reviews', as_supervised=True, with_info=True)

In [8]:
train_size = info.splits['train'].num_examples

In [9]:
# def preprocessing function
def preprocess(X_batch, y_batch):
    # fix length
    X_batch = tf.strings.substr(X_batch, 0, 300)
    # filtering unused characters
    X_batch = tf.strings.regex_replace(X_batch, b"<br\\s*/?>", b" ")
    X_batch = tf.strings.regex_replace(X_batch, b"[^a-zA-Z']", b" ")
    # split reviews by spaces
    X_batch = tf.strings.split(X_batch)
    return X_batch.to_tensor(default_value=b"<pad>"), y_batch

In [10]:
from collections import Counter

In [None]:
vocabulary = Counter()
for X_batch, y_batch in datasets['train'].batch(32).map(preprocess):
    for review in X_batch:
        vocabulary.update(list(review.numpy()))

In [12]:
vocabulary.most_common(5)

[(b'<pad>', 214309),
 (b'the', 61137),
 (b'a', 38564),
 (b'of', 33983),
 (b'and', 33431)]

In [13]:
vocab_size = 10000
truncated_vocabulary = [word for word, count in vocabulary.most_common(vocab_size)]

In [14]:
words = tf.constant(truncated_vocabulary)
word_ids = tf.range(len(truncated_vocabulary), dtype=tf.int64)
vocab_init = tf.lookup.KeyValueTensorInitializer(words, word_ids)
num_oov_buckets = 1000
table = tf.lookup.StaticVocabularyTable(vocab_init, num_oov_buckets)

Executing op Range in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op HashTableV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op LookupTableImportV2 in device /job:localhost/replica:0/task:0/device:CPU:0


In [15]:
table.lookup(tf.constant(["This movie is faaaaaantastic".split()]))

Executing op StringToHashBucketFast in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op LookupTableFindV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op LookupTableSizeV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Add in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op NotEqual in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op SelectV2 in device /job:localhost/replica:0/task:0/device:GPU:0


<tf.Tensor: shape=(1, 4), dtype=int64, numpy=array([[   22,    12,     7, 10053]])>

In [16]:
def encode_words(X_batch, y_batch):
    return table.lookup(X_batch), y_batch

In [17]:
train_set = datasets['train'].batch(32).map(preprocess)
train_set = train_set.map(encode_words).prefetch(1)

Executing op BatchDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0


In [None]:
embed_size = 128
model = keras.models.Sequential([
    keras.layers.Embedding(vocab_size + num_oov_buckets, embed_size, input_shape=[None]),
    keras.layers.GRU(128, return_sequences=True),
    keras.layers.GRU(128),
    keras.layers.Dense(1, activation='sigmoid')
])

In [19]:
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')

Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0


In [20]:
checkpoint = ModelCheckpoint('/tf/notebooks/imdb_sentiment.hdf5', monitor='loss', \
                             save_best_only=True, save_weights_only=True, save_freq=128)

In [21]:
model.load_weights('imdb_sentiment.hdf5')

In [None]:
history = model.fit(train_set, epochs=5, callbacks=[checkpoint])

In [23]:
test_set = datasets['test'].batch(32).map(preprocess)
test_set = test_set.map(encode_words).prefetch(1)

Executing op BatchDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0


In [None]:
model.evaluate(test_set)

### Masking

In [35]:
K = keras.backend

In [36]:
inputs = keras.layers.Input(shape=[None])
mask = keras.layers.Lambda(lambda inputs: K.not_equal(inputs, 0))(inputs)

# model
z = keras.layers.Embedding(vocab_size + num_oov_buckets, embed_size)(inputs)
z = keras.layers.GRU(128, return_sequences=True)(z, mask=mask)
z = keras.layers.GRU(128)(z, mask=mask)
outputs = keras.layers.Dense(1, activation='sigmoid')(z)
model = keras.Model(inputs=[inputs], outputs=[outputs])

In [40]:
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')

In [38]:
logs_callback = keras.callbacks.TensorBoard(
    log_dir='/tf/notebooks/logs', histogram_freq=0, write_graph=True, write_images=False,
    update_freq=128, profile_batch=2, embeddings_freq=1,
    embeddings_metadata=None
)

checkpoint = ModelCheckpoint('/tf/notebooks/imdb_sentiment_mask.hdf5', monitor='loss', \
                             save_best_only=True, save_weights_only=True, save_freq=128)

In [None]:
history = model.fit(train_set, epochs=5, callbacks=[checkpoint, logs_callback])

### Reusing Pretained Embeddings

Use the pretrained embedding from www.tfhub.dev

In [58]:
import tensorflow_hub as hub
import tensorflow_datasets as tfds

In [73]:
model = keras.models.Sequential([
    hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim50/2", input_shape=[], dtype=tf.string),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op __inference_restored_function_body_500699 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op __inference_restored_function_body_500727 in device /job:localhost/replica:0/task:0/device:GPU:0


In [74]:
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')

In [75]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 50)                48190600  
_________________________________________________________________
dense (Dense)                (None, 128)               6528      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 48,197,257
Trainable params: 6,657
Non-trainable params: 48,190,600
_________________________________________________________________


In [76]:
checkpoint = ModelCheckpoint('/tf/notebooks/imdb_sentiment_pretrained_nnml50.hdf5', monitor='loss', \
                             save_best_only=True, save_weights_only=True, save_freq=128)

In [None]:
datasets, info = tfds.load('imdb_reviews', as_supervised=True, with_info=True)

In [None]:
train_size = info.splits['train'].num_examples
batch_size = 32
train_set = datasets['train'].batch(batch_size).prefetch(1)
test_set = datasets['test'].batch(batch_size).prefetch(1)
history = model.fit(train_set, epochs=1, validation_data=test_set, callbacks=[checkpoint, logs_callback])

## An Encoder-Decoder Network for Neural Machine translation

Basic Encoder-Decoder model

In [3]:
tf.random.set_seed(42)

In [4]:
vocab_size = 100
embed_size = 10

In [5]:
import tensorflow_addons as tfa

In [6]:
## placeholder
encoder_inputs = keras.layers.Input(shape=[None], dtype=np.int32)
decoder_inputs = keras.layers.Input(shape=[None], dtype=np.int32)
sequence_lengths = keras.layers.Input(shape=[], dtype=np.int32)

## Embeddings
embeddings = keras.layers.Embedding(vocab_size, embed_size)
encoder_embeddings = embeddings(encoder_inputs)
decoder_embeddings = embeddings(decoder_inputs)

## Encoder
encoder = keras.layers.LSTM(512, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_embeddings)
encoder_state = [state_h, state_c]

## Decoder
sampler = tfa.seq2seq.sampler.TrainingSampler()
decoder_cell = keras.layers.LSTMCell(512)
output_layer = keras.layers.Dense(vocab_size)
decoder = tfa.seq2seq.basic_decoder.BasicDecoder(decoder_cell, sampler,
                                                 output_layer=output_layer)

## model outputs
final_outputs, final_state, final_sequence_lengths = decoder(
    decoder_embeddings, initial_state=encoder_state,
    sequence_length=sequence_lengths)
Y_proba = tf.nn.softmax(final_outputs.rnn_output)

In [7]:
model = keras.models.Model(
    inputs=[encoder_inputs, decoder_inputs, sequence_lengths],
    outputs=[Y_proba])

In [9]:
model.compile(loss=keras.losses.sparse_categorical_crossentropy,
              metrics=['accuracy'], optimizer='adam')

In [25]:
# num_sample=1000
# X is source language
X = np.random.randint(100, size=10*1000).reshape(1000, 10)
# Y is target language
Y = np.random.randint(100, size=15*1000).reshape(1000, 15)
# np.c_: columns concatenate
# start with 0, i.e. <SOS>
# X_decoder is target language
X_decoder = np.c_[np.zeros((1000, 1)), Y[:, :-1]]
seq_lengths = np.full([1000], 15)

model.fit([X, X_decoder, seq_lengths], Y, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fca24c2a5c0>

### Bidirectional RNNs

In [26]:
model = keras.models.Sequential([
    keras.layers.GRU(10, return_sequences=True, input_shape=[None, 10]),
    keras.layers.Bidirectional(keras.layers.GRU(10, return_sequences=True))
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, None, 10)          660       
_________________________________________________________________
bidirectional (Bidirectional (None, None, 20)          1320      
Total params: 1,980
Trainable params: 1,980
Non-trainable params: 0
_________________________________________________________________


### Beam Search

In [None]:
beam_width = 4
decoder = tfa.seq2seq.beam_search_decoder.BeamSearchDecoder(\
    cell=decoder_cell, beam_width=beam_width, output_layer=output_layer)
decoder_initial_state = tfa.seq2seq.beam_search_decoder.tile_batch(\
    encoder_state, multiplier=beam_width)
outputs, _, _ = decoder(decoder_embeddings, start_tokens=[0], end_token=1,
    initial_state=decoder_initial_state)

## Attention Mechanisims

In [None]:
attention_mechanism = tfa.seq2seq.attention_wrapper.LuongAttention(
    units, encoder_state, memory_sequence_length=encoder_sequence_length)
attention_decoder_cell = tfa.seq2seq.attention_wrapper.AttentionWrapper(
    decoder_cell, attention_mechanism, attention_layer_size=n_units)

### Visual Attention