# Chapter 16

# Exercise 8

Embedded Reber grammars were used by Hochreiter and Schmidhuber in their paper about LSTMs. They are artificial grammars that produce strings such as “BPBTSXXVPSEPE.” Check out Jenny Orr’s nice introduction to this topic. Choose a particular embedded Reber grammar (such as the one represented on Jenny Orr’s page), then train an RNN to identify whether a string respects that grammar or not. You will first need to write a function capable of generating a training batch containing about 50% strings that respect the grammar, and 50% that don’t.

In [1]:
from collections import defaultdict
from random import choice, random, sample
from tensorflow import keras

import tensorflow as tf

In [2]:
from reber import *

## Reber

In [3]:
# Reber grammar transitions, one (source_node, target_node, letter) tuple per
# edge. The edge whose target is None emits the final 'E'.
reber_edges = (
    (0, 1, 'B'),
    (1, 2, 'T'),
    (1, 3, 'P'),
    (2, 2, 'S'),
    (2, 4, 'X'),
    (3, 3, 'T'),
    (3, 5, 'V'),
    (4, 3, 'X'),
    (4, 6, 'S'),
    (5, 4, 'P'),
    (5, 6, 'V'),
    (6, None, 'E'),
)

In [4]:
node_dict = dict_from_edges(reber_edges)

In [5]:
node_dict

defaultdict(list,
            {0: [(1, 'B')],
             1: [(2, 'T'), (3, 'P')],
             2: [(2, 'S'), (4, 'X')],
             3: [(3, 'T'), (5, 'V')],
             4: [(3, 'X'), (6, 'S')],
             5: [(4, 'P'), (6, 'V')],
             6: [(None, 'E')]})

In [6]:
sentence = generate_sentence(node_dict)

In [7]:
sentence

((0, 1, 'B'), (1, 3, 'P'), (3, 5, 'V'), (5, 6, 'V'), (6, None, 'E'))

In [8]:
string_from_sentence(sentence)

'BPVVE'

In [9]:
unique_letters(sentence)

'PVBE'

In [10]:
unique_letters(reber_edges)

'VEXTPSB'

In [11]:
sentence_edge = sentence[3]

In [12]:
sentence_edge

(5, 6, 'V')

In [13]:
corrupted_sentence_edge = corrupt_edge(sentence_edge, reber_edges)

In [14]:
corrupted_sentence_edge

(5, 6, 'B')

In [15]:
corrupted_sentence = corrupt_sentence(sentence, reber_edges, 2)

In [16]:
corrupted_sentence

((0, 1, 'X'), (1, 3, 'B'), (3, 5, 'V'), (5, 6, 'V'), (6, None, 'E'))

## Embedded Reber Grammar

In [17]:
# Outer grammar of the embedded Reber grammar. The 2->4 and 3->5 edges carry
# the whole inner grammar (reber_edges) in place of a single letter; they are
# expanded into ordinary edges by flatten_embedded_edges afterwards.
embedded_reber_edges = (
    (0, 1, 'B'),
    (1, 2, 'T'),
    (1, 3, 'P'),
    (2, 4, reber_edges),
    (3, 5, reber_edges),
    (4, 6, 'T'),
    (5, 6, 'P'),
    (6, None, 'E'),
)

In [18]:
embedded_reber_edges = flatten_embedded_edges(embedded_reber_edges)

In [19]:
embedded_reber_edges

((0, 1, 'B'),
 (1, 2, 'T'),
 (1, 3, 'P'),
 (2, '2-1', 'B'),
 ('2-1', '2-2', 'T'),
 ('2-1', '2-3', 'P'),
 ('2-2', '2-2', 'S'),
 ('2-2', '2-4', 'X'),
 ('2-3', '2-3', 'T'),
 ('2-3', '2-5', 'V'),
 ('2-4', '2-3', 'X'),
 ('2-4', '2-6', 'S'),
 ('2-5', '2-4', 'P'),
 ('2-5', '2-6', 'V'),
 ('2-6', 4, 'E'),
 (3, '3-1', 'B'),
 ('3-1', '3-2', 'T'),
 ('3-1', '3-3', 'P'),
 ('3-2', '3-2', 'S'),
 ('3-2', '3-4', 'X'),
 ('3-3', '3-3', 'T'),
 ('3-3', '3-5', 'V'),
 ('3-4', '3-3', 'X'),
 ('3-4', '3-6', 'S'),
 ('3-5', '3-4', 'P'),
 ('3-5', '3-6', 'V'),
 ('3-6', 5, 'E'),
 (4, 6, 'T'),
 (5, 6, 'P'),
 (6, None, 'E'))

In [20]:
node_dict = dict_from_edges(embedded_reber_edges)

In [21]:
node_dict

defaultdict(list,
            {0: [(1, 'B')],
             1: [(2, 'T'), (3, 'P')],
             2: [('2-1', 'B')],
             '2-1': [('2-2', 'T'), ('2-3', 'P')],
             '2-2': [('2-2', 'S'), ('2-4', 'X')],
             '2-3': [('2-3', 'T'), ('2-5', 'V')],
             '2-4': [('2-3', 'X'), ('2-6', 'S')],
             '2-5': [('2-4', 'P'), ('2-6', 'V')],
             '2-6': [(4, 'E')],
             3: [('3-1', 'B')],
             '3-1': [('3-2', 'T'), ('3-3', 'P')],
             '3-2': [('3-2', 'S'), ('3-4', 'X')],
             '3-3': [('3-3', 'T'), ('3-5', 'V')],
             '3-4': [('3-3', 'X'), ('3-6', 'S')],
             '3-5': [('3-4', 'P'), ('3-6', 'V')],
             '3-6': [(5, 'E')],
             4: [(6, 'T')],
             5: [(6, 'P')],
             6: [(None, 'E')]})

In [22]:
sentence = generate_sentence(node_dict)

In [23]:
sentence

((0, 1, 'B'),
 (1, 3, 'P'),
 (3, '3-1', 'B'),
 ('3-1', '3-3', 'P'),
 ('3-3', '3-5', 'V'),
 ('3-5', '3-4', 'P'),
 ('3-4', '3-3', 'X'),
 ('3-3', '3-5', 'V'),
 ('3-5', '3-6', 'V'),
 ('3-6', 5, 'E'),
 (5, 6, 'P'),
 (6, None, 'E'))

In [24]:
string_from_sentence(sentence)

'BPBPVPXVVEPE'

In [25]:
corrupt_sentence(sentence, embedded_reber_edges, 3)

((0, 1, 'B'),
 (1, 3, 'P'),
 (3, '3-1', 'S'),
 ('3-1', '3-3', 'E'),
 ('3-3', '3-5', 'V'),
 ('3-5', '3-4', 'S'),
 ('3-4', '3-3', 'X'),
 ('3-3', '3-5', 'V'),
 ('3-5', '3-6', 'V'),
 ('3-6', 5, 'E'),
 (5, 6, 'P'),
 (6, None, 'E'))

## Generate Training Data

We will write a generator function that produces Reber sentences. Each sentence is corrupted with probability 0.5 (label 0) and otherwise left valid (label 1). When a sentence is corrupted, the number of corruptions is chosen at random between 1 and `max_corruptions`.

In [26]:
def generate_reber_training_sample(max_corruptions, edges, node_dict, allowed_chars):
    """Generate one labelled training example for the grammar classifier.

    A sentence is drawn with ``generate_sentence(node_dict)``. With
    probability 0.5 it is passed through ``corrupt_sentence`` using a
    corruption count picked uniformly from ``1..max_corruptions`` and is
    labelled 0; otherwise it is left untouched and labelled 1.

    Args:
        max_corruptions: Inclusive upper bound on the number of corruptions
            applied to a negative sample. Must be at least 1.
        edges: Grammar edges, forwarded to ``corrupt_sentence``.
        node_dict: Grammar lookup, forwarded to ``generate_sentence``.
        allowed_chars: Alphabet forwarded to ``string_to_ids``.

    Returns:
        A ``(x, y)`` pair: ``x`` holds the character ids of the sentence as
        an int8 tensor built with ``ragged_rank=0``, ``y`` is the scalar int8
        label (1 = uncorrupted, 0 = corrupted).

    Raises:
        ValueError: If ``max_corruptions`` is smaller than 1.
    """
    # Validate up front: otherwise a bad value only fails on the ~50% of calls
    # that take the corruption branch, with an opaque IndexError from choice().
    if max_corruptions < 1:
        raise ValueError(
            f"max_corruptions must be >= 1, got {max_corruptions!r}"
        )

    sentence = generate_sentence(node_dict)
    if random() < .5:
        num_corruptions = choice(range(1, max_corruptions + 1))
        # NOTE(review): the corrupted sentence is not re-checked against the
        # grammar here, so a label-0 sample could in principle still be a
        # valid string -- confirm corrupt_sentence rules this out.
        sentence = corrupt_sentence(sentence, edges, num_corruptions)
        label = 0
    else:
        label = 1

    s = string_from_sentence(sentence)
    x = string_to_ids(s, allowed_chars)
    x = tf.ragged.constant(x, dtype=tf.int8, ragged_rank=0)
    y = tf.constant(label, dtype=tf.int8)
    return (x, y)

In [27]:
def training_data_generator(max_corruptions, edges, n=10000):
    """Yield ``n`` freshly generated training samples for the given grammar.

    The node lookup and the alphabet are derived from ``edges`` once, then
    every sample is produced by ``generate_reber_training_sample``.

    Args:
        max_corruptions: Forwarded to ``generate_reber_training_sample``.
        edges: Grammar edges used both to build the lookup/alphabet and to
            corrupt sentences.
        n: Number of samples to yield.

    Yields:
        Whatever ``generate_reber_training_sample`` returns for each sample.
    """
    lookup = dict_from_edges(edges)
    alphabet = unique_letters(edges)
    yield from (
        generate_reber_training_sample(max_corruptions, edges, lookup, alphabet)
        for _ in range(n)
    )

## Train a model

In [28]:
# Inclusive upper bound on the number of corruptions in a negative sample.
max_corruptions = 3
# Size of the learned embedding vector for each character id.
embedding_size = 5
# One id per grammar letter plus one extra. Presumably the extra slot is id 0,
# reserved for padding (the Embedding layers use mask_zero=True) -- TODO
# confirm against string_to_ids.
input_dim = len(unique_letters(embedded_reber_edges)) + 1

In [29]:
# Wrap the Python sample generator in a tf.data pipeline. The lambda defers
# creation of the generator, so a new one is built each time the dataset is
# iterated. Each element is a variable-length int8 id vector plus a scalar
# int8 label.
# NOTE(review): output_types/output_shapes are deprecated in recent TensorFlow
# releases in favour of output_signature -- confirm against the TF version in use.
data = tf.data.Dataset.from_generator(lambda: training_data_generator(max_corruptions, embedded_reber_edges),
                                     output_types=(tf.int8, tf.int8), output_shapes=(tf.TensorShape([None]), tf.TensorShape([])))

In [30]:
# Group samples into batches of 32, padding the variable-length id vectors to
# a common length within each batch (default padding value, i.e. 0 for numeric
# types per the tf.data docs), and prefetch one batch ahead.
data = data.padded_batch(32).prefetch(1)

In [31]:
# GRU classifier: embedding (id 0 masked) -> GRU(30) -> sigmoid unit that
# outputs the probability of label 1.
model = keras.models.Sequential()
model.add(
    keras.layers.Embedding(
        input_dim=input_dim,
        output_dim=embedding_size,
        mask_zero=True,
    )
)
model.add(keras.layers.GRU(30))
model.add(keras.layers.Dense(1, activation="sigmoid"))

In [32]:
# Nadam with a learning rate of 0.01; binary cross-entropy matches the single
# sigmoid output, and accuracy is tracked as the training metric.
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [33]:
# Train the GRU model for 20 passes over the dataset. No validation data is
# passed, so the reported accuracy is measured on the training batches only.
history = model.fit(data, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Let's see how well an LSTM layer works

In [34]:
# Same architecture as the GRU model, with the recurrent layer swapped for an
# LSTM of the same width.
model_lstm = keras.models.Sequential()
model_lstm.add(
    keras.layers.Embedding(
        input_dim=input_dim,
        output_dim=embedding_size,
        mask_zero=True,
    )
)
model_lstm.add(keras.layers.LSTM(30))
model_lstm.add(keras.layers.Dense(1, activation="sigmoid"))

In [35]:
# Give this model its own optimizer instance. An optimizer keeps per-variable
# state and a step counter, so reusing the instance that already trained
# another model either fails on the unknown variables (newer Keras) or
# silently carries over the previous run's state (older TF2).
model_lstm.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Nadam(learning_rate=0.01),
    metrics=["accuracy"],
)

In [36]:
# Train the LSTM model for 20 passes over the same dataset; as no validation
# data is passed, the reported accuracy is training accuracy.
history_lstm = model_lstm.fit(data, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Finally, let's try a SimpleRNN

In [38]:
# Three stacked SimpleRNN layers. The first two return the full sequence so
# the next recurrent layer receives one vector per time step; the last one
# returns only its final output, which feeds the sigmoid unit.
model_rnn = keras.models.Sequential()
model_rnn.add(
    keras.layers.Embedding(
        input_dim=input_dim,
        output_dim=embedding_size,
        mask_zero=True,
    )
)
model_rnn.add(keras.layers.SimpleRNN(30, return_sequences=True))
model_rnn.add(keras.layers.SimpleRNN(30, return_sequences=True))
model_rnn.add(keras.layers.SimpleRNN(30))
model_rnn.add(keras.layers.Dense(1, activation="sigmoid"))

In [39]:
# Give this model its own optimizer instance. An optimizer keeps per-variable
# state and a step counter, so reusing an instance that already trained
# another model either fails on the unknown variables (newer Keras) or
# silently carries over the previous run's state (older TF2).
model_rnn.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Nadam(learning_rate=0.01),
    metrics=["accuracy"],
)

In [40]:
# Train the stacked SimpleRNN model for 20 passes over the same dataset; as no
# validation data is passed, the reported accuracy is training accuracy.
history_rnn = model_rnn.fit(data, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
