# Setup

In [77]:
import numpy as np
from collections import Counter

import tensorflow as tf
from tensorflow.keras import (
    utils,
    preprocessing,
    models,
    layers,
    callbacks,
    datasets
)
import tensorflow_datasets as tfds
import tensorflow_hub as hub
import tensorflow_addons as tfa

In [38]:
AUTOTUNE = tf.data.AUTOTUNE

# Generating Shakespearean Text Using a Character RNN

## Creating the Training Dataset

In [3]:
shakespeare_url = 'https://homl.info/shakespeare' # shortcut URL
filepath = utils.get_file('shakespeare.txt', shakespeare_url)

with open(filepath) as f:
    shakespeare_text = f.read()

In [4]:
# Let's tokenize the text by characters
tokenizer = preprocessing.text.Tokenizer(char_level=True)
tokenizer.fit_on_texts(shakespeare_text)

In [5]:
# Let's see an example of how to tokenize
sequence = tokenizer.texts_to_sequences(['Beverly and Christa'])
new_phrase = tokenizer.sequences_to_texts(sequence)
print(new_phrase)

['b e v e r l y   a n d   c h r i s t a']


In [6]:
max_id = len(tokenizer.word_index)
dataset_size = tokenizer.document_count
[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1 

In [7]:
# Let's separate the data into training, validation and test
train_size = dataset_size * 90 // 100
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])

Metal device set to: Apple M1 Pro

systemMemory: 32.00 GB
maxCacheSize: 10.67 GB



2022-03-04 20:18:25.515856: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-04 20:18:25.516036: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


## Chopping the Sequential Dataset into Multiple Windows

In [8]:
# Let's chop the sequential dataset
n_steps = 100
window_length = n_steps + 1 # target = input shifted 1 character ahead
dataset = dataset.window(window_length, shift=1, drop_remainder=True)

# Flattening the dataset 
dataset = dataset.flat_map(lambda window: window.batch(window_length))

In [9]:
batch_size = 250
dataset = dataset.shuffle(10000).batch(batch_size)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]))

In [10]:
# Encode the categorical feature as one-hot enconding
dataset = dataset.map(lambda x_b, y_b: (tf.one_hot(x_b, depth=max_id), y_b))
dataset.prefetch(1)

<PrefetchDataset element_spec=(TensorSpec(shape=(None, None, 39), dtype=tf.float32, name=None), TensorSpec(shape=(None, None), dtype=tf.int64, name=None))>

## Building adn Training the Char-RNN Model

In [11]:
model = models.Sequential([
    layers.GRU(128, return_sequences=True, input_shape=[None, max_id], dropout=0.2),
    layers.GRU(128, return_sequences=True, dropout=0.2),
    layers.Dense(max_id, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam')

history = model.fit(dataset, epochs=20)

Epoch 1/20


2022-03-04 20:18:26.062622: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-03-04 20:18:27.325265: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-04 20:18:28.222278: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-04 20:18:28.377301: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-04 20:18:28.567077: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-04 20:18:28.831567: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## Using the Char-RNN Model

In [12]:
def preprocess(texts):
    x = np.array(tokenizer.texts_to_sequences(texts))
    return tf.one_hot(x, max_id)

In [27]:
x_new = preprocess(['nice to meet yo'])
y_pred = np.argmax(model.predict(x_new), axis=-1)
char_pred = tokenizer.sequences_to_texts(y_pred + 1)[0][-1]
print(char_pred)

 


## Generating Fake Shakespearean Text

In [32]:
def next_char(text, temperature=1):
    x_new = preprocess([text])
    y_proba = model.predict(x_new)[0, -1:, :]
    rescaled_logits = tf.math.log(y_proba) / temperature
    char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
    return tokenizer.sequences_to_texts(char_id.numpy())[0]

In [33]:
def complete_text(text, n_chars=50, temperature=1):
    for _ in range(n_chars):
        text += next_char(text, temperature)
    return text

In [34]:
print(complete_text('t', temperature=0.2))

tueer             
 h                              


In [35]:
print(complete_text('w', temperature=1))

w al,lli m
  
; ri tn groe'h,e.tuwolli ose n
e ' ts


In [36]:
print(complete_text('w', temperature=2))

w fee,ee
.sx,o?nsn dose
pvordu! sccclr
  t:uduanwlr


## Stateful RNN

In [39]:
# Preparing the data for stateful RNN

dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])
dataset = dataset.window(window_length, shift=n_steps, drop_remainder=True)
dataset = dataset.flat_map(lambda window: window.batch(window_length))
dataset = dataset.batch(1)
dataset = dataset.map(lambda windows: (windows[:, :-1], windows[:, 1:]), num_parallel_calls=AUTOTUNE)
dataset = dataset.map(lambda x_b, y_b: (tf.one_hot(x_b, depth=max_id), y_b), num_parallel_calls=AUTOTUNE)
dataset = dataset.prefetch(AUTOTUNE)

In [40]:
stateful_model = models.Sequential([
    layers.GRU(128, return_sequences=True, stateful=True, dropout=0.2,
               batch_input_shape=[32, None, max_id]),
    layers.GRU(128, return_sequences=True, stateful=True, dropout=0.2),
    layers.TimeDistributed(layers.Dense(max_id, activation='softmax'))
])

In [44]:
# Create a callback to reset the states at the end of each epoch
class ResetStatesCallback(callbacks.Callback):
    def on_epoch_begin(self, epoch, logs):
        self.model.reset_states()

In [43]:
# model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
# model.fit(dataset, epochs=50, callbacks=[ResetStatesCallback()])

# Sentiment Analysis

In [46]:
# Load the IMDb reviews dataset

(x_train, y_train), (x_test, y_test) = datasets.imdb.load_data()
x_train[0][:10]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65]

In [48]:
# Decode the message 
word_index = datasets.imdb.get_word_index()
id_to_word = {id_ + 3: word for word, id_ in word_index.items()}

for id_, token in enumerate(('<pad>', '<sos>', '<unk>')):
    id_to_word[id_] = token
    
' '.join([id_to_word[id_] for id_ in x_train[0][:20]])

"<sos> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you"

In [54]:
imdb_datsets, info = tfds.load('imdb_reviews', as_supervised=True, with_info=True)
train_size = info.splits['train'].num_examples

In [55]:
# Preprocessing function for the data
def preprocess(x_batch, y_batch):
    x_batch = tf.strings.substr(x_batch, 0, 300)
    x_batch = tf.strings.regex_replace(x_batch, b'<br\\s*/?>', b' ')
    x_batch = tf.strings.regex_replace(x_batch, b"[^a-zA-Z']", b' ')
    x_batch = tf.strings.split(x_batch)
    return x_batch.to_tensor(default_value=b'<pad>'), y_batch

In [56]:
# Let's count the occurrences of the words in the dataset 
vocabulary = Counter()
for x_batch, y_batch in imdb_datsets['train'].batch(32).map(preprocess):
    for review in x_batch:
        vocabulary.update(list(review.numpy()))

In [57]:
# Let's look at the three most common words
vocabulary.most_common()[:3]

[(b'<pad>', 214309), (b'the', 61137), (b'a', 38564)]

In [58]:
vocab_size = 10000
truncated_vocabulary = [
    word for word, count in vocabulary.most_common()[:vocab_size]]

In [59]:
# Let's replace the words for their ID (index in the vocabulary)
words = tf.constant(truncated_vocabulary)
word_ids = tf.range(len(truncated_vocabulary), dtype=tf.int64)
vocab_init = tf.lookup.KeyValueTensorInitializer(words, word_ids)
num_oov_buckets = 1000
table = tf.lookup.StaticVocabularyTable(vocab_init, num_oov_buckets)

In [61]:
# Let's use the previous table to look up for some words
table.lookup(tf.constant([b"This movie was bullshit, I don't buy it".split()]))

<tf.Tensor: shape=(1, 8), dtype=int64, numpy=array([[   22,    12,    11, 10252,     6,   102,  1020,    10]])>

In [62]:
# Create a function that encodes the words using the table created
def encode_words(x_batch, y_batch):
    return table.lookup(x_batch), y_batch

In [63]:
train_set = imdb_datsets['train'].batch(32).map(preprocess, num_parallel_calls=AUTOTUNE)
train_set = train_set.map(encode_words, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)

In [64]:
embed_size = 128
model = models.Sequential([
    layers.Embedding(vocab_size + num_oov_buckets, embed_size,
                     input_shape=[None]),
    layers.GRU(128, return_sequences=True),
    layers.GRU(128),
    layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

history = model.fit(train_set, epochs=5)

Epoch 1/5


2022-03-07 14:19:13.434790: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-07 14:19:13.749624: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-07 14:19:13.887233: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-07 14:19:14.588084: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-03-07 14:19:14.872878: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [65]:
# Let's create a similar model with masking
k = tf.keras.backend
inputs = layers.Input(shape=[None])
mask = layers.Lambda(lambda inputs: k.not_equal(inputs, 0))(inputs)
z = layers.Embedding(vocab_size + num_oov_buckets, embed_size)(inputs)
z = layers.GRU(128, return_sequences=True)(z, mask=mask)
z = layers.GRU(128)(z, mask=mask)
outputs = layers.Dense(1, activation='sigmoid')(z)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs])

## Reusing Pretrained Embeddings

In [67]:
model = tf.keras.Sequential([
    hub.KerasLayer('https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1',
                   dtype=tf.string, input_shape=[], output_shape=[50]),
    layers.Dense(128, activation='elu', kernel_initializer='he_normal'),
    layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

2022-03-07 15:59:09.168002: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [68]:
# Let's train the same IMDb dataset on this new model
imdb_datsets, info = tfds.load('imdb_reviews', as_supervised=True, with_info=True)
train_size = info.splits['train'].num_examples
batch_size = 32
train_set = imdb_datsets['train'].batch(batch_size).prefetch(AUTOTUNE)

history = model.fit(train_set, epochs=10)

Epoch 1/10
  1/782 [..............................] - ETA: 4:28 - loss: 0.6772 - accuracy: 0.4688

2022-03-07 16:02:59.668102: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [70]:
train_set

<PrefetchDataset element_spec=(TensorSpec(shape=(None,), dtype=tf.string, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [76]:
phrase = tf.constant(['God Loves us'])
pred = model.predict(phrase)
pred

array([[0.9465219]], dtype=float32)

# An Encoder-Decoder Network for Neural Machine Translations

In [79]:
encoder_inputs = layers.Input(shape=[None], dtype=tf.int32)
decoder_inputs = layers.Input(shape=[None], dtype=tf.int32)
sequence_length = layers.Input(shape=[], dtype=tf.int32)

embeddings = layers.Embedding(vocab_size, embed_size)
encoder_embeddings = embeddings(encoder_inputs)
decoder_embeddings = embeddings(decoder_inputs)

encoder = layers.LSTM(512, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_embeddings)
encoder_state = [state_h, state_c]

sampler = tfa.seq2seq.sampler.TrainingSampler()

decoder_cell = layers.LSTMCell(512)
output_layer = layers.Dense(vocab_size)
decoder = tfa.seq2seq.basic_decoder.BasicDecoder(decoder_cell, sampler, output_layer=output_layer)

final_outputs, final_state, final_sequence_lengths = decoder(
    decoder_embeddings, initial_state=encoder_state,
    sequence_length=sequence_length)
y_proba = tf.nn.softmax(final_outputs.rnn_output)

model = tf.keras.Model(inputs=[encoder_inputs, decoder_inputs, sequence_length],
                       outputs=[y_proba])

## Bidirectional RNNs

In [80]:
# To create a bidirectional recurrent layer
layers.Bidirectional(layers.GRU(10, return_sequences=True))

<keras.layers.wrappers.Bidirectional at 0x360576970>

## Beam Search

In [None]:
# Let's implement beam search
beam_width = 10
decoder = tfa.seq2seq.beam_search_decoder.BeamSearchDecoder(
    cell=decoder_cell, beam_width=beam_width, output_layer=output_layer)
decoder_initial_state = tfa.seq2seq.beam_search_decoder.tile_batch(
    encoder_state, multiplier=beam_width
)
outputs, _, _ = decoder(
    decoder_embeddings #, start_tokens=start_tokens, end_token=end_token,
    initial_state=decoder_initial_state
)

# Attention Mechanisms

# Recent Innovations in Language Models

# Exercises