# Imports

In [176]:
import tensorflow as tf

import numpy as np
import os
import time
import datetime
import logging
import sys
import platform


# Report the interpreter and library versions this notebook was run with.
print(f'Python version: {platform.python_version()}')
print(f'Tensorflow version: {tf.__version__}')
print(f'Keras version: {tf.keras.__version__}')

Python version: 3.8.12
Tensorflow version: 2.3.0
Keras version: 2.4.0


# Load data

In [177]:
# Number of characters in each training input sequence
seq_length = 200

In [178]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8. The context
# manager closes the file handle as soon as the read finishes (the original
# left the handle open until garbage collection).
with open('shakespeare_train.txt', 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

# Sort the unique characters in the file
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

67 unique characters


# Process the text
Methods to convert the strings to a numerical representation and the other way around. This section also creates sections of text to define training examples.

Create a layer that maps characters to IDs. The layer's input is the output of `tf.strings.unicode_split`.

In [179]:
# Lookup layer: character -> integer ID (no mask token is reserved).
ids_from_chars = tf.keras.layers.experimental.preprocessing.StringLookup(
    vocabulary=list(vocab),
    mask_token=None,
)

Create a layer and a helper function for the reverse process: mapping IDs back to characters.

In [180]:
# Inverse lookup layer: integer ID -> character, built from the forward
# layer's vocabulary so both directions agree.
chars_from_ids = tf.keras.layers.experimental.preprocessing.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None,
)

In [181]:
def text_from_ids(ids):
    """Map `ids` to characters with `chars_from_ids` and join them along the last axis."""
    chars = chars_from_ids(ids)
    return tf.strings.reduce_join(chars, axis=-1)

Divide training data into chunks.

In [182]:
# Split the whole corpus into UTF-8 characters and map each one to its ID.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
# Wrap the ID tensor in a dataset so it can be chunked into sequences below.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [183]:
# Each chunk holds seq_length + 1 IDs; a trailing chunk that would be shorter
# than that is dropped.
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

# Number of full chunks that fit in the corpus.
examples_per_epoch = len(text) // (seq_length + 1)

#### Examples of chunks of text

In [184]:
# Show the dataset description, then decode the first five chunks back to
# text as a sanity check.
print(sequences)
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

<BatchDataset shapes: (201,), types: tf.int64>
b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you '
b"know Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us kill him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be"
b' done: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citizens, the patricians good.\nWhat authority surfeits on would relieve us: if they\nwould yield us but'
b' the superfluity, while it were\nwholesome, we might guess they relieved us humanely;\nbut they think we are too dear: the leanness that\nafflicts us, the object of our misery, is as an\ninventory to parti'
b'cularise their abundance; our\nsufferance is a gain to them Let us revenge this with\nour pikes,

### Create training pairs
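Function that turns each sequence into an (input, target) pair: the input is the sequence without its last character, and the target is the same sequence shifted one character ahead, so every input character is paired with the character that follows it.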

In [185]:
def split_input_target(sequence):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element that follows
    input[i] in the original sequence.
    """
    return sequence[:-1], sequence[1:]

In [186]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

Examples of these pairs

In [187]:
# Decode the first (input, target) pair back to text to show the
# one-character shift between the two.
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you'
Target: b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you '


### Create training batches

In [188]:
# Number of (input, target) pairs per training batch
BATCH_SIZE = 64

# Number of elements held in the shuffle buffer
BUFFER_SIZE = 10000

# Shuffle the pairs, group them into full batches only, and prefetch batches.
dataset = dataset.shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

dataset

<PrefetchDataset shapes: ((64, 200), (64, 200)), types: (tf.int64, tf.int64)>

# Define model

In [189]:
# Number of distinct characters found in the corpus
vocab_size = len(vocab)

# Size of each character embedding vector
embedding_dim = 256

# Number of units in the recurrent layer
rnn_units = 512

# Simple RNN (True) or LSTM (False)
train_simpleRNN = False

### Define models with and without LSTMs

In [190]:
class MyModel(tf.keras.Model):
    """Character-level language model: Embedding -> SimpleRNN -> Dense.

    The Dense layer has `vocab_size` units and no activation, so the model
    emits one unnormalised score (logit) per vocabulary entry at every time
    step.
    """

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        """Create the layers.

        Args:
            vocab_size: number of token IDs; used as the embedding input
                dimension and as the width of the output layer.
            embedding_dim: size of each embedding vector.
            rnn_units: number of units in the SimpleRNN layer.
        """
        # The base constructor takes no positional arguments here. The original
        # `super().__init__(self)` passed the instance a second time, which
        # this TF version tolerates but which is not a valid Model argument.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.rnnLayer = tf.keras.layers.SimpleRNN(rnn_units,
                                       return_sequences=True,
                                       return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on a batch of token IDs.

        Args:
            inputs: batch of token ID sequences fed to the embedding layer.
            states: initial state for the SimpleRNN layer; when None the
                layer's own initial state is used.
            return_state: when True, also return the final RNN state.
            training: forwarded to every layer.

        Returns:
            The per-time-step logits, or `(logits, states)` when
            `return_state` is True.
        """
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.rnnLayer.get_initial_state(x)
        # return_state=True makes the layer return (sequence_output, final_state).
        x, states = self.rnnLayer(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        else:
            return x
        
class MyModelLSTM(tf.keras.Model):
    """Character-level language model: Embedding -> LSTM -> Dense.

    The Dense layer has `vocab_size` units and no activation, so the model
    emits one unnormalised score (logit) per vocabulary entry at every time
    step.
    """

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        """Create the layers.

        Args:
            vocab_size: number of token IDs; used as the embedding input
                dimension and as the width of the output layer.
            embedding_dim: size of each embedding vector.
            rnn_units: number of units in the LSTM layer.
        """
        # The base constructor takes no positional arguments here. The original
        # `super().__init__(self)` passed the instance a second time, which
        # this TF version tolerates but which is not a valid Model argument.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.rnnLayer = tf.keras.layers.LSTM(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on a batch of token IDs.

        Args:
            inputs: batch of token ID sequences fed to the embedding layer.
            states: initial state for the LSTM layer; when None the layer's
                own initial state is used.
            return_state: when True, also return the two final LSTM states.
            training: forwarded to every layer.

        Returns:
            The per-time-step logits, or `(logits, states1, states2)` when
            `return_state` is True.
        """
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.rnnLayer.get_initial_state(x)
        # return_state=True makes the LSTM return the sequence output plus
        # two state tensors.
        x, states1, states2 = self.rnnLayer(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states1, states2
        else:
            return x

In [198]:
# Build the SimpleRNN model. The vocabulary size comes from the lookup layer
# (not from `vocab`), so it covers every ID that layer can produce.
modelSimple = MyModel(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

# Call the untrained model on one batch and print the output shape.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = modelSimple(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
    
modelSimple.summary()

(64, 200, 68) # (batch_size, sequence_length, vocab_size)
Model: "my_model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_11 (Embedding)     multiple                  17408     
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     multiple                  393728    
_________________________________________________________________
dense_11 (Dense)             multiple                  34884     
Total params: 446,020
Trainable params: 446,020
Non-trainable params: 0
_________________________________________________________________


In [140]:
# Build the LSTM model. The vocabulary size comes from the lookup layer
# (not from `vocab`), so it covers every ID that layer can produce.
modelLSTM = MyModelLSTM(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

# Call the untrained model on one batch and print the output shape.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = modelLSTM(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
    
modelLSTM.summary()

(64, 100, 68) # (batch_size, sequence_length, vocab_size)
Model: "my_model_lstm_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_8 (Embedding)      multiple                  17408     
_________________________________________________________________
lstm_8 (LSTM)                multiple                  18882560  
_________________________________________________________________
dense_8 (Dense)              multiple                  139332    
Total params: 19,039,300
Trainable params: 19,039,300
Non-trainable params: 0
_________________________________________________________________


In [199]:
# Model to use: prefer the SimpleRNN model when it has been built in this
# kernel session, otherwise fall back to the LSTM model.
# NOTE(review): the `train_simpleRNN` flag defined with the hyperparameters is
# not consulted here - confirm whether it should drive this choice instead.
try:
    model = modelSimple
    using_LSTM = False
    print("Using model without LSTM")
except NameError:
    # Only an undefined `modelSimple` should trigger the fallback; the
    # original bare `except:` would also have hidden any unrelated error.
    model = modelLSTM
    using_LSTM = True
    print("Using model with LSTM")


Using model without LSTM


# Train model

Sparse categorical cross-entropy measures the same thing as bits per character (the average per-character cross-entropy, expressed in nats rather than bits), which is why it is used as the model's loss here.

In [200]:
# The models output raw logits, hence from_logits=True.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss, metrics=["accuracy"])

# The run directory name records the sequence length, the RNN size and a
# timestamp; LSTM runs differ only by an "LSTM - " prefix.
log_dir = "logs/fit/" + datetime.datetime.now().strftime(
    ("LSTM - " if using_LSTM else "")
    + "Sequence:" + str(seq_length)
    + " RNN Units: " + str(rnn_units)
    + "- %Y%m%d-%H%M%S"
)

# Tensorboard config
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

2021-12-28 02:47:34.633338: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2021-12-28 02:47:34.633435: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1513] CUPTI activity buffer flushed


In [201]:
# Number of training epochs
EPOCHS = 15

### Training without interruptions to get plots

In [108]:
# Train for EPOCHS epochs in one call, logging to TensorBoard; `history`
# keeps the object returned by fit().
history = model.fit(dataset, epochs=EPOCHS, callbacks=[tensorboard_callback])

Epoch 1/15
  2/673 [..............................] - ETA: 1:03 - loss: 4.1929 - accuracy: 0.0898

2021-12-27 03:09:14.239871: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session started.
2021-12-27 03:09:14.360249: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1513] CUPTI activity buffer flushed
2021-12-27 03:09:14.367487: I tensorflow/core/profiler/internal/gpu/device_tracer.cc:223]  GpuTracer has collected 582 callback api events and 582 activity events. 
2021-12-27 03:09:14.395376: I tensorflow/core/profiler/rpc/client/save_profile.cc:176] Creating directory: logs/fit/LSTM - Sequence:100 RNN Units: 2048- 20211227-030912/train/plugins/profile/2021_12_27_03_09_14
2021-12-27 03:09:14.411434: I tensorflow/core/profiler/rpc/client/save_profile.cc:182] Dumped gzipped tool data for trace.json.gz to logs/fit/LSTM - Sequence:100 RNN Units: 2048- 20211227-030912/train/plugins/profile/2021_12_27_03_09_14/lami.trace.json.gz
2021-12-27 03:09:14.423471: I tensorflow/core/profiler/rpc/client/save_profile.cc:176] Creating directory: logs/fit/LSTM - Sequence:100 RN

Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [202]:
class OneStep(tf.keras.Model):
    """Wraps a character model so text can be sampled one character at a time.

    Stores the model, the two lookup layers, a sampling temperature, and a
    dense mask that adds -inf to the logit of the "[UNK]" token so that it is
    never sampled.
    """
    
    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        """Store the collaborators and build the "[UNK]" prediction mask.

        Args:
            model: model called with `inputs`, `states` and `return_state=True`.
            chars_from_ids: layer mapping token IDs to characters.
            ids_from_chars: layer mapping characters to token IDs.
            temperature: divisor applied to the logits before sampling.
        """
        # - Low temperatures result in more predictable text.
        # - Higher temperatures result in more surprising text.

        super().__init__()
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Create a mask to prevent the unknown "[UNK]" token from being generated in the text.
        skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
        sparse_mask = tf.SparseTensor(
            # Put a -inf weight at each bad index.
            values=[-float('inf')]*len(skip_ids),
            indices=skip_ids,
            # Match the shape to the vocabulary
            dense_shape=[len(ids_from_chars.get_vocabulary())])
        # Dense form of the mask, added to the logits in generate_one_step.
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    @tf.function
    def generate_one_step(self, inputs, using_LSTM, states=None):
        """Sample the next character for each input string.

        Args:
            inputs: string tensor; each string is split into UTF-8 characters.
            using_LSTM: when truthy, the wrapped model is expected to return
                (logits, state1, state2); otherwise (logits, state).
            states: forwarded to the wrapped model as its `states` argument.

        Returns:
            `(predicted_chars, states_return)`: the sampled characters and the
            model state - a `[state1, state2]` list when `using_LSTM` is
            truthy, otherwise the single state returned by the model.
        """
        # Convert strings to token IDs.
        input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
        input_ids = self.ids_from_chars(input_chars).to_tensor()

        # Get the model's predictions
        # predicted_logits.shape is [batch, char, next_char_logits]
        
        if using_LSTM:
            predicted_logits, states1, states2 = self.model(inputs=input_ids, states=states,
                                                  return_state=True)
            # Repack the two LSTM states into a single list for the caller.
            states_return = [states1, states2]
        else:
            predicted_logits, states = self.model(inputs=input_ids, states=states,
                                                  return_state=True)
            states_return = states
        # Only use the last prediction.
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits/self.temperature
        
        # Apply the prediction mask for the unknown token
        predicted_logits = predicted_logits + self.prediction_mask

        # Sample the output logits to generate token IDs.
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)

        # Convert from token ids to characters
        predicted_chars = self.chars_from_ids(predicted_ids)

        # Return the characters and model state.
        return predicted_chars, states_return

In [203]:
# Wrap the selected model for character-by-character sampling (default temperature).
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

Generate output from a specific input prompt, for the final section of the assignment.

In [115]:
# Time the generation of 1000 characters following the prompt.
start = time.time()
states = None
next_char = tf.constant(['JULIETA:'])
result = [next_char]

# Feed each sampled character (and the returned state) back in as the next input.
for n in range(1000):
    next_char, states = one_step_model.generate_one_step(next_char, using_LSTM, states=states)
    result.append(next_char)

# Concatenate the prompt and every sampled character.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

JULIETA:
If I can hear by day and night to supper.
Now, for an ush? Timorad within your sight,
Weak washed for your own desert. Let not, even for
Is past; but in the host, to death, are run.

PORTIA:
The webst and mirth o' the kingdom you shall love:
And what this goodly doctor, in Simphrook,
The Turk, that gave these two and wind, behold
Our Gaunts' estimpatives, out, ovidges down
And digg'd his thoughts how to cut off a heart?
Or rather, shall I lack a week with you;
and having that such softer after such
As when we had our kind to embark so long,
Is almost friendship: six ye hope for the time,
Whiles he may conquer fortune and reward.

EXETER:
That is most faint. As black as I have ever committed.

JULIA:
You shall be revolved if your willship that shall
beat you. Yet your cook, it was by that will she ackees.

PISTOL:
The score of court, is yet merely fought?

Second Merchant:
He did; did I nay in his pride deny
us the drink? Is there any man's heart?

ROMEO:
If he be her, now the 

### Training with intermediate results

In [None]:
# FOR LSTM
# Train one epoch at a time and print a generated sample after selected epochs.
# Epochs are numbered from 1 so that the last checkpoint (15, with EPOCHS = 15)
# is reachable; with the original 0-based `range(EPOCHS)` the `epoch == 15`
# test could never be true and no sample was produced after the final epoch.
for epoch in range(1, EPOCHS + 1):
    model.fit(dataset, epochs=1)
    if epoch in (1, 4, 8, 12, 15):
        print("Generating text after epoch: %d" % epoch)

        # Start every sample from a fresh state and the same prompt.
        states = None
        next_char = tf.constant(['First Citizen:\nBefore we proceed any further,'])
        result = [next_char]

        # Feed each sampled character (and the returned state) back in.
        for n in range(1000):
            next_char, states = one_step_model.generate_one_step(next_char, using_LSTM, states=states)
            result.append(next_char)

        result = tf.strings.join(result)
        # The original also assigned `end = time.time()` here, but nothing read it.
        print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)

In [204]:
# FOR THE NORMAL MODEL
# Train one epoch at a time and print a generated sample at selected epochs.
# NOTE(review): `epoch` is 0-based and stops at EPOCHS - 1, so checkpoints at
# or above EPOCHS (16, 22 and 29 with EPOCHS = 15) are never reached - confirm
# whether EPOCHS was meant to be larger for this run.
for epoch in range(EPOCHS):
    model.fit(dataset, epochs=1)
    if epoch in (1, 7, 16, 22, 29):
        print("Generating text after epoch: %d" % epoch)

        # Start every sample from a fresh state and the same prompt.
        states = None
        next_char = tf.constant(['First Citizen:\nBefore we proceed any further,'])
        result = [next_char]

        # Feed each sampled character (and the returned state) back in.
        for n in range(1000):
            next_char, states = one_step_model.generate_one_step(next_char, False, states=states)
            result.append(next_char)

        result = tf.strings.join(result)
        # The original also assigned `end = time.time()` here, but nothing read it.
        print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)

Generating text after epoch: 1
First Citizen:
Before we proceed any further,
But the ood, whice her and langers.

IOWER:
Hes, we'll cemest mades armites
I stand may store
Bring grocker ead, this bear stornced-pirgaich.

AucI at your father
Haph betaly pus,
So spito much monemal.

MICK:
Her, Gies in the caint of the calls and bauth. I work's heart
Deas tike, And of I joy! decome me new in thuse
Caseet as this!

FOUD:
God yet I have weach, unwing as you, deach so I way.

SICOLEEL:
Calm, that:
Tran, I calive 'trick it: his ploverond dast, if thou thinr your paincaso ouths prrengy, ans I one fill he will and none. Gotss, Lood patcus's marnical haughter's mistserp
sit; you worment gold fross dreet,
Trurther's meancered,
Letweel we lann, on Vile; I name to have deacr's meatalk
Sometherch'd to my life his us, come, with All dos, I pay soinour feint,
Ar a Hore;
Allind the commance is tay:
And with sigh to bruded
And thise death and glain he then shase eapine enne to nemes King thyours! my lord

# Tensorboard Plots

In [None]:
#rm -rf ./logs/

In [None]:
#%tensorboard --logdir logs/fit