In [1]:
import os
import warnings

# Suppress Python warnings for the rest of the notebook.
warnings.filterwarnings("ignore")
# Set before TensorFlow is imported in the next cell.
# NOTE(review): level "2" is presumably meant to hide TensorFlow's native INFO/WARNING logs — confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

In [2]:
import time

import numpy as np
import tensorflow as tf

Downloading the dataset (Shakespeare)

In [3]:
# Download the Shakespeare corpus and keep the path of the local copy.
path_to_file = tf.keras.utils.get_file(
    fname="shakespeare.txt",
    origin="https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt",
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Read the dataset

In [4]:
# Read the raw bytes and decode them explicitly so the text (including its
# newlines) is taken exactly as stored. The context manager closes the file
# handle as soon as the read completes instead of leaving it open.
with open(path_to_file, "rb") as corpus_file:
    text = corpus_file.read().decode(encoding="utf-8")
print(f"Length of text: {len(text)} characters")

Length of text: 1115394 characters


In [5]:
# Preview the first 250 characters of the corpus.
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [6]:
# The vocabulary is every distinct character in the corpus, in sorted order.
unique_chars = set(text)
vocab = sorted(unique_chars)
print(f"{len(vocab)} unique characters")

65 unique characters


**Then process the text**

Vectorizing the text using keras

In [7]:
example_texts = ["abcdefg", "xyz"]

# Split each example string into its individual UTF-8 characters.
chars = tf.strings.unicode_split(input=example_texts, input_encoding="UTF-8")
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [8]:
# Lookup layer that maps each vocabulary character to an integer ID
# (built without a mask token).
ids_from_chars = tf.keras.layers.StringLookup(
    mask_token=None,
    vocabulary=list(vocab),
)

In [9]:
# Convert the split example characters into their integer IDs.
ids = ids_from_chars(chars)
ids

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>

To make the IDs human-readable again, map them back to characters with an inverted lookup layer.

In [10]:
# Inverse lookup (ID -> character). It reuses the forward layer's vocabulary
# so both directions agree on the mapping.
chars_from_ids = tf.keras.layers.StringLookup(
    invert=True,
    mask_token=None,
    vocabulary=ids_from_chars.get_vocabulary(),
)

In [11]:
# Map the example IDs back to characters to confirm the round trip.
chars = chars_from_ids(ids)
chars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [12]:
# Join the characters of each row (axis=1) back into a single string.

tf.strings.reduce_join(chars, axis=1).numpy()

array([b'abcdefg', b'xyz'], dtype=object)

In [13]:
def text_from_ids(ids):
    """Decode character IDs to text.

    Looks each ID up with `chars_from_ids` and joins the resulting
    characters along the last axis.

    Args:
        ids: Integer character IDs accepted by `chars_from_ids`.

    Returns:
        A string tensor with the last axis of `ids` collapsed into text.
    """
    char_tokens = chars_from_ids(ids)
    return tf.strings.reduce_join(char_tokens, axis=-1)

**Next: the prediction task**

First, create the training examples and targets.

In [14]:
# Encode the whole corpus as one flat tensor of character IDs.
corpus_chars = tf.strings.unicode_split(text, "UTF-8")
all_ids = ids_from_chars(corpus_chars)
all_ids

<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([19, 48, 57, ..., 46,  9,  1])>

In [15]:
# Build a dataset that yields the corpus one character ID at a time.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [16]:
# Decode the first ten IDs to check the dataset contents. The loop variable is
# deliberately not called `ids`: that name already holds the example ID tensor
# created earlier in the notebook, and overwriting it here would make the
# earlier cells give different results when re-run.
for char_id in ids_dataset.take(10):
    print(chars_from_ids(char_id).numpy().decode("utf-8"))

F
i
r
s
t
 
C
i
t
i


In [17]:
# Number of input characters per training example.
seq_length = 100
# Each example consumes seq_length + 1 characters: the chunk is later split
# into an input and a target that is the input shifted by one character.
example_per_epoch = len(text) // (seq_length + 1)

Then convert these individual characters into sequences of the desired size using the `batch` method.

In [18]:
# Group the ID stream into fixed-size chunks of seq_length + 1 IDs; the
# incomplete chunk at the end is dropped.
chunk_size = seq_length + 1
sequences = ids_dataset.batch(chunk_size, drop_remainder=True)

for seq in sequences.take(1):
    print(chars_from_ids(seq))

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)


In [19]:
# Decode the first five chunks back to text to check the chunking.
for seq in sequences.take(5):
    decoded = text_from_ids(seq)
    print(decoded.numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


Take a sequence as input, duplicate it, and shift it by one position to align the input and the label for each timestep.

In [20]:
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair offset by one step.

    Args:
        sequence: Any sliceable sequence (list, string, tensor, ...).

    Returns:
        A tuple `(input_text, target_text)` where `input_text` is everything
        except the last element and `target_text` is everything except the
        first, so `target_text[i]` is the element that follows `input_text[i]`.
    """
    return sequence[:-1], sequence[1:]

In [21]:
# Quick check on a plain Python list: the target is the input shifted by one.
split_input_target(list("TensorFlow"))

(['T', 'e', 'n', 's', 'o', 'r', 'F', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'F', 'l', 'o', 'w'])

In [22]:
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

In [23]:
# Show one decoded (input, target) pair.
for input_example, target_example in dataset.take(1):
    input_text = text_from_ids(input_example).numpy()
    target_text = text_from_ids(target_example).numpy()
    print("Input :", input_text)
    print("Target :", target_text)

Input : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target : b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


**Now create training batches**

In [24]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
BUFFER_SIZE = 10000

# Build the pipeline from `sequences` instead of re-assigning `dataset` from
# itself. That keeps this cell idempotent: re-running it produces the same
# (BATCH_SIZE, seq_length) batches rather than batching already-batched data.
# `tf.data.AUTOTUNE` is the current name for the older
# `tf.data.experimental.AUTOTUNE` alias.
dataset = (
    sequences.map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE)
)

dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

**Then, build the model!**

The layers are:
1. tf.keras.layers.Embedding: the input layer
2. tf.keras.layers.GRU: a type of RNN with size `units=rnn_units`
3. tf.keras.layers.Dense: the output layer

In [25]:
#TODO 3

# Number of distinct characters in the corpus.
# NOTE(review): the name is spelled `vocal_size` (probably meant `vocab_size`)
# and no visible cell reads it; the model below is sized from the StringLookup
# vocabulary instead. Confirm whether this variable is still needed.
vocal_size = len(vocab)

# Embedding dimension
embedding_dim = 256

# Number of RNN (GRU) units
rnn_units = 1024

The code below does the following:


*   derives a class from tf.keras.Model
*   uses the constructor to define the layers of the model
*   defines the forward pass using the layers defined in the constructor





In [26]:
class MyModel(tf.keras.Model):
    """Character-level language model: Embedding -> GRU -> Dense."""

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        """Create the three layers.

        Args:
            vocab_size: Number of token IDs; used as the embedding input size
                and as the width of the output layer.
            embedding_dim: Size of each embedding vector.
            rnn_units: Number of units in the GRU layer.
        """
        super().__init__()
        # Maps each token ID to a dense vector.
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        # Returns the output at every timestep plus the final state, so the
        # state can be passed back in on the next call.
        self.gru = tf.keras.layers.GRU(
            rnn_units, return_sequences=True, return_state=True
        )
        # One output value (logit) per vocabulary entry.
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the forward pass.

        Args:
            inputs: Batch of token-ID sequences.
            states: GRU state to start from; a fresh initial state is created
                when this is None.
            return_state: When true, also return the GRU state after the pass.
            training: Forwarded to every layer.

        Returns:
            The dense-layer output, or `(output, states)` when `return_state`
            is true.
        """
        embedded = self.embedding(inputs, training=training)
        if states is None:
            # No state carried over from a previous call: start from the
            # GRU's initial state for this batch size.
            batch_size = tf.shape(embedded)[0]
            states = self.gru.get_initial_state(batch_size=batch_size)
        gru_output, states = self.gru(
            embedded, initial_state=states, training=training
        )
        logits = self.dense(gru_output, training=training)

        if return_state:
            return logits, states
        return logits

In [27]:
# Size the model from the `StringLookup` vocabulary so the output width
# matches the IDs those layers produce.
lookup_vocab_size = len(ids_from_chars.get_vocabulary())
model = MyModel(
    vocab_size=lookup_vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
)

**Try the model**

In [28]:
# Run one batch through the untrained model and check the output shape.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    prediction_shape = example_batch_predictions.shape
    print(prediction_shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 66) # (batch_size, sequence_length, vocab_size)


In [29]:
# Print the layer-by-layer summary of the model.
model.summary()

To get actual predictions from the model, you need to sample from the output distribution.

In [30]:
# Sample one token ID per timestep from the logits of the first example in
# the batch, then drop the trailing sample axis.
first_example_logits = example_batch_predictions[0]
sampled_indices = tf.squeeze(
    tf.random.categorical(first_example_logits, num_samples=1), axis=-1
).numpy()

In [31]:
# Display the sampled token IDs.
sampled_indices

array([17, 60, 19, 49,  7,  7, 65, 17, 63, 55, 57, 46, 10,  2, 34, 20, 46,
        9, 65, 12, 64, 65, 12, 23, 45,  8, 12, 44, 59, 35, 34, 36, 53, 65,
       34, 34, 39, 10, 15, 47, 27, 34, 57, 21,  5, 62,  0, 63, 26, 14,  2,
       32,  7, 16, 58, 56, 34, 30, 63, 50, 54,  9, 31, 45,  7, 21, 26, 11,
        8, 39, 31, 27,  6, 33, 64, 11, 42,  1, 35, 51, 64, 20, 51, 37, 27,
       34,  0, 14,  0, 16,  2, 58, 59, 49, 35, 60, 25, 36, 53,  6])

In [32]:
# Decode the first input of the batch and the IDs sampled from the untrained model.
input_text = text_from_ids(input_example_batch[0]).numpy()
predicted_text = text_from_ids(sampled_indices).numpy()

print("Input:\n", input_text)
print()
print("Next Char Predictions:\n", predicted_text)

Input:
 b"t abuses:\nTherefore use none: let Romeo hence in haste,\nElse, when he's found, that hour is his last"

Next Char Predictions:
 b"DuFj,,zDxprg3 UGg.z;yz;Jf-;etVUWnzUUZ3BhNUrH&w[UNK]xMA S,CsqUQxko.Rf,HM:-ZRN'Ty:c\nVlyGlXNU[UNK]A[UNK]C stjVuLWn'"


**Train the model**

This is a standard classification problem: the model predicts the class of the next character.

Attach an optimizer and loss function

In [33]:
#add loss function
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

In [34]:
# Mean loss of the untrained model on the example batch.
example_batch_mean_loss = loss(
    y_true=target_example_batch, y_pred=example_batch_predictions
)
prediction_shape = example_batch_predictions.shape
print("Prediction shape: ", prediction_shape, "# (batch_size, sequence_length, vocab_size)")
print("Mean loss:     ", example_batch_mean_loss)

Prediction shape:  (64, 100, 66) # (batch_size, sequence_length, vocab_size)
Mean loss:      tf.Tensor(4.188002, shape=(), dtype=float32)


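For a newly initialized model, the exponential of the mean loss should be approximately equal to the vocabulary size. A much higher value means the model is confident in its wrong answers and is badly initialized.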

In [35]:
# Exponential of the mean loss, for comparison with the vocabulary size.
tf.exp(example_batch_mean_loss).numpy()

65.891014

Configure the training procedure.

In [36]:
# Attach the Adam optimizer and the loss defined above.
model.compile(optimizer="adam", loss=loss)

**Configure checkpoints**

In [37]:
# Directory where the checkpoints are saved
checkpoint_dir = "./training_checkpoints"
# File name pattern for the checkpoint files (contains an `{epoch}` placeholder)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

# Save only the model weights, not the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix,
)

**Execute training**

Use 10 epochs to keep the training time reasonable.

In [38]:
# Number of passes over the training dataset.
EPOCHS = 10

In [39]:
# Train the model with the checkpoint callback attached; the recorded log
# below shows roughly 900 s per epoch.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m911s[0m 5s/step - loss: 3.1489
Epoch 2/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m912s[0m 5s/step - loss: 1.9282
Epoch 3/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m900s[0m 5s/step - loss: 1.6346
Epoch 4/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m937s[0m 5s/step - loss: 1.4882
Epoch 5/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m904s[0m 5s/step - loss: 1.3986
Epoch 6/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m920s[0m 5s/step - loss: 1.3311
Epoch 7/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m916s[0m 5s/step - loss: 1.2808
Epoch 8/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m944s[0m 5s/step - loss: 1.2414
Epoch 9/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m918s[0m 5s/step - loss: 1.1997
Epoch 10/10
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m908s[0m 5s

**Generate text**

The simplest way to generate text with this model is to run it in a loop.

In [45]:
class OneStep(tf.keras.Model):
    """Wrap a trained model so it generates text one character at a time."""

    def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
        """Store the model and lookup layers and build the "[UNK]" mask.

        Args:
            model: Model called as
                `model(inputs=..., states=..., return_state=True)` and
                returning `(logits, states)`.
            chars_from_ids: Lookup layer mapping token IDs to characters.
            ids_from_chars: Lookup layer mapping characters to token IDs.
            temperature: Positive number the logits are divided by before
                sampling.

        Raises:
            ValueError: If `temperature` is not strictly positive.
        """
        super().__init__()
        # The logits are divided by the temperature. Zero would produce
        # inf/NaN logits and a negative value would invert the distribution,
        # so reject both up front instead of sampling garbage later.
        if temperature <= 0:
            raise ValueError(
                f"temperature must be a positive number, got {temperature!r}"
            )
        self.temperature = temperature
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_chars

        # Create a mask to prevent "[UNK]" from being generated.
        skip_ids = self.ids_from_chars(["[UNK]"])[:, None]
        sparse_mask = tf.SparseTensor(
            # Put -inf at each bad index
            values=[-float("inf")] * len(skip_ids),
            indices=skip_ids,
            # Match the shape to the vocabulary
            dense_shape=[len(ids_from_chars.get_vocabulary())]
        )
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)

    @tf.function
    def generate_one_step(self, inputs, states=None):
        """Sample the next character for each input string.

        Args:
            inputs: String tensor holding the text generated so far (or the
                prompt), one entry per sequence.
            states: Model state returned by the previous call, or None on the
                first call.

        Returns:
            A tuple `(predicted_chars, states)`: the sampled next character
            for each input string and the updated model state.
        """
        # Convert strings to token IDs.
        input_chars = tf.strings.unicode_split(inputs, "UTF-8")
        input_ids = self.ids_from_chars(input_chars)
        # Convert the RaggedTensor to a dense tensor
        input_ids = input_ids.to_tensor()

        # Run the model
        predicted_logits, states = self.model(
            inputs=input_ids, states=states, return_state=True
        )
        # Only use the last prediction
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits / self.temperature
        # Apply the prediction mask: prevent "[UNK]" from being generated
        predicted_logits = predicted_logits + self.prediction_mask

        # Sample the output logits to generate token IDs
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)

        # Convert from token IDs to characters
        predicted_chars = self.chars_from_ids(predicted_ids)

        # Return the characters and the model state
        return predicted_chars, states

Run it in a loop to generate long text. With so few training epochs, the model has not yet learned to form coherent sentences.

In [47]:
# Wrap the trained model for single-step generation (default temperature).
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [50]:
# Generate 1000 characters from a single prompt and time the run.
start = time.time()
states = None
next_char = tf.constant(["CHILDER:"])
result = [next_char]

for _ in range(1000):
    # Feed the last character and the carried-over state back into the model.
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()
generated_text = result[0].numpy().decode("utf-8")
separator = "\n\n" + "_" * 80
print(generated_text, separator)
print("\nRun time:", end - start)

CHILDER:
What man, I'll try you here?

SICINIUS:
May be done!
I would thee to my crown here to thee,
Upon my capers was for prop'd bright.

Servant:
He ready here let him up.
How upreyench out of whom women here?
I'll have a king King Herely to my soul.
I'll go with mine honourably can garm:
I'll have heaven tine; wine reward.
What! do, sir, the stage gall'd ones.

WARWICK:
So us to clut the duke to baid on him;
To dress the doom of other
Upon my knees, whrether was my skndels wights
and post mare much reyour ann tears;
Gozner that I loved thee spench. I stand
Attwnich heir to us. 'TY O, Caius Marcius,
Let's had not brook and lent; it shall near's blood,
The villain roward once, to find so please you both.
Fie it is much sleep; but ever thou draw now,
And were it the bodomity, age tos
If Thomas to my friend By once may
Unless to my difference; and have you
Out, as I tell thee quickly, in their heels,
Or let get him each confess to to and
But as not smiled table and go
and joins, here v

To improve the results, train for more epochs (try `EPOCHS = 30` as a bare minimum).

Alternatively, add another RNN layer to improve the model's accuracy.

In [51]:
# Generate 1000 characters for five identical prompts in one batch and time the run.
start = time.time()
states = None
next_char = tf.constant(["ROMEO:"] * 5)
result = [next_char]

for _ in range(1000):
    # Feed the last characters and the carried-over state back into the model.
    next_char, states = one_step_model.generate_one_step(next_char, states=states)
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()
separator = "\n\n" + "_" * 80
print(result, separator)
print("\nRun time:", end - start)

tf.Tensor(
[b"ROMEO:\nShould have I lay that word 'tis like your city\nWithin my loving son of these fisless faith,\nOn the heavy dupared, breating, and my\nneither now you deniced, and undergreet\nOne post to her here fast to looked\nBut mender out a man levers together,\nHow is it to claf our misses and be sobered,\nPluckle forgot his banies' masquence\nFrom Romeo blows, blest awhile to talk on death\nTo free mechers so, and mustering vile,\nIf they have never but blood this murderer,\nThe writt-net days for governgury.\nI should mine honesty in our cousin.\nWelcome, my lord, great Henry three death!\nSome mighty neither corsels for her,\nthe truth of resign whose unpossibler\nOf the duke of Lord of Clifford\nWhom valiant best of your blood and Duke of Norfolk,\nThe our speed as he should come to jest.\n\nDUKE VINCENTIO:\nHow! Which else? They cameftake tom great servant,\nAs in a miserable, he will to\ndo it straight, nor he shall not but a postedier.\n\nBIANCA:\nOn Paris lips, Vili

**Export the model**

In [53]:
# Export the single-step generator to the "one_step" directory, then load it
# back to check that the saved copy can be used.
tf.saved_model.save(one_step_model, "one_step")
one_step_reloaded = tf.saved_model.load("one_step")

In [54]:
# Generate 100 characters with the reloaded model to confirm the export works.
states = None
next_char = tf.constant(["WARWICK:"])
result = [next_char]

for _ in range(100):
    next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
    result.append(next_char)

joined = tf.strings.join(result)
print(joined[0].numpy().decode("utf-8"))

WARWICK:
Say what 'hose is the caon? I never sad
for me come from his babe: but thou didst small have
man to
