In [1]:
import tensorflow as tf
import numpy as np
import os
import time

In [2]:
# Will save the file to .keras
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
text = open(path_to_file).read()
print(text[:100])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


In [4]:
# Since we're gonna train on characters, get the unique characters of the text
vocab = sorted(set(text))
print(f"{len(vocab)} unique characters")

65 unique characters


In [5]:
example_texts = ['abcdefg', 'xyz']
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')


ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None
)

chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None
)

In [6]:
ids = ids_from_chars(chars)
chars = chars_from_ids(ids)

print(ids)
print(chars)

<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>
<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>


In [7]:
def text_from_ids(ids):
    return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)

print(text_from_ids(ids).numpy()[0])

b'abcdefg'


In [8]:
# Training model to  predict following:
# Given a character, or a sequence of characters, what is the most probable next character?

all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)


In [9]:
for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))

F
i
r
s
t
 
C
i
t
i


In [10]:
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)

sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

In [11]:
for seq in sequences.take(5):
    print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [12]:
# Create dataset of (input, label)

def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

split_input_target(list("Tensorflow"))

(['T', 'e', 'n', 's', 'o', 'r', 'f', 'l', 'o'],
 ['e', 'n', 's', 'o', 'r', 'f', 'l', 'o', 'w'])

In [13]:
# Split all the sequences in sequences
dataset = sequences.map(split_input_target)

for input_example, target_example in dataset.take(1):
    print("Input :\t", text_from_ids(input_example).numpy())
    print("Target:\t", text_from_ids(target_example).numpy())

Input :	 b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target:	 b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [14]:
# Will create batches of batch_size and shuffle them buffer_size at a time
BATCH_SIZE = 64
BUFFER_SIZE = 10000

dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

print(dataset)

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>


In [15]:
# Build the model
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024

class MyModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__(self)
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(
            rnn_units,
            return_sequences=True,
            return_state=True
        )
        self.dense = tf.keras.layers.Dense(vocab_size)
    
    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)

        if return_state:
            return x, states
        else:
            return x


In [16]:
model = MyModel(
    vocab_size = len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units
)

In [17]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

model.summary()

(64, 100, 66) # (batch_size, sequence_length, vocab_size)
Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 4,022,850
Trainable params: 4,022,850
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Testing the untrained model
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

sampled_indices

print("Input:\n", text_from_ids(input_example_batch[0]).numpy(), "\n")
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b'ambush of my name, strike home,\nAnd yet my nature never in the fight\nTo do in slander. And to behold' 

Next Char Predictions:
 b"rq!jl:NlBpnmjPGDXmXd[UNK]yaEla$[UNK]ENtzaHdsI:uPMS-,$wMaLJZ:&D.wOgBiHmv[UNK]iaakfZeoBz.[UNK]RT3Rm!nQJ'iIUhCkZHBd'djs"


In [19]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

example_batch_loss = loss(target_example_batch,example_batch_predictions)
mean_loss = example_batch_loss.numpy().mean()
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", mean_loss)

Prediction shape:  (64, 100, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         4.188535


In [20]:
# The untrained model shouldn't be too sure of itself -> Get approx vocab size
print(tf.exp(mean_loss).numpy())

65.926155


In [21]:
model.compile(optimizer='adam', loss=loss, metrics=["accuracy"])

In [22]:
checkpoint_dir = "../data/C3-RNNShakespeareCharacter"
try:
    os.mkdir(checkpoint_dir)
except OSError as e:
    print(e)

checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

[Errno 2] No such file or directory: '../data/C3-RNNShakespeareCharacter'


In [23]:
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
  filepath = checkpoint_prefix,
  save_weights_only = True
)

In [24]:
EPOCHS = 20

history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [26]:
# Generate text

class OneStep(tf.keras.Model):
  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')]*len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary
        dense_shape=[len(ids_from_chars.get_vocabulary())])
    self.prediction_mask = tf.sparse.to_dense(sparse_mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(inputs=input_ids, states=states,
                                          return_state=True)
    # Only use the last prediction.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits/self.temperature
    # Apply the prediction mask: prevent "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Convert from token ids to characters
    predicted_chars = self.chars_from_ids(predicted_ids)

    # Return the characters and model state.
    return predicted_chars, states

In [27]:
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [29]:
start = time.time()
states = None
next_char = tf.constant(['JULIET:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

JULIET:

VOLUMNIA:
Away with him to seven. thou must needs conqueron.

COMINIUS:
What's these I 'He's a sweeter hate.

KING RICHARD II:
So--Nor I. I'll open the house of Lancas.
Ay, what should you sleep how to deny him, princely gentleman
Wherein your party have and rotten sweet:
I had sworn thus construe that thy bosomily sun.
Under my lord, I'll woo her, by the hard way hands upon.
Why art, come? not the Duke of Gloucester?

Lord Mayor:
Was not Saints have we spent intolting witness
Are spaced beat to part them in the traitor, justify in prover
Turn to die, And loved this, would you have to knowledge
Of his master's blood, when you have deserved thee
and thus may be angry but that were words:
Besides, the people did prefer thee
Let him deserve to honour and his knife,
Whose haughty, bleed in all, best rabst.

WARWICK:
Find you so shall you hear ha's yet no more deeds?
For you, nom government that you did;
Thou shouldst eleven in a maiden man
Is not a sense of him that hath the reart

In [30]:
# Batch the inputs to create more examples effectively

start = time.time()
states = None
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b"ROMEO:\nLook hither he indeed,\nThan when I saw; villain, despite of all gone:\nBlue, if you will, sir, what's thy air?\nHow hath remember well they thurdy youth:\nBe it possible.\n\nDUKE VINCENTIO:\nFaith, here's young skill be spoken.\n\nPROSPERO:\nHe was a kind of leave, I'll carry it\nas they are for Tybalt. Will the white robbeys may square thee hence;\nBut all together when he will seruct their distrust,\nResign'd and ship the swayless fury.\n\nMersen:\nWhy, what should you sleep? 'tis begin ty-morn; sleep in your grace's haste.\n\nWARWICK:\nAy, wherefore gentle sirs!\n\nGREMIO:\nNot to know, your funches are that good deputy\nIs by the loss of her most ears by mind; but what\nwe repair'd the court, of an agrective prove,\nTo subject the devil's day: if thou fled,\nAs I disdoubt every Juliet.\n\nKING RICHARD II:\nRight, you are as this young prince, and Saint A Endless,\nWhere I have gostion of himself among these rats that makes me\nFor the Galogs, lie himself to y

In [31]:
# Save and load the trained model

tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')





INFO:tensorflow:Assets written to: one_step/assets


INFO:tensorflow:Assets written to: one_step/assets


In [32]:
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(100):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

print(tf.strings.join(result)[0].numpy().decode("utf-8"))

ROMEO:
Indeed, by God's sof, sir; no.

MENENIUS:
Only of what with a bowl down yet were past
Like an oppor
