In [0]:
!pip install tensorflow==2

In [0]:
import tensorflow as tf
import numpy as np
import os
import time

In [3]:
tf.__version__

'2.0.0'

In [4]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt',
                                       'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


### Read the data

In [5]:
with open(path_to_file, 'rb') as f:
    text = f.read().decode(encoding='utf-8')

print(f"Length of text: {len(text)} characters")

Length of text: 1115394 characters


In [6]:
# Example
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



The unique characters in the file

In [7]:
vocab = sorted(set(text))
print(f"{len(vocab)} unique characters")

65 unique characters


Character <-> integer mappers

In [0]:
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

In [9]:
# Example
print("{} are mapped to {}".format(repr(text[:13]),
                                   text_as_int[:13]))

'First Citizen' are mapped to [18 47 56 57 58  1 15 47 58 47 64 43 52]


### Create training examples and targets

For each *input* sequence, its *target* sequence will have the same length, except shifted one character to the right.

In [10]:
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)
print("examples_per_epoch =", examples_per_epoch)

# Create training examples and targets.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Example
for i in char_dataset.take(5):
    print(idx2char[i.numpy()])

examples_per_epoch = 11043
F
i
r
s
t


Use the `batch` method to convert these individual characters to sequences of the desired size.

In [11]:
sequences = char_dataset.batch(seq_length, drop_remainder=True)

for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
' are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you'
" know Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us"
" kill him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it "
'be done: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor'


For each sequence, duplicate and shift it to prepare the input and target.

Use `map` to apply to each batch

In [0]:
def split_input_target(sequence):
    input_text = sequence[:-1]
    target_text = sequence[1:]
    return input_text, target_text

dataset = sequences.map(split_input_target)

In [13]:
# Example
for input_example, target_example in dataset.take(1):
    print("Input sequence:", repr(''.join(idx2char[input_example.numpy()])))
    print("Target sequence:", repr(''.join(idx2char[target_example.numpy()])))

Input sequence: 'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYo'
Target sequence: 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'


Expected training dynamics for a few time steps:

In [14]:
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step", i)
    print("  input: {} ({})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({})".format(target_idx, repr(idx2char[target_idx])))

Step 0
  input: 18 ('F')
  expected output: 47 ('i')
Step 1
  input: 47 ('i')
  expected output: 56 ('r')
Step 2
  input: 56 ('r')
  expected output: 57 ('s')
Step 3
  input: 57 ('s')
  expected output: 58 ('t')
Step 4
  input: 58 ('t')
  expected output: 1 (' ')


### Create training batches

In [15]:
BATCH_SIZE = 64
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
dataset

<BatchDataset shapes: ((64, 99), (64, 99)), types: (tf.int64, tf.int64)>

### Model

We will use
* `tf.keras.layers.Embedding`
* `tf.keras.layers.GRU`
* `tf.keras.layers.Dense`

In [16]:
vocab_size = len(vocab)
print("vocab_size:", vocab_size)
embedding_dim = 256
rnn_units = 1024

vocab_size: 65


In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size,
                                  embedding_dim,
                                  batch_input_shape=[batch_size, None]),
        tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model

In [0]:
model = build_model(vocab_size, embedding_dim, rnn_units, BATCH_SIZE)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (64, None, 256)           16640     
_________________________________________________________________
gru_1 (GRU)                  (64, None, 1024)          3938304   
_________________________________________________________________
dense_1 (Dense)              (64, None, 65)            66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


The model predicts a sequence of characters as follows:

![](https://www.tensorflow.org/tutorials/text/images/text_generation_training.png)

### Try the model

In [0]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape,
          "# (batch_size, seq_length, vocab_size)")

(64, 99, 65) # (batch_size, seq_length, vocab_size)


Note that the model can process sequences of any length.

Now let's try to *actualize* the prediction by sampling each character from the output distribution.

In [0]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [0]:
print("Input:\n", repr(''.join(idx2char[input_example_batch[0]])))
print()
print("Prediction:\n", repr(''.join(idx2char[sampled_indices])))

Input:
 " thee burnt.\n\nPAULINA:\nI care not:\nIt is an heretic that makes the fire,\nNot she which burns in't. "

Prediction:
 "laYWLmPE!mrUVmOvXEMMRtCSyTbhmrGbZuvuPFyWpCWcEk'sN.UHP!YrsgICvYYqc'juhJ&FOP ;oENRB-e:hIA\nq!BK'vlSVnn"


### Train the model

In [0]:
def loss(labels, logits):
    """
    labels: (batch, seq)
        Each value is an index corresponding to the true label.
    logits: (batch, seq, vocab)
        Each [i,j,:] is a vector of logits over the characters.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True
    )

In [0]:
example_batch_loss = loss(target_example_batch,
                          example_batch_predictions)

print("Prediction shape:", example_batch_predictions.shape,
      "# (batch_size, seq_length, vocab_size)")
print("Loss shape:", example_batch_loss.shape)
print("Scalarized loss:", example_batch_loss.numpy().mean())

Prediction shape: (64, 99, 65) # (batch_size, seq_length, vocab_size)
Loss shape: (64, 99)
Scalarized loss: 4.175805


Configure the training procedure:

In [0]:
model.compile(optimizer='adam',
              loss=loss)

Configure checkpoints

In [0]:
# Directory to save the checkpoints
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True
)

Execute the training

In [0]:
EPOCHS = 10
history = model.fit(dataset,
                    epochs=EPOCHS,
                    callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
  2/174 [..............................] - ETA: 17:28 - loss: 1.3497

KeyboardInterrupt: ignored

In [0]:
!ls {checkpoint_dir}

checkpoint		    ckpt_4.index
ckpt_1.data-00000-of-00001  ckpt_5.data-00000-of-00001
ckpt_1.index		    ckpt_5.index
ckpt_2.data-00000-of-00001  ckpt_6.data-00000-of-00001
ckpt_2.index		    ckpt_6.index
ckpt_3.data-00000-of-00001  ckpt_7.data-00000-of-00001
ckpt_3.index		    ckpt_7.index
ckpt_4.data-00000-of-00001


## Generate text
### Restore the latest checkpoint

For simplicity, we will use a batch size of 1 for predictions.

Because of the way the RNN state is passed from timestep to timestep, to change the batch size, we need to rebuild the the model and restore the save weights.

In [0]:
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_7'

In [0]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

model.build(tf.TensorShape([1, None]))

In [0]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
gru_2 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_2 (Dense)              (1, None, 65)             66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


### Prediction loop

![](https://www.tensorflow.org/tutorials/text/images/text_generation_sampling.png)

In [0]:
def generate_text(model, start_string):
    # Number of characters to generate
    num_generate = 1000

    # Convert the start string to numbers.
    input_eval = [char2idx[ch] for ch in start_string]
    input_eval = tf.expand_dims(input_eval, 0)
    # -> (1, start_string)

    text_generated = []

    # Low temperatures result in more predictable texts.
    # High temperatures result in more surprising texts.
    # Experiment to find the best setting!
    temperature = 1.0

    # Recall that the batch size is 1.
    model.reset_states()
    for i in range(num_generate):
        predictions = model(input_eval)
        # -> (1, seq, vocab)
        # Remove the batch dimension.
        predictions = tf.squeeze(predictions, 0)
        # -> (seq, vocab)

        # Sample the word.
        predictions = predictions / temperature
        sample = tf.random.categorical(predictions, num_samples=1)
        # -> (seq, num_samples)
        predicted_idx = sample[-1,0].numpy()
        # -> scalar

        # Pass the predicted word as the next input
        # along with the previous hidden state.
        input_eval = tf.expand_dims([predicted_idx], 0)
        # -> (1, 1)

        text_generated.append(idx2char[predicted_idx])

    return start_string + ''.join(text_generated)

In [0]:
print(
    generate_text(model, start_string=u"ROMEO: ")
)

ROMEO: and woman make it,
Dear'st thy deed, and So let thee to the Tower.

CLARINCE:
I word your husband's deviceth made I come?
What if the sup in me: I to my Lord God madam,
He is not the almits of my unburning wofur of their second men.
Is thither we name of vingia you, that ye'll stay this face.

TRANIO:
O great plagion'd fellows are mo what ever I begon and even?
I was are touch'd the greatel treed to say your next:
What much thy deadure servant on mine,
Endorly thy royal travely's great doth
to your follow stay to-my; for Grouble ploce,
Which shall saig,
Hethinks shall hann that call'd to him?

GLOUCESTER:

CORIOLANUS:
I proy,' -quie the other relighty
Than seems are ill one of Goet to thy obitees to tale
give a well and falome!

QUEEN:
I te Eny.

LEONTES:
The kenant occarate to let.

MENENIUS:
Ay, with that, you beholding thouse. Hather, had he take
To scark forward, fell on, nor in me;
And never bees as one with us, which
Tear my endity lord! Therefore do that is hope to take T

### Customized training

In [20]:
model = build_model(vocab_size, embedding_dim, rnn_units, BATCH_SIZE)
model.summary()
optimizer = tf.keras.optimizers.Adam()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (64, None, 256)           16640     
_________________________________________________________________
gru_1 (GRU)                  (64, None, 1024)          3938304   
_________________________________________________________________
dense_1 (Dense)              (64, None, 65)            66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [0]:
@tf.function
def train_step(model, optimizer, inp, target):
    with tf.GradientTape() as tape:
        predictions = model(inp)
        loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(target,
                                                            predictions,
                                                            from_logits=True))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

In [25]:
EPOCHS = 5

for epoch in range(EPOCHS):
    start_time = time.time()

    # Initialize the hidden state at the start of every epoch.
    # Initially, the hidden state is None.
    hidden = model.reset_states()

    template = "Epoch {} Batch {} Loss {}"
    for batch_n, (inp, target) in enumerate(dataset):
        loss = train_step(model, optimizer, inp, target)

        if not batch_n % 100:
            print(template.format(epoch+1, batch_n, loss))
    
    # Save the model every 5 epochs.
    if not (epoch+1) % 5:
        model.save_weights(checkpoint_prefix.format(epoch=epoch))
    
    print("Epoch {} Loss {:.4f}".format(epoch+1, loss))
    print("Time taken for the epoch: {} sec".format(time.time() - start_time))
    print()

model.save_weights(checkpoint_prefix.format(epoch=epoch))

Epoch 1 Batch 0 Loss 4.173699378967285
Epoch 1 Batch 100 Loss 2.3661739826202393
Epoch 1 Loss 2.1368
Time taken for the epoch: 904.8282017707825 sec

Epoch 2 Batch 0 Loss 2.126349449157715
Epoch 2 Batch 100 Loss 1.904176950454712
Epoch 2 Loss 1.8429
Time taken for the epoch: 875.7926864624023 sec

Epoch 3 Batch 0 Loss 1.76738440990448
Epoch 3 Batch 100 Loss 1.6135321855545044
Epoch 3 Loss 1.5937
Time taken for the epoch: 864.9985647201538 sec

Epoch 4 Batch 0 Loss 1.6102852821350098
Epoch 4 Batch 100 Loss 1.5031448602676392
Epoch 4 Loss 1.5325
Time taken for the epoch: 857.3296134471893 sec

Epoch 5 Batch 0 Loss 1.4522991180419922
Epoch 5 Batch 100 Loss 1.4587880373001099
Epoch 5 Loss 1.4327
Time taken for the epoch: 858.1769199371338 sec



Test the model.

In [26]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
gru_2 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_2 (Dense)              (1, None, 65)             66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [28]:
print(
    generate_text(model, start_string=u"JULIET: ")
)

JULIET: a fear!

BUCKINGHAM:
Poor Vantague, sir,--

GRUMIO:
O, the shame of whats he will men of kiss
Internen hold men made a shalt
You was done for your heart, now not a kingdance esha'd:
In this envite is witholow.

Medsenger: and he do'e; the omerient past cheiter: and cowfice
You might barect at him for all the crowning to so scrient?
O divine agas,
Harght, I contempt to Frolford,
Whom sho With witness to bed;
For she mother him.

AUFIDIUS:
Thy hands! Fare ever mother,
For hills it stare in learn,
No ray can receive the enver rate was the thum I desire of our roy, promiceion

joint me with this glars and seendly.

FLORCE:
He ha! Do power me to great
Sigh so redied; that Gentleman so grace in my head;
But say'st thee was so ip ray; unto him.

Perant:
Ploates of good comeo, to God, walking thee we execution.
Ines not children
Englock'd my visitiones of virtue, whomaticy hath his misin wrong
From mine officer? What,' then, let mill
Of ove go an exiumppozer.
An it, I'll have him so ni