# More Realistic Language Modeling & Recurrent Neural Networks



In [0]:
%tensorflow_version 2.x

In [0]:
!pip install ipython-autotime
%load_ext autotime

Collecting ipython-autotime
  Downloading https://files.pythonhosted.org/packages/e6/f9/0626bbdb322e3a078d968e87e3b01341e7890544de891d0cb613641220e6/ipython-autotime-0.1.tar.bz2
Building wheels for collected packages: ipython-autotime
  Building wheel for ipython-autotime (setup.py) ... [?25l[?25hdone
  Created wheel for ipython-autotime: filename=ipython_autotime-0.1-cp36-none-any.whl size=1832 sha256=2b9ec372600ddf66d82fd79983fce1145c7ee5ccd1f49b8a2b7e8ca7c8a750c5
  Stored in directory: /root/.cache/pip/wheels/d2/df/81/2db1e54bc91002cec40334629bc39cfa86dff540b304ebcd6e
Successfully built ipython-autotime
Installing collected packages: ipython-autotime
Successfully installed ipython-autotime-0.1


In [0]:
import os
os.getcwd()

'/content'

In [0]:
os.chdir("/content/drive/My Drive/Colab Notebooks")

In [0]:
os.getcwd()

'/content/drive/My Drive/Colab Notebooks'

In [0]:
import tensorflow as tf
import numpy as np

In [0]:
tf.keras.utils.get_file("/content/drive/My Drive/Colab Notebooks/shakespeare.txt", 
                        "https://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt")

'/content/drive/My Drive/Colab Notebooks/shakespeare.txt'

time: 2.9 ms


# Re-implementation of low-level RNN with Keras functionality

Reference: [Text generation with an RNN](https://www.tensorflow.org/tutorials/text/text_generation)

In [0]:
!python prepare_data.py shakespeare.txt skp

2020-06-01 12:31:13.725661: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
Split input into 22981 sequences...
Serialized 100 sequences...
Serialized 200 sequences...
Serialized 300 sequences...
Serialized 400 sequences...
Serialized 500 sequences...
Serialized 600 sequences...
Serialized 700 sequences...
Serialized 800 sequences...
Serialized 900 sequences...
Serialized 1000 sequences...
Serialized 1100 sequences...
Serialized 1200 sequences...
Serialized 1300 sequences...
Serialized 1400 sequences...
Serialized 1500 sequences...
Serialized 1600 sequences...
Serialized 1700 sequences...
Serialized 1800 sequences...
Serialized 1900 sequences...
Serialized 2000 sequences...
Serialized 2100 sequences...
Serialized 2200 sequences...
Serialized 2300 sequences...
Serialized 2400 sequences...
Serialized 2500 sequences...
Serialized 2600 sequences...
Serialized 2700 sequences...
Serialized 2800 sequences...
Serialized 2900

In [0]:
from prepare_data import parse_seq
import pickle
# this is just a datasets of "bytes" (not understandable)
data = tf.data.TFRecordDataset("skp.tfrecords")

# this maps a parser function that properly interprets the bytes over the dataset
# (with fixed sequence length 200)
# if you change the sequence length in preprocessing you also need to change it here
seq_len = 200
data = data.map(lambda x: parse_seq(x, seq_len))

# a map from characters to indices
vocab = pickle.load(open("skp_vocab", mode="rb")) 
#unpickling-convert byte stream to python object structure #read bytes
vocab_size = len(vocab)
# inverse mapping: indices to characters
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

print(vocab_size) #26(uppercase)+26(lowercase)+16(special characters)=68
print(vocab) 
print(ind_to_ch)

68
{'n': 1, 'R': 2, ':': 3, "'": 4, 'B': 5, 'g': 6, 'M': 7, 'P': 8, 'w': 9, '3': 10, 'a': 11, 'L': 12, '\n': 13, 'A': 14, 'b': 15, 'X': 16, '.': 17, '&': 18, 'Z': 19, '!': 20, 'f': 21, 'i': 22, 'u': 23, 'q': 24, 'K': 25, 'm': 26, 'N': 27, 's': 28, '-': 29, 'r': 30, 'E': 31, 'z': 32, 'p': 33, 'l': 34, '$': 35, 'c': 36, 'S': 37, 'H': 38, 'd': 39, 'y': 40, 'J': 41, 'C': 42, 'o': 43, 'h': 44, 't': 45, '?': 46, 'T': 47, 'Q': 48, ';': 49, 'W': 50, 'O': 51, 'Y': 52, 'v': 53, 'I': 54, 'U': 55, 'D': 56, '[': 57, 'G': 58, ']': 59, 'e': 60, 'x': 61, ',': 62, 'k': 63, 'F': 64, 'j': 65, ' ': 66, 'V': 67, '<S>': 0}
{1: 'n', 2: 'R', 3: ':', 4: "'", 5: 'B', 6: 'g', 7: 'M', 8: 'P', 9: 'w', 10: '3', 11: 'a', 12: 'L', 13: '\n', 14: 'A', 15: 'b', 16: 'X', 17: '.', 18: '&', 19: 'Z', 20: '!', 21: 'f', 22: 'i', 23: 'u', 24: 'q', 25: 'K', 26: 'm', 27: 'N', 28: 's', 29: '-', 30: 'r', 31: 'E', 32: 'z', 33: 'p', 34: 'l', 35: '$', 36: 'c', 37: 'S', 38: 'H', 39: 'd', 40: 'y', 41: 'J', 42: 'C', 43: 'o', 44: 'h', 45

In [0]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = data.map(split_input_target)

time: 51.1 ms


In [0]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

<BatchDataset shapes: ((64, 199), (64, 199)), types: (tf.int32, tf.int32)>

time: 6.11 ms


In [0]:
# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

time: 1.35 ms


In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim,
                              batch_input_shape=[batch_size, None]),
    tf.keras.layers.GRU(rnn_units,
                        return_sequences=True,
                        stateful=True,
                        recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
  ])
  return model

time: 2.46 ms


In [0]:
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

time: 219 ms


In [0]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           17408     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3938304   
_________________________________________________________________
dense (Dense)                (64, None, 68)            69700     
Total params: 4,025,412
Trainable params: 4,025,412
Non-trainable params: 0
_________________________________________________________________
time: 3.21 ms


In [0]:
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 199, 68) # (batch_size, sequence_length, vocab_size)
time: 6.87 s


In [0]:
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

Prediction shape:  (64, 199, 68)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.219768
time: 13.8 ms


In [0]:
model.compile(optimizer='adam', loss=loss)

time: 16.1 ms


In [0]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

time: 1.66 ms


In [0]:
EPOCHS=10

time: 895 µs


# Training- Using model.fit

In [0]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
time: 6min 38s


# Text generation- Keeping batch size = 1

In [0]:
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_10'

time: 9.66 ms


In [0]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

model.build(tf.TensorShape([1, None]))

time: 288 ms


In [0]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            17408     
_________________________________________________________________
gru_1 (GRU)                  (1, None, 1024)           3938304   
_________________________________________________________________
dense_1 (Dense)              (1, None, 68)             69700     
Total params: 4,025,412
Trainable params: 4,025,412
Non-trainable params: 0
_________________________________________________________________
time: 2.42 ms


In [0]:
def generate_text(model, start_string):
  # Evaluation step (generating text using the learned model)

  # Number of characters to generate
  num_generate = 1000

  # Converting our start string to numbers (vectorizing)
  input_eval = [vocab[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Empty string to store our results
  text_generated = []

  # Low temperatures results in more predictable text.
  # Higher temperatures results in more surprising text.
  # Experiment to find the best setting.
  temperature = 1.0

  # Here batch size == 1
  model.reset_states()
  for i in range(num_generate):
      predictions = model(input_eval)
      # remove the batch dimension
      predictions = tf.squeeze(predictions, 0)

      # using a categorical distribution to predict the character returned by the model
      predictions = predictions / temperature
      predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

      # We pass the predicted character as the next input to the model
      # along with the previous hidden state
      input_eval = tf.expand_dims([predicted_id], 0)

      text_generated.append(ind_to_ch[predicted_id])

  return (start_string + ''.join(text_generated))

time: 16.1 ms


In [0]:
print(generate_text(model, start_string=u"ROMEO: "))

ROMEO: O, if thou dies!

JULIA:
O, let me speak; speak for you:
We do not know it; I have known your praise.
How many father?

MISTRESS QUICKLY:
I'll prove it.

IMOGEN:
Some strange instruments, good sir,
Are the same DouR ROSH:
What ring!

CASCI:
Fare you well.

First Senator:
Now shall it be, when I made daintin than
forger: if nothing was fot once our vow'd before,
We swallowed by the specularly break?
Now king is him: how show'd me solely gold,
Shall poor a worthy as confine before!

VALENTINE:
This night it speedless: I will murder me,
Gloucester.

HASTINGS:
My lord, bravely have your services
Of all my dowry, now I do not know--as I bide
The king so much disordeth as a body
That our general expeditions.

Gien:
Four-if would do hair of holy commonwealth!

LUCENTIO:
You shall have long even in that house I find to
do't: come, my cow whence recounters: their passions
From forpease whose hoppiness to wear our
tents to antent upon old and a dullew;
Who, I am king in sword again, or th

# With custom training loop
using tf.GradientTape()

In [0]:
model = build_model(
  vocab_size = len(vocab),
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

time: 210 ms


In [0]:
optimizer = tf.keras.optimizers.Adam()

time: 1.04 ms


In [0]:
@tf.function
def train_step(inp, target):
  with tf.GradientTape() as tape:
    predictions = model(inp)
    loss = tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(
            target, predictions, from_logits=True))
  grads = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))

  return loss

time: 7.04 ms


In [0]:
import time
# Training step
EPOCHS = 10

for epoch in range(EPOCHS):
  start = time.time()

  # initializing the hidden state at the start of every epoch
  # initally hidden is None
  hidden = model.reset_states()

  for (batch_n, (inp, target)) in enumerate(dataset):
    loss = train_step(inp, target)

    if batch_n % 100 == 0:
      template = 'Epoch {} Batch {} Loss {}'
      print(template.format(epoch+1, batch_n, loss))

  # saving (checkpoint) the model every 5 epochs
  if (epoch + 1) % 5 == 0:
    model.save_weights(checkpoint_prefix.format(epoch=epoch))

  print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss))
  print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

model.save_weights(checkpoint_prefix.format(epoch=epoch))

Epoch 1 Batch 0 Loss 4.21998929977417
Epoch 1 Batch 100 Loss 2.3428797721862793
Epoch 1 Batch 200 Loss 2.0970232486724854
Epoch 1 Batch 300 Loss 1.8865776062011719
Epoch 1 Loss 1.7760
Time taken for 1 epoch 40.94127154350281 sec

Epoch 2 Batch 0 Loss 1.7895363569259644
Epoch 2 Batch 100 Loss 1.6617414951324463
Epoch 2 Batch 200 Loss 1.5498625040054321
Epoch 2 Batch 300 Loss 1.494864821434021
Epoch 2 Loss 1.5081
Time taken for 1 epoch 38.597591400146484 sec

Epoch 3 Batch 0 Loss 1.4671552181243896
Epoch 3 Batch 100 Loss 1.4405620098114014
Epoch 3 Batch 200 Loss 1.4301587343215942
Epoch 3 Batch 300 Loss 1.390113115310669
Epoch 3 Loss 1.3489
Time taken for 1 epoch 39.1321005821228 sec

Epoch 4 Batch 0 Loss 1.3332915306091309
Epoch 4 Batch 100 Loss 1.333824872970581
Epoch 4 Batch 200 Loss 1.3624721765518188
Epoch 4 Batch 300 Loss 1.347478985786438
Epoch 4 Loss 1.3610
Time taken for 1 epoch 38.81589198112488 sec

Epoch 5 Batch 0 Loss 1.3021132946014404
Epoch 5 Batch 100 Loss 1.3141967058181

# Dealing with Variable-length Sequences

## With padding and masking

Reference: [Text generation with an RNN](https://www.tensorflow.org/tutorials/text/text_generation)

In [0]:
import tensorflow as tf
import numpy as np

In [0]:
import os
os.chdir("/content/drive/My Drive/Colab Notebooks")

Longest remaining sequence has length 499.

In [0]:
!python prepare_data2.py  shakespeare.txt shake \\n\\n+ --maxlen 500

2020-06-02 07:27:41.224032: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
Split input into 31022 sequences...
Longest sequence is 3094 characters. If this seems unreasonable, consider using the maxlen argument!
Removing sequences longer than 500 characters...
29429 sequences remaining.
Longest remaining sequence has length 499.
Removing length-0 sequences...
29429 sequences remaining.
Serialized 100 sequences...
Serialized 200 sequences...
Serialized 300 sequences...
Serialized 400 sequences...
Serialized 500 sequences...
Serialized 600 sequences...
Serialized 700 sequences...
Serialized 800 sequences...
Serialized 900 sequences...
Serialized 1000 sequences...
Serialized 1100 sequences...
Serialized 1200 sequences...
Serialized 1300 sequences...
Serialized 1400 sequences...
Serialized 1500 sequences...
Serialized 1600 sequences...
Serialized 1700 sequences...
Serialized 1800 sequences...
Serialized 1900 sequences..

In [0]:
from prepare_data2 import parse_seq
import pickle
# this is just a datasets of "bytes" (not understandable)
data = tf.data.TFRecordDataset("shake.tfrecords")

data = data.map(lambda x: parse_seq(x))

# a map from characters to indices
vocab = pickle.load(open("shake_vocab", mode="rb")) 
#unpickling-convert byte stream to python object structure #read bytes
vocab_size = len(vocab)
# inverse mapping: indices to characters
ind_to_ch = {ind: ch for (ch, ind) in vocab.items()}

print(vocab_size) 
print(vocab) 
print(ind_to_ch)

70
{'E': 3, '3': 4, ',': 5, 't': 6, 'N': 7, 'P': 8, 'X': 9, 'w': 10, 'h': 11, 'o': 12, 'L': 13, 'g': 14, 'f': 15, 'p': 16, 'H': 17, '-': 18, 'Z': 19, 'k': 20, 'U': 21, 'O': 22, 'W': 23, 'M': 24, ';': 25, 'V': 26, 's': 27, 'c': 28, 'T': 29, 'S': 30, 'j': 31, 'q': 32, '[': 33, 'K': 34, 'F': 35, ':': 36, ']': 37, 'R': 38, 'J': 39, 'm': 40, 'b': 41, 'x': 42, '?': 43, '!': 44, "'": 45, 'Y': 46, '$': 47, 'B': 48, 'v': 49, 'C': 50, '\n': 51, '.': 52, 'd': 53, '&': 54, 'i': 55, 'z': 56, 'G': 57, 'I': 58, 'e': 59, 'r': 60, 'a': 61, 'n': 62, 'y': 63, 'u': 64, 'A': 65, 'D': 66, ' ': 67, 'l': 68, 'Q': 69, '<PAD>': 0, '<S>': 1, '</S>': 2}
{3: 'E', 4: '3', 5: ',', 6: 't', 7: 'N', 8: 'P', 9: 'X', 10: 'w', 11: 'h', 12: 'o', 13: 'L', 14: 'g', 15: 'f', 16: 'p', 17: 'H', 18: '-', 19: 'Z', 20: 'k', 21: 'U', 22: 'O', 23: 'W', 24: 'M', 25: ';', 26: 'V', 27: 's', 28: 'c', 29: 'T', 30: 'S', 31: 'j', 32: 'q', 33: '[', 34: 'K', 35: 'F', 36: ':', 37: ']', 38: 'R', 39: 'J', 40: 'm', 41: 'b', 42: 'x', 43: '?', 44:

In [0]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = data.map(split_input_target)

In [0]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000
dataset= dataset.shuffle(46000).repeat()
dataset=dataset.padded_batch(BATCH_SIZE, padded_shapes=([499],[499]),drop_remainder=True)

In [0]:
# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim,
                              batch_input_shape=[batch_size, 499]),
    tf.keras.layers.GRU(rnn_units,
                        return_sequences=True,
                        stateful=True,
                        recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
  ])
  return model

In [0]:
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

In [0]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (64, 499, 256)            17920     
_________________________________________________________________
gru_1 (GRU)                  (64, 499, 1024)           3938304   
_________________________________________________________________
dense_1 (Dense)              (64, 499, 70)             71750     
Total params: 4,027,974
Trainable params: 4,027,974
Non-trainable params: 0
_________________________________________________________________


In [0]:
optimizer = tf.keras.optimizers.Adam()

In [0]:
@tf.function
def train_step(inp, target):
  with tf.GradientTape() as tape:
    predictions = model(inp)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=predictions, name=None)
    zero_count = tf.math.count_nonzero(inp, axis=1, keepdims=None, dtype=tf.dtypes.int64, name=None) - 1
    mask = tf.sequence_mask(zero_count, 499,dtype=tf.dtypes.float32)
    masked_loss = tf.math.multiply(loss, mask)
    red_masked_loss = tf.cast(tf.math.reduce_sum(masked_loss, axis=[0, 1], keepdims=False, name=None), tf.float32)
    total_zero_count = tf.cast(tf.reduce_sum(zero_count, 0), tf.float32)
    final_loss = red_masked_loss / total_zero_count
    
  grads = tape.gradient(final_loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))

  return final_loss

In [0]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [0]:
import time
# Training step
EPOCHS = 10

for epoch in range(EPOCHS):
  start = time.time()

  # initializing the hidden state at the start of every epoch
  # initally hidden is None
  hidden = model.reset_states()
  for (batch_n, (inp, target)) in enumerate(dataset):
    if batch_n > 300:
      break

    loss = train_step(inp, target)

    if batch_n % 100 == 0:
      template = 'Epoch {} Batch {} Loss {}'
      print(template.format(epoch+1, batch_n, loss))

  # saving (checkpoint) the model every 5 epochs
  if (epoch + 1) % 5 == 0:
    model.save_weights(checkpoint_prefix.format(epoch=epoch))

  print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss))
  print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

model.save_weights(checkpoint_prefix.format(epoch=epoch))

Epoch 1 Batch 0 Loss 4.249100208282471
Epoch 1 Batch 100 Loss 2.3698971271514893
Epoch 1 Batch 200 Loss 2.131608486175537
Epoch 1 Batch 300 Loss 1.8823298215866089
Epoch 1 Loss 1.8823
Time taken for 1 epoch 192.9318413734436 sec

Epoch 2 Batch 0 Loss 1.9282201528549194
Epoch 2 Batch 100 Loss 1.7172625064849854
Epoch 2 Batch 200 Loss 1.6215095520019531
Epoch 2 Batch 300 Loss 1.5679713487625122
Epoch 2 Loss 1.5680
Time taken for 1 epoch 192.51146411895752 sec

Epoch 3 Batch 0 Loss 1.5721690654754639
Epoch 3 Batch 100 Loss 1.5134748220443726
Epoch 3 Batch 200 Loss 1.429953932762146
Epoch 3 Batch 300 Loss 1.4265996217727661
Epoch 3 Loss 1.4266
Time taken for 1 epoch 194.7809624671936 sec

Epoch 4 Batch 0 Loss 1.4064254760742188
Epoch 4 Batch 100 Loss 1.4210389852523804
Epoch 4 Batch 200 Loss 1.3520255088806152
Epoch 4 Batch 300 Loss 1.3290760517120361
Epoch 4 Loss 1.3291
Time taken for 1 epoch 194.2234833240509 sec

Epoch 5 Batch 0 Loss 1.3120431900024414
Epoch 5 Batch 100 Loss 1.301718473

In [0]:
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_9'

In [0]:
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size = 1)

model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

#suppress warning messages
import logging
tf.get_logger().setLevel(logging.ERROR)

In [0]:
model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_12 (Embedding)     (1, 499, 256)             17920     
_________________________________________________________________
gru_12 (GRU)                 (1, 499, 1024)            3938304   
_________________________________________________________________
dense_12 (Dense)             (1, 499, 70)              71750     
Total params: 4,027,974
Trainable params: 4,027,974
Non-trainable params: 0
_________________________________________________________________


In [0]:
def generate_text(model, start_string):
  # Evaluation step (generating text using the learned model)

  # Number of characters to generate
  num_generate = 1000

  # Converting our start string to numbers (vectorizing)
  input_eval = [vocab[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Empty string to store our results
  text_generated = []

  # Low temperatures results in more predictable text.
  # Higher temperatures results in more surprising text.
  # Experiment to find the best setting.
  temperature = 1.0

  # Here batch size == 1
  model.reset_states()
  for i in range(num_generate):
      predictions = model(input_eval)
      # remove the batch dimension
      predictions = tf.squeeze(predictions, 0)

      # using a categorical distribution to predict the character returned by the model
      predictions = predictions / temperature
      predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

      # We pass the predicted character as the next input to the model
      # along with the previous hidden state
      input_eval = tf.expand_dims([predicted_id], 0)

      text_generated.append(ind_to_ch[predicted_id])

  return (start_string + ''.join(text_generated))

In [0]:
print(generate_text(model, start_string=u"KING: "))

KING: Cheris tame their fruit;
And woman shall I eat the senate secrech
Could hold over from us and you take hole.
May I say my heart? bring forth now will say some
than thou dost heavy to you?
What, thou couldst not? I toick'd but hither,
Do in all distemption to Speak or in,
Where you shall be so 'twas wrought here to leave.
Honderful at the meral, that hope stands straight.
Come to perfect, I am a-mad: an if more
might not till then, when you will not have merry.
What state, the queen's fashion, I say! the bush of them
Shall never seen it from the better, and the witness
Which you forgotten to this medlia in your
stripsting, as I wak it, I do triumph to-night,
Because it were down: I am command thee! is it
do assist. I progument ro sprishet the penny
of much lodies: 'tis so well deserved:
I were pity following:
as he promised buy their majestion!
First, for thee in the way of better weems;
A moral eye, in England and my foot,
That is retentivold as her willer and dull, lo,
Wherein c