In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import numpy as np
import os
import time
# Eager execution has to be switched on explicitly in TF 1.x. TF 2.x runs
# eagerly by default and no longer exposes this top-level symbol, so only
# call it when it exists.
if hasattr(tf, 'enable_eager_execution'):
  tf.enable_eager_execution()

In [0]:
# Fetch the Shakespeare corpus; the returned value is the path that is opened below.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [0]:
# Read the raw bytes, then decode explicitly for py2 compat.
# The context manager closes the file handle as soon as the read is done.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [0]:
# Peek at the first 250 characters of the corpus
print(text[0:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [0]:
# Vocabulary: every distinct character in the file, in sorted order
vocab = list(set(text))
vocab.sort()
print('{} unique characters'.format(len(vocab)))

65 unique characters


In [0]:
# Display the sorted list of unique characters
vocab

In [0]:
# Lookup tables between characters and integer ids:
# char2idx maps a character to its position in vocab, idx2char maps it back.
char2idx = dict(zip(vocab, range(len(vocab))))
idx2char = np.array(vocab)

# Encode the whole corpus as an array of integer ids
text_as_int = np.array(list(map(char2idx.__getitem__, text)))

In [0]:
# Pretty-print the first 20 entries of the character -> id mapping
print('{')
for char in list(char2idx)[:20]:
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

In [0]:
# Show the first 13 characters next to their integer encoding
print('{} ---- characters mapped to int ---- > {}'.format(
    repr(text[:13]),
    text_as_int[:13]))

'First Citizen' ---- characters mapped to int ---- > [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [0]:
# Longest input, in characters, that is fed to the model as a single example
seq_length = 100
# Number of (seq_length + 1)-character chunks that fit in the corpus
examples_per_epoch = len(text) // (seq_length + 1)

# Turn the encoded corpus into a stream of single character ids
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Sanity check: decode the first five elements back into characters
for char_id in char_dataset.take(5):
  print(idx2char[char_id.numpy()])

F
i
r
s
t


In [0]:
# Group the character stream into chunks of seq_length + 1 ids,
# dropping the final chunk if it comes up short.
sequences = char_dataset.batch(seq_length + 1, drop_remainder=True)

# Decode the first five chunks to check the grouping
for item in sequences.take(5):
  decoded_chunk = ''.join(idx2char[item.numpy()])
  print(repr(decoded_chunk))
  

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [0]:
# Display the dataset's element shape and dtype
sequences

<DatasetV1Adapter shapes: (101,), types: tf.int64>

In [0]:
def split_input_target(chunk):
    """Split a chunk into an (input, target) pair shifted by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, so target[k] is the element that
    follows input[k] in the original chunk.
    """
    return chunk[:-1], chunk[1:]

# Apply the split to every chunk so each element becomes an (input, target) pair
dataset = sequences.map(split_input_target)

In [0]:
# Display the element shapes and dtypes of the (input, target) dataset
dataset

<DatasetV1Adapter shapes: ((100,), (100,)), types: (tf.int64, tf.int64)>

In [0]:
# Decode one (input, target) pair to confirm the one-character shift
for input_example, target_example in dataset.take(1):
  for label, example in (('Input data: ', input_example),
                         ('Target data:', target_example)):
    print(label, repr(''.join(idx2char[example.numpy()])))

Input data:  'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target data: 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [0]:
# Walk through the first five time steps: at each step the model receives
# input_idx and is expected to produce target_idx.
first_steps = zip(input_example[:5], target_example[:5])
for i, (input_idx, target_idx) in enumerate(first_steps):
    input_char = repr(idx2char[input_idx])
    target_char = repr(idx2char[target_idx])
    print("Step {:4d}".format(i))
    print("  input: {} ({:s})".format(input_idx, input_char))
    print("  expected output: {} ({:s})".format(target_idx, target_char))

Step    0
  input: 18 ('F')
  expected output: 47 ('i')
Step    1
  input: 47 ('i')
  expected output: 56 ('r')
Step    2
  input: 56 ('r')
  expected output: 57 ('s')
Step    3
  input: 57 ('s')
  expected output: 58 ('t')
Step    4
  input: 58 ('t')
  expected output: 1 (' ')


In [0]:
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

# Build the pipeline from `sequences` rather than rebinding `dataset` from
# itself, so re-running this cell does not batch an already batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

dataset

<DatasetV1Adapter shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [0]:
# Length of the vocabulary in chars (number of unique characters in the corpus)
vocab_size = len(vocab)

# The embedding dimension (passed as `embedding_dim` to build_model)
embedding_dim = 256

# Number of RNN units (passed as `rnn_units` to build_model, used by each GRU layer)
rnn_units = 1024

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character-level model: Embedding -> GRU -> GRU -> Dense.

  Args:
    vocab_size: Input vocabulary size of the embedding and width of the
      final Dense layer.
    embedding_dim: Width of the embedding vectors.
    rnn_units: Number of units in each of the two GRU layers.
    batch_size: Fixed batch dimension of the input; both GRU layers are
      created with stateful=True.

  Returns:
    A tf.keras.Sequential model whose last layer is a Dense layer created
    without an activation argument, with vocab_size outputs per time step.
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None])
  # Only the first recurrent layer is given a dropout rate.
  first_gru = tf.keras.layers.GRU(
      rnn_units,
      dropout=0.5,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  second_gru = tf.keras.layers.GRU(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  output_layer = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, first_gru, second_gru, output_layer])

In [0]:
# Training model: the batch dimension is fixed to BATCH_SIZE
model = build_model(vocab_size=len(vocab), embedding_dim=embedding_dim,
                    rnn_units=rnn_units, batch_size=BATCH_SIZE)

In [0]:
# Run one batch through the untrained model to check the output shape
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape,
        "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [0]:
# Print the layer-by-layer output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3935232   
_________________________________________________________________
gru_1 (GRU)                  (64, None, 1024)          6294528   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 10,313,025
Trainable params: 10,313,025
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Sample one character id per time step from the first sequence's predictions,
# then drop the trailing sample axis and convert to a NumPy array.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()

In [0]:
# Decode the first input sequence and the ids sampled from the untrained model
for heading, ids in (("Input: \n", input_example_batch[0]),
                     ("Next Char Predictions: \n", sampled_indices)):
  print(heading, repr("".join(idx2char[ids])))

Input: 
 'ight feel your love\nThan my unpleased eye see your courtesy.\nUp, cousin, up; your heart is up, I kno'
Next Char Predictions: 
 "vdyXK-YTxcDiUMusUQCAlAUlHl-QU.pI-YT$WhTa&C'GsFR DIJ'KKo.&,HbSSAJS-NokmO;jMYD!-OXufDx!XIOuR3H&g$\nyvLm"


In [0]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  `from_logits=True` tells Keras that `logits` have not been passed through
  a softmax.
  """
  per_step_loss = tf.keras.losses.sparse_categorical_crossentropy(
      labels, logits, from_logits=True)
  return per_step_loss

# Evaluate the loss on the example batch and report its mean
example_batch_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ",
      example_batch_predictions.shape,
      " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", np.mean(example_batch_loss.numpy()))

Prediction shape:  (64, 100, 65)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.1742425


In [0]:
# Configure training: Adam optimizer with the loss function defined above
model.compile(loss=loss, optimizer='adam')

In [0]:
# Checkpoints are written under this directory
checkpoint_dir = './training_checkpoints'
# File name pattern; `{epoch}` is a placeholder in the checkpoint name
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback handed to model.fit; saves only the weights
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix)

In [0]:
# Number of training epochs passed to model.fit
EPOCHS=30

In [0]:
#@title Train the model
# Train for EPOCHS epochs with checkpoint_callback attached; the value
# returned by fit is kept in `history`.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/30
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [0]:
# Path of the most recent checkpoint written under checkpoint_dir
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_30'

In [0]:
# Rebuild the network with a batch size of 1 for text generation, then
# restore the weights from the most recent training checkpoint.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

latest_checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
model.load_weights(latest_checkpoint_path)

inference_input_shape = tf.TensorShape([1, None])
model.build(inference_input_shape)

In [0]:
  # Low temperatures result in more predictable text.
  # Higher temperatures result in more surprising text.
  # Experiment to find the best setting.

def generate_text(model, start_string, temperature=1.0, num_generate=1000):
  """Generate text one character at a time with the trained model.

  Args:
    model: Model called on a [1, n] tensor of character ids; its states are
      reset before generation starts.
    start_string: Non-empty seed text. Every character must be a key of
      `char2idx`.
    temperature: Positive number the predictions are divided by before
      sampling. Lower values give more predictable text, higher values give
      more surprising text.
    num_generate: Number of characters to generate after the seed.

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: If `start_string` is empty or `temperature` is not positive.
    KeyError: If `start_string` contains a character missing from `char2idx`.
  """
  # Evaluation step (generating text using the learned model)
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    # Dividing the predictions by zero or a negative value would make the
    # sampling distribution meaningless.
    raise ValueError("temperature must be positive")

  # Converting our start string to numbers (vectorizing)
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Generated characters are collected here and joined at the end
  text_generated = []

  # Here batch size == 1
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # using a categorical distribution to sample the next character;
    # only the sample for the last time step is kept
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # We pass the predicted character as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))



In [0]:
# Sample with temperature 1
print(generate_text(
    model,
    start_string=u"ROMEO: ",
    temperature=1))

ROMEO: if it be
not--
Frant Petruchio?' will to keep the traitor's hand,
Inten it press'd for peace: the yoke
on him edge our airy reigns in meeting kis; which rouses upon woond:
O, but go enter it before him; that
We would have understand moves me: thou know'st
With ons the bloody friend we enjoy, my grave,
That we no shameless ripes grown into arms:
If you so honour and Vours,
Bearing a bawd, plant, boats the city fellow;
Or else impatient that hath dispersed that merrily
instire to come to them at hand: it waughty brother-day:
What, Warwick!
Call forth! tell him I hear,
You, that my wife's a beard take when they are great with
trail. And but to end a duke that you mad.

PETRUCHIO:
Signior Hortensio, 'twixs my grand ir't
Hat thou coward match, sirray libble; then wish this
blood whit thou rest.

KING RICHARD III:
Except, though I owe thou wot well wrong,
And therefore fellow the wower of little
of a doter patient.

BAPTISTA:
Fie, fie! understand him for the people.

CORIOLANUS:
Scorp

In [0]:
# Sample again with the temperature lowered to 0.7
print(generate_text(
    model,
    start_string=u"ROMEO: ",
    temperature=0.7))

ROMEO: if it be so? God have fivol had a sword, and
'tis well; and Romeo will were traitor,
My mind is chanced to be endured!
Why, then I hear, Queen and the Tower,
OffEd whom thou hast found it, his enters no time.

MIRANDA:
You have fought so long such heaven's ignoralt rest!
Carnals forth that the rest from out a good lurge.

KING RICHARD II:
Well, madam, 'tis no time to chide.

JULIET:
I think be so, it is my father's moies.
God save thy heart!

POLIXENES:
We warrant,
With this immodness, though I wish thy life and doubt.

YORK:
Son, I'll curdener.

POLIXENES:
Here, or else thou canst not fear them.

MISTRESS OVERDONE:
What mean these majesty may men, whole heavy fond
That time I'ld give thee in my ears, to misistery.

GLOUCESTER:

KING EDWARD IV:
So, madam.

VIRGILIA:
O, too fondly meet!

First Murderer:
Take that, and would; a taport, stirring unseen
The corsol black instelleg, rash lost itself,
Let him be sect
Of this dishonour'd by out of them; yes, here are enter'd in the city

In [0]:
## changing the temperature to 0.5
print(generate_text(model, start_string=u"ROMEO: ", temperature = 0.5 ))

ROMEO: my wife?

Captain:
My lord, your brother is imprison'd by the nakeo:
It is a mirrous for mouth and the sword of his hasty more death,
Which I did think that they shall know me,
Let one sent forth his head into a most holy sir,
Whilst you shall come to your shepherd. The time is very short.

PERDITA:
I'll not be the first that we have seen vows and blood
With her most sweet, nor do't of this:
What, ho! my lord, the king of war,
Than fiery eyes'd that we banished his lip
Hath dimm'd your itself through moved his hands
As occused him and his cheeks from
The worthiest tender your estate, and here
I
For the benefit of a ground died,
And only chat answer'd him, the selfsame feat
In this before: that are thou must be abused
'I fear, though long, our joys with slower for
the proudest here to make her goodly things as you!

Both Citizens:
The climate he is of little upon't.

MENENIUS:
Why, 'tis not my wife; you fear, I'll go along with thee:
Farewell, and stir to fly,
Whilst thou neglect

# Here I have increased the number of GRU layers: with more neurons learning, the loss decreases and the network is able to learn more from the data as the epochs progress.