<a href="https://colab.research.google.com/github/ra2yama/shakespeare-generator/blob/master/LSTM_Shakespeare_(by_word).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
tf.enable_eager_execution()

import numpy as np
import os
import time
import re

In [0]:
path_to_file = tf.keras.utils.get_file('hamlet.txt', 'https://gist.githubusercontent.com/provpup/2fc41686eab7400b796b/raw/b575bd01a58494dfddc1d6429ef0167e709abf9b/hamlet.txt')

In [0]:
seq_length = 100

In [0]:
def reconstruct(list):
  return " ".join(list)

In [0]:
text = open(path_to_file).read()
print ('Length of text: {} characters'.format(len(text)))
text = text[844:-1]

Length of text: 191726 characters


In [0]:
words = re.findall(r'\S+|\n',text)
available_words = list(set(words))

In [0]:
print(len(available_words))
# outputs basically

7782


In [0]:
vocab = sorted(set(text))
print ('{} unique characters'.format(len(vocab)))

67 unique characters


In [0]:
# Creating a mapping from unique characters to indices
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

text_as_int = np.array([char2idx[c] for c in text])

In [0]:
for char,_ in zip(char2idx, range(20)):
    print('{:6s} ---> {:4d}'.format(repr(char), char2idx[char]))

'\n'   --->    0
'!'    --->    2
' '    --->    1
'"'    --->    3
"'"    --->    5
'&'    --->    4
')'    --->    7
'('    --->    6
'-'    --->    9
','    --->    8
'.'    --->   10
'1'    --->   11
';'    --->   13
':'    --->   12
'?'    --->   14
'A'    --->   15
'C'    --->   17
'B'    --->   16
'E'    --->   19
'D'    --->   18


In [0]:
chunks = tf.data.Dataset.from_tensor_slices(text_as_int).batch(seq_length+1, drop_remainder=True)

for item in chunks.take(5):
  print(repr(''.join(idx2char[item.numpy()])))

'SCENE.- Elsinore.\n\n\nACT I. Scene I.\nElsinore. A platform before the Castle.\n\nEnter two Sentinels-[fir'
'st,] Francisco, [who paces up and down\nat his post; then] Bernardo, [who approaches him].\n\n  Ber. Who'
"'s there.?\n  Fran. Nay, answer me. Stand and unfold yourself.\n  Ber. Long live the King!\n  Fran. Bern"
"ardo?\n  Ber. He.\n  Fran. You come most carefully upon your hour.\n  Ber. 'Tis now struck twelve. Get t"
"hee to bed, Francisco.\n  Fran. For this relief much thanks. 'Tis bitter cold,\n    And I am sick at he"


In [0]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

dataset = chunks.map(split_input_target)

In [0]:
print ('{} ---- characters mapped to int ---- > {}'.format(text[:13], text_as_int[:13]))

SCENE.- Elsin ---- characters mapped to int ---- > [33 17 19 28 19 10  9  1 19 52 59 49 54]


In [0]:
# Batch size 
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences, 
# so it doesn't attempt to shuffle the entire sequence in memory. Instead, 
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [0]:
class Model(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, units):
    super(Model, self).__init__()
    self.units = units

    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    if tf.test.is_gpu_available():
      self.gru = tf.keras.layers.CuDNNGRU(self.units, 
                                          return_sequences=True, 
                                          recurrent_initializer='glorot_uniform',
                                          stateful=True)
    else:
      self.gru = tf.keras.layers.GRU(self.units, 
                                     return_sequences=True, 
                                     recurrent_activation='sigmoid', 
                                     recurrent_initializer='glorot_uniform', 
                                     stateful=True)

    self.fc = tf.keras.layers.Dense(vocab_size)
        
  def call(self, x):
    embedding = self.embedding(x)
    
    # output at every time step
    # output shape == (batch_size, seq_length, hidden_size) 
    output = self.gru(embedding)
    
    # The dense layer will output predictions for every time_steps(seq_length)
    # output shape after the dense layer == (seq_length * batch_size, vocab_size)
    prediction = self.fc(output)
    
    # states will be used to pass at every step to the model while training
    return prediction


In [0]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension 
embedding_dim = 256

# Number of RNN units
units = 1024

model = Model(vocab_size, embedding_dim, units)

In [0]:
EPOCHS = 10

In [0]:
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

<tensorflow.python.training.checkpointable.util.CheckpointLoadStatus at 0x7f4d5b9c4ad0>

In [137]:
# Training loop
for epoch in range(EPOCHS):
    start = time.time()
    
    # initializing the hidden state at the start of every epoch
    # initally hidden is None
    hidden = model.reset_states()
    
    for (batch, (inp, target)) in enumerate(dataset):
          with tf.GradientTape() as tape:
              # feeding the hidden state back into the model
              # This is the interesting step
              predictions = model(inp)
              loss = loss_function(target, predictions)
              
          grads = tape.gradient(loss, model.variables)
          optimizer.apply_gradients(zip(grads, model.variables))

          if batch % 100 == 0:
              print ('Epoch {} Batch {} Loss {:.4f}'.format(epoch+1,
                                                            batch,
                                                            loss))
    # saving (checkpoint) the model every 5 epochs
    if (epoch + 1) % 5 == 0:
      model.save_weights(checkpoint_prefix)

    print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss))
    print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 0.1665
Epoch 1 Loss 0.2520
Time taken for 1 epoch 4.32525992393 sec

Epoch 2 Batch 0 Loss 0.1545
Epoch 2 Loss 0.2430
Time taken for 1 epoch 4.16333794594 sec

Epoch 3 Batch 0 Loss 0.1540
Epoch 3 Loss 0.2427
Time taken for 1 epoch 4.16568112373 sec

Epoch 4 Batch 0 Loss 0.1419
Epoch 4 Loss 0.2096
Time taken for 1 epoch 4.17448401451 sec

Epoch 5 Batch 0 Loss 0.1362
Epoch 5 Loss 0.2252
Time taken for 1 epoch 4.22853589058 sec

Epoch 6 Batch 0 Loss 0.1455
Epoch 6 Loss 0.2182
Time taken for 1 epoch 4.17860889435 sec

Epoch 7 Batch 0 Loss 0.1380
Epoch 7 Loss 0.2175
Time taken for 1 epoch 4.21463298798 sec

Epoch 8 Batch 0 Loss 0.1263
Epoch 8 Loss 0.2094
Time taken for 1 epoch 4.18945598602 sec

Epoch 9 Batch 0 Loss 0.1363
Epoch 9 Loss 0.2220
Time taken for 1 epoch 4.18125987053 sec

Epoch 10 Batch 0 Loss 0.1308
Epoch 10 Loss 0.2006
Time taken for 1 epoch 4.21829009056 sec



In [151]:
#5
!ls {checkpoint_dir}
!cd {checkpoint_dir}
!cat ./training_checkpoints/ch

checkpoint  ckpt.data-00000-of-00001  ckpt.index


UnicodeDecodeError: ignored

In [0]:
model.build(tf.TensorShape([BATCH_SIZE, seq_length]))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      multiple                  17152     
_________________________________________________________________
cu_dnngru_7 (CuDNNGRU)       multiple                  3938304   
_________________________________________________________________
dense_7 (Dense)              multiple                  68675     
Total params: 4,024,131
Trainable params: 4,024,131
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Using adam optimizer with default arguments
optimizer = tf.train.AdamOptimizer()

# Using sparse_softmax_cross_entropy so that we don't have to create one-hot vectors
def loss_function(real, preds):
    return tf.losses.sparse_softmax_cross_entropy(labels=real, logits=preds)

In [0]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

In [0]:
print(words[:10000])

In [0]:
print(text[:1000])

In [0]:
print(reconstruct(words))

In [0]:
for input_example, target_example in  dataset.take(1):
  print ('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
  print ('Target data:', repr(''.join(idx2char[target_example.numpy()])))

In [0]:
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step {:4d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

In [0]:
#PREDICTING TEXT
model.save_weights(checkpoint_prefix)

In [0]:
!ls {checkpoint_dir}

checkpoint  ckpt.data-00000-of-00001  ckpt.index


In [0]:
tf.train.latest_checkpoint(checkpoint_dir)

u'./training_checkpoints/ckpt'

In [0]:
# Evaluation step (generating text using the learned model)

# Number of characters to generate
num_generate = 5000

# You can change the start string to experiment
start_string = 'S'

# Converting our start string to numbers (vectorizing) 
input_eval = [char2idx[s] for s in start_string]
input_eval = tf.expand_dims(input_eval, 0)

# Empty string to store our results
text_generated = []

# Low temperatures results in more predictable text.
# Higher temperatures results in more surprising text.
# Experiment to find the best setting.
temperature = 1.0

out_model = Model(vocab_size, embedding_dim, units)

out_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

out_model.build(tf.TensorShape([1, None]))

In [139]:
# Evaluation loop.

# Here batch size == 1
out_model.reset_states()
for i in range(num_generate):
    predictions = out_model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # using a multinomial distribution to predict the word returned by the model
    predictions = predictions / temperature
    predicted_id = tf.multinomial(predictions, num_samples=1)[-1,0].numpy()
    
    # We pass the predicted word as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)
    
    text_generated.append(idx2char[predicted_id])

print (start_string + ''.join(text_generated))

Shoo more!
    These words like daggers enter in mine ears.
    No  I leave think of this
    HApirt'd from a handsaw.
  
                    Enter Queen.

    How now?
  All. Give fire.  
  Ham. No, by my former llus fan
    The inward service. We rat be, unless she drown'd herself in his chiefes in such mett.
    Let me not ducattlemark.
      Do not ases be time,
     The here a- but lenes my lord.
  Ham. I cannot dount marrie own is your offence in't?
  Ham. No, no! They do but jest, poison in jest; no offence i' th'
    world.
  King. What, Gertrude, you shall be dry
    again.
  Ros. I understand you not, meand many more of the same bevy that I live the okn my lord; I have.
  Pol. How doe you must fie, faith, not if you have of life be windrous and toreen'd currult.
    Do you consent we shall acquaint him with it,
    As not faith. Sir, his hide is so tann'd with his trade that 'a will
    knew for. I was this seal'd and done
    Than tre I'll loosifis baw.
  Ham. I will tell yo

RuntimeError: ignored