<a href="https://colab.research.google.com/github/sourcecode369/100-days-of-ml-code/blob/master/text-generation/Text_Generation_with_an_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import absolute_import, print_function, unicode_literals, absolute_import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline
import seaborn as sns
%tensorflow_version 2.x
import tensorflow as tf
import os
import time

TensorFlow 2.x selected.


In [2]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [3]:
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
print('Length of text: {} characters'.format(len(text)))

Length of text: 1115394 characters


In [4]:
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
vocab = sorted(set(text))
print('{} unique characters.'.format(len(vocab)))

65 unique characters.


In [0]:
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)
text_as_int = np.array([char2idx[c] for c in text])

In [7]:
print("{")
for char, _ in zip(char2idx, range(20)):
  print(' {:4s}: {:3d}, '.format(repr(char), char2idx[char]))
print('   ...\n')

{
 '\n':   0, 
 ' ' :   1, 
 '!' :   2, 
 '$' :   3, 
 '&' :   4, 
 "'" :   5, 
 ',' :   6, 
 '-' :   7, 
 '.' :   8, 
 '3' :   9, 
 ':' :  10, 
 ';' :  11, 
 '?' :  12, 
 'A' :  13, 
 'B' :  14, 
 'C' :  15, 
 'D' :  16, 
 'E' :  17, 
 'F' :  18, 
 'G' :  19, 
   ...



In [8]:
print(f'{repr(text[:15])} ------ characters mapped to int ------------> {text_as_int[:15]}')

'First Citizen:\n' ------ characters mapped to int ------------> [18 47 56 57 58  1 15 47 58 47 64 43 52 10  0]


### The prediction task

Given a character, or a sequence of characters, what is the most probable next character? This is the task we're training the model to perform. The input to the model will be a sequence of characters, and we train the model to predict the output—the following character at each time step.

Since RNNs maintain an internal state that depends on the previously seen elements, given all the characters computed until this moment, what is the next character?

### Create training examples and targets

Next divide the text into example sequences. Each input sequence will contain seq_length characters from the text.

For each input sequence, the corresponding targets contain the same length of text, except shifted one character to the right.

So break the text into chunks of seq_length+1. For example, say seq_length is 4 and our text is "Hello". The input sequence would be "Hell", and the target sequence "ello".

To do this first use the tf.data.Dataset.from_tensor_slices function to convert the text vector into a stream of character indices.

In [9]:
seq_length = 100

examples_per_epoch = len(text)

char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

for i in char_dataset.take(10):
  print(idx2char[i.numpy()])

F
i
r
s
t
 
C
i
t
i


In [10]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

for item in sequences.take(5):
  print(''.join(idx2char[item.numpy()]))

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k
now Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us ki
ll him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be d
one: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citi


In [0]:
def split_input_target(chunk):
  input_text = chunk[:-1]
  target_text = chunk[1:]
  return input_text, target_text

dataset = sequences.map(split_input_target)

In [12]:
for input_example, target_example in dataset.take(1):
  print('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
  print('Target data: ',repr(''.join(idx2char[target_example.numpy()])))

Input data:  'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target data:  'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [13]:
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
  print("Step: {:4d}".format(i))
  print(" input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
  print(" target: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))

Step:    0
 input: 18 ('F')
 target: 47 ('i')
Step:    1
 input: 47 ('i')
 target: 56 ('r')
Step:    2
 input: 56 ('r')
 target: 57 ('s')
Step:    3
 input: 57 ('s')
 target: 58 ('t')
Step:    4
 input: 58 ('t')
 target: 1 (' ')


In [14]:
BATCH_SIZE = 64

BUFFER_SIZE = 10000

dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

dataset.element_spec

(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None),
 TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))

In [0]:
vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
                               tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),
                               tf.keras.layers.Bidirectional(tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform')),
                               tf.keras.layers.Bidirectional(tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True, recurrent_initializer='glorot_uniform')),
                               tf.keras.layers.Dense(vocab_size)
  ])
  return model

In [0]:
model = build_model(vocab_size=len(vocab), embedding_dim=embedding_dim, rnn_units=rnn_units, batch_size=BATCH_SIZE)

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
bidirectional (Bidirectional (64, None, 2048)          7876608   
_________________________________________________________________
bidirectional_1 (Bidirection (64, None, 2048)          18886656  
_________________________________________________________________
dense (Dense)                (64, None, 65)            133185    
Total params: 26,913,089
Trainable params: 26,913,089
Non-trainable params: 0
_________________________________________________________________


In [19]:
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# batch_size, sequence_length, vocab_size")

(64, 100, 65) # batch_size, sequence_length, vocab_size


In [0]:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

To get actual predictions from the model we need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.

**Note: It is important to sample from this distribution as taking the argmax of the distribution can easily get the model stuck in a loop.**c

Try it for the first example in the batch:

In [21]:
print('Input: \n', repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n",repr("".join(idx2char[sampled_indices])))

Input: 
 'g of catching nature,\nSpread further.\n\nMENENIUS:\nOne word more, one word.\nThis tiger-footed rage, wh'

Next Char Predictions: 
 "yKRniERe\nRVdDIA-aN;ngk?vqlPhddkc'xgIlChlMcRNNkKWXD'bfWVu?lkb!Tt;rxUqE$MTVDryk3;GP\nDZwS:O;.ClrV&NJGYW"


In [22]:
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
example_batch_loss = loss(target_example_batch, example_batch_predictions)
print("Predictions shape: ", example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
print("Scalar loss: ", example_batch_loss.numpy().mean())

Predictions shape:  (64, 100, 65) # (batch_size, sequence_length, vocab_size)
Scalar loss:  4.1730747


In [0]:
model.compile(optimizer="adam", loss=loss)

In [0]:
checkpoint_dir = './training_checkpoints'

checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [0]:
EPOCHS = 20

In [26]:
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Train for 172 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [28]:
# https://github.com/Skuldur/Classical-Piano-Composer

tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)

'./training_checkpoints/ckpt_20'

In [0]:
model = build_model(vocab_size=vocab_size, embedding_dim=embedding_dim, rnn_units=rnn_units, batch_size=1)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir))
model.build(tf.TensorShape([1, None]))

In [31]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
bidirectional_2 (Bidirection (1, None, 2048)           7876608   
_________________________________________________________________
bidirectional_3 (Bidirection (1, None, 2048)           18886656  
_________________________________________________________________
dense_1 (Dense)              (1, None, 65)             133185    
Total params: 26,913,089
Trainable params: 26,913,089
Non-trainable params: 0
_________________________________________________________________


In [0]:
def generate_text(model, start_string):
  num_generate = 1000
  