In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sbn
import tensorflow as tf

# Getting the Data

In [2]:
# NOTE(review): absolute, machine-specific path; it must be adjusted before
# this notebook can run on another machine.
shakespeares_work = '/Users/avikram/Documents/Deep Learning Masterclass/06-NLP-and-Text-Data/shakespeare.txt'

# Read the entire corpus into memory as a single string.
with open(shakespeares_work) as corpus_file:
    text = corpus_file.read()

# Preview the first 100 characters of the corpus.
print(text[:100])


                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose mi


In [3]:
# The vocabulary is every distinct character in the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()

len(vocab)

84

# Text Processing

In [4]:
# Map each vocabulary character to its position in `vocab`.
char_to_index = dict(zip(vocab, range(len(vocab))))

# Spot-check a single character.
char_to_index['H']

33

In [5]:
# Reverse lookup: position -> character. A NumPy array is used so that an
# array of indices can be decoded in a single indexing operation.
ind_to_char = np.asarray(vocab)

# Spot-check: this is the inverse of the char_to_index['H'] lookup.
ind_to_char[33]

'H'

In [6]:
# Encode the whole corpus as an integer array: one vocabulary index per character.
encoded_text = np.array([char_to_index[ch] for ch in text])

# Preview the first 100 encoded characters. (A bare `encoded_text.shape`
# expression used to sit here; a notebook only displays the value of a cell's
# last expression, so it produced no output and has been removed.)
print(encoded_text[:100])

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64]


# Creating Batches

- The sequence length is a tunable choice that depends heavily on the kind of data you have; the cell below uses 120 characters per training sequence.

In [7]:
# Number of characters the model sees in each training example.
sequence_len = 120

# Every chunk holds one extra character so that the input and target
# sequences can be offset from each other by a single position.
total_sequences = len(text) // (1 + sequence_len)
total_sequences

45005

In [8]:
# Wrap the encoded corpus in a tf.data pipeline that yields one character
# index per element.
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)
type(char_dataset)

tensorflow.python.data.ops.dataset_ops.TensorSliceDataset

In [9]:
# Group consecutive characters into chunks of sequence_len + 1; the final
# chunk is dropped if it is shorter than that (drop_remainder=True).
sequences = char_dataset.batch(sequence_len + 1, drop_remainder=True)

In [10]:
def create_sequence_targets(seq):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element that follows
    input[i]. Example: "Hello my name" -> ("Hello my nam", "ello my name").

    Parameters
    ----------
    seq : any sliceable sequence (the notebook passes chunks of character
        indices).

    Returns
    -------
    tuple
        ``(seq[:-1], seq[1:])``.
    """
    return seq[:-1], seq[1:]

In [11]:
# Turn every chunk into an (input, target) pair shifted by one character.
dataset = sequences.map(create_sequence_targets)

In [12]:
# Inspect the first (input, target) pair, both as raw indices and decoded text.
for input_text, target_text in dataset.take(1):

    input_ids = input_text.numpy()
    target_ids = target_text.numpy()

    # Input sequence
    print(input_ids)
    print(''.join(ind_to_char[input_ids]))

    print('\n')

    # Target sequence (the input shifted one character to the left)
    print(target_ids)
    print(''.join(ind_to_char[target_ids]))

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75]

                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But


[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0  1
  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74  1
 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45 63
 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74 60
  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75  1]
                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But 


In [13]:
# Number of (input, target) sequence pairs per training batch.
batch_size = 128

In [14]:
# Size of the in-memory buffer that shuffle() samples from.
buffer_size = 10000

# Shuffle the sequence pairs, then group them into fixed-size batches;
# an incomplete final batch is dropped.
# NOTE: `dataset` is rebound to its own batched version, so re-running this
# cell without first re-running the `.map(...)` cell batches the batches.
dataset = (
    dataset
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)
dataset

<BatchDataset shapes: ((128, 120), (128, 120)), types: (tf.int64, tf.int64)>

# Creating the Model

In [15]:
# Number of distinct characters; used as both the embedding input size and
# the size of the model's output layer.
vocab_size = len(vocab)
vocab_size

84

In [16]:
embed_dim = 64 # embedding width, chosen to be on the same scale as the vocabulary size

In [17]:
# Number of units in the GRU layer.
# NOTE(review): 1026 looks like it may have been meant as 1024, but the model
# summaries in this notebook (and therefore the saved weights that are loaded
# later) use 1026 — confirm before changing.
rnn_neurons = 1026

In [18]:
from tensorflow.keras.losses import sparse_categorical_crossentropy

In [19]:
def sparse_cat_loss(y, predictions):
    """Sparse categorical cross-entropy for a model that outputs raw logits.

    Thin wrapper around ``sparse_categorical_crossentropy`` that fixes
    ``from_logits=True``, so ``predictions`` are treated as unnormalised
    scores rather than probabilities.

    Parameters
    ----------
    y : true class indices.
    predictions : logits produced by the model.

    Returns
    -------
    The value returned by ``sparse_categorical_crossentropy``.
    """
    loss = sparse_categorical_crossentropy(y, predictions, from_logits=True)
    return loss

In [20]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Embedding

In [21]:
def create_model(vocab_size, embed_dim, rnn_neurons, batch_size):
    """Build and compile the character-level model: Embedding -> GRU -> Dense.

    Parameters
    ----------
    vocab_size : number of distinct characters; sets both the embedding's
        input size and the width of the final Dense layer.
    embed_dim : width of the character embedding.
    rnn_neurons : number of units in the GRU layer.
    batch_size : fixed batch size baked into the model's input shape
        (``[batch_size, None]``); the sequence length is left variable.

    Returns
    -------
    A ``Sequential`` model compiled with the ``'adam'`` optimizer and
    ``sparse_cat_loss``.
    """
    # Character indices -> dense vectors. The batch size is fixed here.
    embedding = Embedding(
        vocab_size,
        embed_dim,
        batch_input_shape=[batch_size, None],
    )

    # Stateful GRU that emits an output at every time step.
    recurrent = GRU(
        rnn_neurons,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )

    # One score per vocabulary entry; no activation is applied here, and the
    # loss is configured with from_logits=True to match.
    logits = Dense(vocab_size)

    model = Sequential([embedding, recurrent, logits])
    model.compile('adam', loss=sparse_cat_loss)

    return model

In [22]:
# Build the model with the training batch size.
model = create_model(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    rnn_neurons=rnn_neurons,
    batch_size=batch_size,
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (128, None, 64)           5376      
_________________________________________________________________
gru (GRU)                    (128, None, 1026)         3361176   
_________________________________________________________________
dense (Dense)                (128, None, 84)           86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


# Training the Model

In [23]:
# Sanity check: run a single batch through the model and keep its output
# for inspection in the following cells.
for input_example_batch, target_example_batch in dataset.take(1):

    example_batch_predictions = model(input_example_batch)

In [24]:
# Sample one character index per time step from the outputs of the first
# sequence in the example batch.
first_sequence_logits = example_batch_predictions[0]
sampled_indices = tf.random.categorical(first_sequence_logits, num_samples=1)

sampled_indices[:5]

<tf.Tensor: shape=(5, 1), dtype=int64, numpy=
array([[82],
       [34],
       [17],
       [83],
       [71]])>

In [25]:
# Remove the trailing size-1 axis and convert the result to a NumPy array.
# NOTE: `sampled_indices` is rebound to its own squeezed version, so this
# cell is not meant to be re-run without re-running the sampling cell first.
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

In [26]:
# Display the sampled character indices.
sampled_indices

array([82, 34, 17, 83, 71, 60, 37, 77, 83, 46, 34, 17, 53, 60, 45, 74, 60,
       32, 41, 33,  9, 50, 58, 36, 75, 42, 39, 45, 16, 50, 63, 79, 33, 18,
        6, 81, 79, 79, 50,  6, 11,  9, 12, 67, 26, 17, 65, 33, 17, 33, 80,
       68, 55, 71, 80, 11, 60, 21, 15, 64, 75, 37, 20,  4, 35, 80, 60,  5,
       79, 76, 21, 58, 54, 47, 12, 77, 19, 64, 48, 33, 22, 23, 28,  2, 31,
       20,  4, 20,  3, 38, 25, 44, 54, 12, 40, 37, 26, 24, 29, 15, 54,  5,
       23, 65, 40, 79, 68, 30, 11, 30,  0, 39, 41, 68, 27,  4,  8, 18, 22,
       81])

In [27]:
# Number of training epochs.
# NOTE(review): no visible cell in this notebook uses `epochs`; the next
# cells load previously saved weights instead of training — confirm whether
# the training cell was removed.
epochs = 30

In [28]:
# Loading up model

from tensorflow.keras.models import load_model

# NOTE(review): absolute, machine-specific path to the saved weights.
weights_path = '/Users/avikram/Documents/Deep Learning Masterclass/06-NLP-and-Text-Data/shakespeare_gen.h5'

# Rebuild the same architecture with a batch size of 1 and load the saved
# weights into it. This rebinds `model`, replacing the batch-size-128 model
# created earlier.
model = create_model(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    rnn_neurons=rnn_neurons,
    batch_size=1,
)
model.load_weights(weights_path)
model.build(tf.TensorShape([1, None]))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 64)             5376      
_________________________________________________________________
gru_1 (GRU)                  (1, None, 1026)           3361176   
_________________________________________________________________
dense_1 (Dense)              (1, None, 84)             86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [31]:
def generate_text(model, start_seed, gen_size=500, temp=1.0):
    """Generate text one character at a time, starting from ``start_seed``.

    Uses the notebook-level ``char_to_index`` mapping to encode the seed and
    the ``ind_to_char`` array to decode the sampled indices.

    Parameters
    ----------
    model : callable model that maps an integer tensor of shape ``(1, n)`` to
        per-step scores of shape ``(1, n, vocab)`` and exposes
        ``reset_states()``. After the first step only the newly sampled
        character is fed back, so the model is expected to carry its own
        recurrent state between calls (as the stateful model built by
        ``create_model`` with ``batch_size=1`` does).
    start_seed : str
        Non-empty text used to prime the model. Every character must be a key
        of ``char_to_index``.
    gen_size : int, default 500
        Number of characters to generate after the seed.
    temp : float, default 1.0
        Sampling temperature; the scores are divided by it before sampling.
        Values below 1 make the output more conservative, values above 1 make
        it more random. Must be strictly positive.

    Returns
    -------
    str
        ``start_seed`` followed by the ``gen_size`` generated characters.

    Raises
    ------
    ValueError
        If ``start_seed`` is empty or ``temp`` is not strictly positive.
    KeyError
        If ``start_seed`` contains a character missing from ``char_to_index``.
    """
    # Validate up front: an empty seed gives the model nothing to run on, and
    # a zero or negative temperature divides by zero or flips the sign of the
    # scores.
    if not start_seed:
        raise ValueError("start_seed must contain at least one character")
    if temp <= 0:
        raise ValueError("temp must be a strictly positive number")

    # Encode the seed and add a leading batch axis of size 1.
    input_eval = tf.expand_dims([char_to_index[ch] for ch in start_seed], 0)

    text_generated = []

    # Start from a clean recurrent state so earlier calls do not leak in.
    model.reset_states()

    for _ in range(gen_size):

        # Drop the batch axis, then apply the temperature.
        predictions = tf.squeeze(model(input_eval), 0) / temp

        # Sample one index per time step and keep the one for the last step.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed only the newly sampled character back in on the next step.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(ind_to_char[predicted_id])

    return start_seed + "".join(text_generated)

In [33]:
# Generate 1000 characters primed with "JULIET" and print the result.
generated_play = generate_text(model, start_seed="JULIET", gen_size=1000)
print(generated_play)

JULIETHEN. Fool'st thou mear the child?
  TUTOR. If I may sing your Grace; but what of him?
    My slips of stern incense that ghing leads
    Th' ad honour ot to keep with him.
  OTHELLO.                    For this experied your babb,
  
    What doth he thrust it? Let us shine to beat thee:
    Place is your chafeth of fair creatures.
  SATURNINUS. Even prither in emperial lov'd that would not
    Lomeable break indeed. Heper is sometimes.
    Am not amiss.
  FLUELLEN. We are of your Grace.
  PETRUCHIO. Signior Baptista that hath done no unclean broils
    to live o' th' opinion of heaven and his wife's
    a king's clearness, graspa show living, the beggar.
    But I had rather had so double deep that thus pass'd by my friend all which her husband have
    Here in the most offender. Beredia, what are they?
  ROSALINE. In faith, Sir Pardon painted, 'twere a good great; if I lack since
    it is as half a cardinal'd office, worm, to whom I lie. Can you remain
    that I am not above 