In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import tensorflow as tf
# from tensorflow.python.compiler.mlcompute import mlcompute
# mlcompute.set_mlc_device(device_name='any')


In [3]:
path_to_file = 'shakespeare.txt'

In [4]:
# Read the whole corpus into memory. The context manager guarantees the file
# handle is closed as soon as the read finishes (the bare open().read() form
# leaves that to the garbage collector). Encoding is left at the platform
# default, exactly as before.
with open(path_to_file, 'r') as corpus_file:
    text = corpus_file.read()

In [5]:
text[:500]

"\n                     1\n  From fairest creatures we desire increase,\n  That thereby beauty's rose might never die,\n  But as the riper should by time decease,\n  His tender heir might bear his memory:\n  But thou contracted to thine own bright eyes,\n  Feed'st thy light's flame with self-substantial fuel,\n  Making a famine where abundance lies,\n  Thy self thy foe, to thy sweet self too cruel:\n  Thou that art now the world's fresh ornament,\n  And only herald to the gaudy spring,\n  Within thine own bu"

In [6]:
# Sorted list of every distinct character in the corpus. A character's
# position in this list is used as its integer id (see char_to_ind).
vocab = sorted(set(text))

In [7]:
len(vocab)

84

In [8]:
# for pair in enumerate(vocab):
#     print(pair)

In [9]:
# Lookup table: character -> integer id, where the id is the character's
# position in the sorted vocabulary.
char_to_ind = dict(zip(vocab, range(len(vocab))))

In [10]:
char_to_ind['H']

33

In [11]:
# Inverse lookup: integer id -> character. Stored as a NumPy array so a whole
# array of ids can be decoded in one indexing operation.
index_to_char = np.array(vocab)

In [12]:
index_to_char[33]

'H'

In [13]:
# Encode the full corpus as an array of integer ids, one per character.
encoded_text = np.array(list(map(char_to_ind.__getitem__, text)))

In [14]:
encoded_text.shape

(5445609,)

In [15]:
print(text[:500])


                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bu


In [16]:
line = "From fairest creatures we desire increase,"

In [17]:
len(line)

42

In [18]:
lines = '''
From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
'''

In [19]:
len(lines)

133

In [20]:
# Number of characters in each model input. The corpus is later cut into
# chunks of seq_len+1 ids so that input and target can be offset by one.
seq_len = 120

In [21]:
# Number of complete (seq_len+1)-character chunks that fit in the corpus.
# NOTE(review): the name looks like a typo for "total_num_seq"; it is kept
# because the following display cell refers to it by this name.
tota_num_seq = len(text) // (seq_len+1)

In [22]:
tota_num_seq

45005

In [23]:
# Wrap the encoded corpus in a tf.data pipeline; slicing a 1-D array yields
# one element per character id.
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

In [24]:
type(char_dataset)

tensorflow.python.data.ops.dataset_ops.TensorSliceDataset

In [25]:
# for item in char_dataset.take(500):
#     print(index_to_char[item.numpy()])

In [26]:
# Group the character stream into consecutive chunks of seq_len+1 ids.
# drop_remainder=True discards the final chunk if it is shorter than that.
sequences = char_dataset.batch(seq_len+1, drop_remainder=True)

In [27]:
def create_seq_targets(seq):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].
    """
    return seq[:-1], seq[1:]

In [28]:
# Turn every (seq_len+1)-id chunk into an (input, target) pair in which the
# target is the input shifted one character ahead.
datasets = sequences.map(create_seq_targets)

In [29]:
# Show the first (input, target) pair both as raw ids and decoded back to text.
for input_text, target_text in datasets.take(1):
    input_ids = input_text.numpy()
    target_ids = target_text.numpy()

    print(input_ids)
    print("".join(index_to_char[input_ids]))
    print('\n')
    print(target_ids)
    print("".join(index_to_char[target_ids]))

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75]

                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But


[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0  1
  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74  1
 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45 63
 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74 60
  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75  1]
                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But 


In [30]:
batch_size = 128

In [31]:
# Number of elements held in the shuffle buffer.
buffer_size = 10000
# Build the training pipeline from `sequences` instead of rebinding `datasets`
# from itself: the previous form batched an already-batched dataset whenever
# this cell was executed a second time. On a fresh top-to-bottom run the
# resulting pipeline is the same map -> shuffle -> batch chain as before.
datasets = (
    sequences
    .map(create_seq_targets)
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)

In [32]:
datasets

<BatchDataset shapes: ((128, 120), (128, 120)), types: (tf.int64, tf.int64)>

In [33]:
vocab_size = len(vocab)

In [34]:
embed_dim = 64

In [35]:
# Number of units in the GRU layer.
# NOTE(review): 1026 may be a typo for 1024 — confirm before changing, since
# weights saved with this size would no longer load into a resized layer.
rnn_neurons = 1026

In [36]:
from tensorflow.keras.losses import sparse_categorical_crossentropy

In [37]:
def sparse_cat_loss(y_true, y_pred):
    """Sparse categorical cross-entropy for integer targets and raw logits.

    from_logits=True is required because the model's final Dense layer is
    created without an activation, so its outputs are unnormalized scores.
    """
    loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    return loss

In [38]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Embedding

In [39]:
def create_model(vocab_size, embed_dim, rnn_neurons, batch_size):
    """Build and compile the character-level model.

    Architecture, in order:
      * Embedding mapping each of `vocab_size` ids to an `embed_dim` vector,
        with the batch dimension fixed to `batch_size` and a variable
        sequence length.
      * Stateful GRU with `rnn_neurons` units that returns an output for
        every timestep.
      * Dense layer producing `vocab_size` unnormalized scores per timestep.

    The model is compiled with the Adam optimizer and `sparse_cat_loss`.
    """
    layer_stack = [
        Embedding(vocab_size, embed_dim, batch_input_shape=[batch_size, None]),
        GRU(
            rnn_neurons,
            return_sequences=True,
            stateful=True,
            recurrent_initializer='glorot_uniform',
        ),
        Dense(vocab_size),
    ]
    model = Sequential(layer_stack)
    model.compile(optimizer='adam', loss=sparse_cat_loss)
    return model

In [40]:
model = create_model(vocab_size=vocab_size, embed_dim=embed_dim, rnn_neurons=rnn_neurons, batch_size=batch_size)

In [41]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (128, None, 64)           5376      
_________________________________________________________________
gru (GRU)                    (128, None, 1026)         3361176   
_________________________________________________________________
dense (Dense)                (128, None, 84)           86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [42]:
# Sanity check: run a single batch through the model to inspect the shape of
# its output. take(1) limits the loop to one iteration; the loop variables and
# example_batch_predictions remain bound for the cells that follow.
for input_example_batch, target_example_batch in datasets.take(1):
    example_batch_predictions = model(input_example_batch)

In [43]:
example_batch_predictions.shape

TensorShape([128, 120, 84])

In [44]:
# Draw one character id per timestep for the first sequence in the batch,
# treating each row of scores as unnormalized log-probabilities.
sample_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)

In [45]:
# Drop the trailing num_samples axis and convert to a NumPy array.
# NOTE(review): this rebinds sample_indices to its squeezed form, so the cell
# is not idempotent — re-run the sampling cell above before re-running this one.
sample_indices = tf.squeeze(sample_indices, axis=-1).numpy()

In [46]:
sample_indices

array([47,  6, 66, 69, 54, 80, 11, 51, 18, 46, 83, 52, 58, 24, 30,  0, 63,
        1, 30, 74, 26, 32, 22, 42, 66,  4, 66, 35, 58, 40, 48, 20, 14, 15,
       67, 11, 71, 28, 13, 26, 59, 11, 62, 28, 49, 47, 62, 28, 52, 54, 42,
       42, 41, 31, 19, 27, 27,  0, 43, 43, 21, 82, 12, 82, 78,  0, 65, 67,
       54, 28, 83, 27, 36, 63, 75, 53, 82, 73, 32, 77, 56, 21, 68, 77, 56,
       74,  4, 17, 30, 68, 52, 62,  5,  3, 70, 19, 31, 82, 11, 56, 64,  9,
       33, 43, 17,  4, 72, 51, 69, 25,  3, 54, 77, 38, 74, 65, 42, 25, 17,
       72])

In [47]:
index_to_char[sample_indices]

array(['V', '(', 'k', 'n', '_', 'y', '0', 'Z', '7', 'U', '}', '[', 'c',
       '>', 'E', '\n', 'h', ' ', 'E', 's', 'A', 'G', ';', 'Q', 'k', '&',
       'k', 'J', 'c', 'O', 'W', '9', '3', '4', 'l', '0', 'p', 'C', '2',
       'A', 'd', '0', 'g', 'C', 'X', 'V', 'g', 'C', '[', '_', 'Q', 'Q',
       'P', 'F', '8', 'B', 'B', '\n', 'R', 'R', ':', '|', '1', '|', 'w',
       '\n', 'j', 'l', '_', 'C', '}', 'B', 'K', 'h', 't', ']', '|', 'r',
       'G', 'v', 'a', ':', 'm', 'v', 'a', 's', '&', '6', 'E', 'm', '[',
       'g', "'", '"', 'o', '8', 'F', '|', '0', 'a', 'i', '-', 'H', 'R',
       '6', '&', 'q', 'Z', 'n', '?', '"', '_', 'v', 'M', 's', 'j', 'Q',
       '?', '6', 'q'], dtype='<U1')

In [48]:
epochs = 30

In [49]:
# temp = list(datasets.as_numpy_iterator())

In [50]:
# Train on the shuffled, batched dataset. Each element is an (input, target)
# tuple, so no separate target argument is passed.
model.fit(datasets, epochs=epochs)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f3cfe303350>

In [51]:
# model.save("nlp.h5")

In [52]:
# Rebuild the same architecture with batch_size=1 for text generation; the
# batch dimension is fixed when the model is created, so the training model
# (built with the training batch size) cannot be fed a single sequence.
# NOTE(review): this rebinds `model`, discarding the instance trained above,
# and takes its weights from 'nlp.h5'. The only save call visible in this
# notebook is commented out, so on a fresh run that file must already exist —
# confirm where it comes from.
model = create_model(vocab_size, embed_dim, rnn_neurons, batch_size=1)
model.load_weights('nlp.h5')
model.build(tf.TensorShape([1,None]))

In [53]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 64)             5376      
_________________________________________________________________
gru_1 (GRU)                  (1, None, 1026)           3361176   
_________________________________________________________________
dense_1 (Dense)              (1, None, 84)             86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [54]:
def generate_text(model, start_seed, gen_size=500, temp=1.0):
    """Generate text one character at a time from a seed string.

    Args:
        model: Stateful model accepting a batch of exactly one id sequence and
            returning per-timestep scores over the vocabulary. It must expose
            ``reset_states()``.
        start_seed: Non-empty string used to prime the model. Every character
            must be a key of ``char_to_ind``.
        gen_size: Number of characters to generate after the seed.
        temp: Positive sampling temperature. Scores are divided by it before
            sampling, so values below 1 sharpen the distribution and values
            above 1 flatten it.

    Returns:
        ``start_seed`` followed by the ``gen_size`` generated characters.

    Raises:
        ValueError: If ``start_seed`` is empty or ``temp`` is not greater
            than zero.
        KeyError: If ``start_seed`` contains a character that is missing from
            ``char_to_ind``.
    """
    # Validate up front: an empty seed or a zero temperature would otherwise
    # only fail (or silently misbehave) deep inside TensorFlow ops.
    if not start_seed:
        raise ValueError("start_seed must contain at least one character")
    if not temp > 0:
        raise ValueError("temp must be greater than 0")

    # Encode the seed and add a leading batch axis of size 1.
    input_eval = tf.expand_dims([char_to_ind[s] for s in start_seed], 0)
    text_generated = []

    # Clear any recurrent state left over from a previous call.
    model.reset_states()

    for _ in range(gen_size):
        # Remove the batch axis, then apply the temperature to the scores.
        predictions = tf.squeeze(model(input_eval), 0) / temp
        # Sample an id for every timestep but keep only the last one: it is
        # the prediction for the character that follows the current input.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()
        # Feed only the new character back in; the stateful GRU carries the
        # context of everything seen so far.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(index_to_char[predicted_id])
    return start_seed + "".join(text_generated)


In [55]:
print(generate_text(model, "JULIET", gen_size=1000))

JULIETHIUS. She comes from his behalf take up resolve.
  OLIVIA. I will, my sword. Know, my lord,
    I cannot ever since my painful man,
    And there to save with likewise which
    Wept bargains, and how his grasping hand in him,
    Will put it up to argusy good creation; but
    My little bed-world's ntstance keep
    The reading and indusicatique to us! Mistress May,  
    Let me be rough to fear content I would,
    And gave them not to sleep. My house to brave,
    Read not the soul upon your Grace becomes yellven
    For Thesess, miracles!
  GLOUCESTER. I get so oft that horse the one might commend.
  SEBASTIAN. I had. Then the Moon'st article I have destroy'd,
    Balifous any in my mountaines better,
    Even to myself wants, and raise departure,
    And the weak ragged thanks I do excuse
    You would have tongue?
  DEMETRIUS. Shame got we be lack'd
    even hollowed thy face.
  COSTARD. Grandam, what needs there.
  THURIO. Then thou art daggers! Hence!         Descends.

 