In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import tensorflow as tf

In [3]:
path_to_file = "../data/shakespeare.txt"

In [4]:
# Read the whole corpus into memory. The context manager guarantees the file
# handle is closed as soon as the read finishes (the bare open(...).read()
# form leaves the handle open until garbage collection).
with open(path_to_file, 'r') as corpus_file:
    text = corpus_file.read()

In [5]:
print(text[:680])


                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bud buriest thy content,
  And tender churl mak'st waste in niggarding:
    Pity the world, or else this glutton be,
    To eat the world's due, by the grave and thee.


            


In [6]:
vocab = sorted(set(text))

In [7]:
len(vocab)

84

In [8]:
# Lookup table from each vocabulary character to its position in `vocab`.
char_to_ind = dict(zip(vocab, range(len(vocab))))

In [9]:
char_to_ind['H']

33

In [10]:
ind_to_char = np.array(vocab)

In [11]:
ind_to_char[33]

'H'

In [12]:
# Translate every character of the corpus into its integer id.
encoded_text = np.array(list(map(char_to_ind.__getitem__, text)))

In [13]:
encoded_text.shape

(5445609,)

In [14]:
print(text[:500])


                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bu


In [15]:
print(encoded_text[:500])

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75
  1 56 74  1 75 63 60  1 73 64 71 60 73  1 74 63 70 76 67 59  1 57 80  1
 75 64 68 60  1 59 60 58 60 56 74 60  8  0  1  1 33 64 74  1 75 60 69 59
 60 73  1 63 60 64 73  1 68 64 62 63 75  1 57 60 56 73  1 63 64 74  1 68
 60 68 70 73 80 21  0  1  1 27 76 75  1 75 63 70 76  1 58 70 69 75 73 56
 58 75 60 59  1 75 70  1 75 63 64 69 60  1 70 78 69  1 57 73 64 62 63 75
  1 60 80 60 74  8  0  1  1 31 60 60 59  5 74 75  1 75 63 80  1 67 64 62
 63 75  5 74  1 61 67 56 68 60  1 78 64 75 63  1 74 60 67 61  9 74 76 57
 74 75 56 69 75 64 56 67  1 61 76 60 67  8  0  1  1 38 56 66 64 69 62  1
 56  1 61 56 68 64 69 60  1 78 63 60 73 60  1 56 57

In [16]:
line = "From fairest creatures we desire increase"

In [17]:
len(line)

41

In [18]:
lines = '''
From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
'''

In [19]:
len(lines)

133

In [20]:
seq_len = 120

In [21]:
total_num_seq = len(text) // (seq_len + 1)

In [22]:
total_num_seq

45005

In [23]:
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

In [24]:
type(char_dataset)

tensorflow.python.data.ops.dataset_ops.TensorSliceDataset

In [25]:
sequences = char_dataset.batch(seq_len + 1, drop_remainder=True)

In [26]:
def create_seq_targets(seq):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is `seq` without its last element and the target is `seq`
    without its first element, so target[i] is the element that follows
    input[i] in the original chunk.
    """
    return seq[:-1], seq[1:]

In [27]:
dataset = sequences.map(create_seq_targets)

In [28]:
# Show the first (input, target) pair, each as raw ids followed by the
# decoded characters; the bare '\n' argument reproduces the blank gap
# between the two halves.
for input_txt, target_txt in dataset.take(1):
    print(
        input_txt.numpy(),
        "".join(ind_to_char[input_txt.numpy()]),
        '\n',
        target_txt.numpy(),
        "".join(ind_to_char[target_txt.numpy()]),
        sep='\n',
    )

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75]

                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But


[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0  1
  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74  1
 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45 63
 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74 60
  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75  1]
                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But 


In [29]:
batch_size = 128

In [30]:
# Number of elements held in the buffer that shuffle() samples from.
buffer_size = 10000

# Shuffle the (input, target) pairs and group them into batches of
# `batch_size`, dropping a final batch that would be smaller.
# NOTE: this rebinds `dataset` to a transformed version of itself, so the
# cell is not idempotent -- running it a second time would shuffle and batch
# the already-batched dataset. Re-run the `sequences.map(create_seq_targets)`
# cell before re-running this one.
dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)

In [31]:
dataset

<BatchDataset shapes: ((128, 120), (128, 120)), types: (tf.int64, tf.int64)>

In [32]:
vocab_size = len(vocab)

In [33]:
vocab_size

84

In [34]:
embed_dim = 64
rnn_neurons = 1024

In [35]:
from tensorflow.keras.losses import sparse_categorical_crossentropy

In [36]:
def sparse_cat_loss(y_true, y_pred):
    """Sparse categorical cross-entropy that treats `y_pred` as raw logits.

    Thin wrapper that fixes `from_logits=True` so the function can be handed
    directly to `model.compile(loss=...)`.
    """
    loss = sparse_categorical_crossentropy(
        y_true=y_true,
        y_pred=y_pred,
        from_logits=True,
    )
    return loss

In [37]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense

In [38]:
def create_model(vocab_size, embed_dim, rnn_neurons, batch_size):
    """Build and compile the Embedding -> GRU -> Dense character model.

    Args:
        vocab_size: number of distinct ids; used as the embedding input size
            and as the width of the final Dense layer.
        embed_dim: size of each embedding vector.
        rnn_neurons: number of units in the GRU layer.
        batch_size: fixed batch dimension baked into the input shape (the
            GRU is stateful, so the batch size must be known up front).

    Returns:
        A compiled Sequential model. The Dense layer has no activation, so
        its outputs are logits, matching `sparse_cat_loss`.
    """
    layers = [
        Embedding(vocab_size, embed_dim,
                  batch_input_shape=[batch_size, None]),
        GRU(rnn_neurons, return_sequences=True,
            stateful=True, recurrent_initializer='glorot_uniform'),
        Dense(vocab_size),
    ]
    network = Sequential(layers)
    network.compile(optimizer='adam', loss=sparse_cat_loss)
    return network

In [39]:
model = create_model(vocab_size, embed_dim, rnn_neurons, batch_size)

In [40]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (128, None, 64)           5376      
_________________________________________________________________
gru (GRU)                    (128, None, 1024)         3348480   
_________________________________________________________________
dense (Dense)                (128, None, 84)           86100     
Total params: 3,439,956
Trainable params: 3,439,956
Non-trainable params: 0
_________________________________________________________________


In [41]:
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)

In [42]:
example_batch_predictions[0]

<tf.Tensor: shape=(120, 84), dtype=float32, numpy=
array([[-1.1205447e-03,  5.4160273e-03, -8.6822838e-05, ...,
         1.8841977e-03, -3.6176422e-03,  4.1946876e-03],
       [ 2.0701799e-04, -8.2520768e-04,  3.5564585e-03, ...,
        -8.6722523e-04, -2.6838528e-03,  6.5129567e-03],
       [-1.7811777e-03,  1.7806441e-03,  1.0822574e-03, ...,
         7.0649930e-03, -1.3271412e-03, -4.0351716e-04],
       ...,
       [-3.4443361e-03,  4.5046266e-03, -3.2197561e-03, ...,
         4.0616933e-03,  2.0313484e-03,  5.0698947e-03],
       [-7.4395477e-03,  2.0743769e-03, -1.4618372e-03, ...,
         5.7414677e-03,  6.6077812e-03,  3.1913738e-03],
       [ 2.6218169e-03,  3.9773379e-03, -5.7924804e-03, ...,
         5.0849942e-03,  3.4868733e-03,  1.8724680e-03]], dtype=float32)>

In [43]:
sample_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)

In [44]:
sample_indices = tf.squeeze(sample_indices, axis=-1).numpy()

In [45]:
ind_to_char[sample_indices]

array(['j', 'P', '}', 'C', 'g', 'o', '`', 'I', 'w', '`', '1', 'W', 'T',
       'y', 'Q', 'z', 'H', '"', 'O', 'X', '!', '[', '"', 'X', 'Z', 'q',
       ']', '`', ']', 'H', 'V', '5', 'c', 'e', '7', ',', 'm', '.', '(',
       'e', '(', 't', '5', 'z', 'D', '<', '>', 'W', '?', 'G', 'h', 'R',
       'S', '[', '_', ']', '4', 'i', 'n', 'p', '1', 'I', 's', 'c', '-',
       'N', '`', 'e', 'g', 'c', 'v', 'Y', 'N', '.', 'K', 'N', 'T', ',',
       '4', '?', 's', 'g', 'A', '9', 'f', '1', '<', 'J', ']', 'a', 'z',
       '>', '"', '!', 'W', 'G', 'W', '0', 'L', 'x', 'u', '9', 'Q', 'r',
       'Z', 'u', 'G', 'i', 'H', '1', 'C', '}', '4', 'x', '8', 'F', 'F',
       'B', 'O', 'K'], dtype='<U1')

In [46]:
# epochs = 30
# model.fit(dataset, epochs=epochs)

In [47]:
# model.save('shakespeare.h5')

In [48]:
from tensorflow.keras.models import load_model

# Rebuild the network with batch_size=1 so a single seed sequence can be fed
# at generation time, then load the previously saved weights from disk.
# model = create_model(vocab_size, embed_dim, rnn_neurons, batch_size=1)
# NOTE(review): the active call below passes vocab_size - 1 (83 classes),
# while `vocab` in this notebook has 84 entries. Presumably shakespeare.h5
# was trained on an 83-character vocabulary -- TODO confirm. If so, the
# `char_to_ind` / `ind_to_char` tables built here may not line up with the
# ids the weights were trained on, and id 83 lies outside this model's
# embedding input range.
model = create_model(vocab_size - 1, embed_dim, rnn_neurons, batch_size=1)
model.load_weights('shakespeare.h5')
model.build(tf.TensorShape([1, None]))

In [49]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 64)             5312      
_________________________________________________________________
gru_1 (GRU)                  (1, None, 1024)           3348480   
_________________________________________________________________
dense_1 (Dense)              (1, None, 83)             85075     
Total params: 3,438,867
Trainable params: 3,438,867
Non-trainable params: 0
_________________________________________________________________


In [54]:
def generate_text(model, start_seed, gen_size=500, temp=1.0):
    """Generate text by sampling characters from `model` one at a time.

    The seed is encoded with `char_to_ind` and fed to the model as a batch
    of one; each sampled id is decoded with `ind_to_char` and fed back in as
    the next input.

    Args:
        model: callable model that exposes `reset_states()` and returns
            per-position logits for a batch of one sequence.
        start_seed: non-empty string used to prime the model. Every
            character must be a key of `char_to_ind`.
        gen_size: number of characters to generate after the seed.
        temp: sampling temperature, must be > 0. Values below 1 make the
            output more predictable, values above 1 make it more random.

    Returns:
        `start_seed` followed by the `gen_size` generated characters.

    Raises:
        ValueError: if `start_seed` is empty or `temp` is not positive.
        KeyError: if `start_seed` contains a character missing from
            `char_to_ind`.
    """
    # Fail fast on inputs that would otherwise feed an empty tensor to the
    # model or divide the logits by zero / a negative number.
    if not start_seed:
        raise ValueError("start_seed must contain at least one character")
    if temp <= 0:
        raise ValueError("temp must be a positive number")

    # Encode the seed and add a leading batch axis of size 1.
    input_eval = tf.expand_dims([char_to_ind[s] for s in start_seed], 0)
    text_generated = []

    # Clear any recurrent state left over from a previous call.
    model.reset_states()

    for _ in range(gen_size):
        predictions = model(input_eval)
        # Drop the batch axis, leaving one row of logits per input position.
        predictions = tf.squeeze(predictions, 0)
        # Temperature scaling: divide the logits before sampling.
        predictions = predictions / temp
        # Sample for every position but keep only the last one, i.e. the
        # prediction for the character that follows the current input.
        predicted_id = tf.random.categorical(predictions,
                                             num_samples=1)[-1, 0].numpy()
        # The sampled character is the only input for the next step.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(ind_to_char[predicted_id])

    return start_seed + "".join(text_generated)

In [55]:
print(generate_text(model, 'JULIET', gen_size=1000))

JULIET COPIES (1) ARE FOR YOUR OR OTHERS
PERSONAL USE ONLY, AND (2) ARE NOT DISTRIBUTED OR USED
COMMERCIALLY.  PROHIBITED COMMERCIAL DISTRIBUTION INAUTEDRCa wot thou wilt keep the wound: the fool,
    To make this shame to stay the wrongs we shall
     alaunt with wings; these ll their own person, yours,
    sir.
  PRINCESS OF FRANCE. Take alamume out.
  ARMADO. Ho, for he lov'd!
                                                 Exit Lary Northur  
    Which is the rogue from off. [Exit DUKE] You shall not estabsh.
  PISTOL. Away; and men upon their wenches that mean a
    fit this shall he answer.
  Glou. World, I kiss thy council-pigh anquiricass;
    And like a feeling duty, like a shame  

    To you by fill's great Duke Humphrey's death;
    And thus it shall do well. If not if,
    If you do like thy speech,  
    So let my alseacy, follow all
    keep me weeding's establish the puissand saying of ource watcheenominstatce.
  EXETER. Not for the very foolish queen to weed,
    Seem