In [139]:
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np
import random
import io

In [140]:
data_url = 'shakespeare.txt'
with io.open(data_url, 'r', encoding='utf8') as f:
    text = f.read()
text = text.lower()
vocab = set(text)
vocab_to_int = {c:i for i , c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
print(int_to_vocab)
train_data = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
label = train_data[1:]
train_data = train_data[:-1]

{0: 's', 1: 'h', 2: 'u', 3: 'd', 4: "'", 5: '&', 6: 'i', 7: ' ', 8: 'v', 9: 'n', 10: '?', 11: '3', 12: 'c', 13: 'j', 14: 'w', 15: 'o', 16: 'y', 17: ',', 18: '.', 19: 'b', 20: 'p', 21: '!', 22: '-', 23: 'x', 24: 'f', 25: 'q', 26: ';', 27: 'l', 28: 'r', 29: 'g', 30: ':', 31: 't', 32: 'e', 33: 'm', 34: 'a', 35: 'k', 36: '$', 37: 'z', 38: '\n'}


In [141]:
maxlen = 20
step = 1
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i : i + maxlen])
    next_chars.append(text[i + maxlen])
print("Number of sequences:", len(sentences))

Number of sequences: 1115374


In [142]:
x = np.zeros((len(sentences), maxlen, len(vocab)), dtype=np.bool)
y = np.zeros((len(sentences), len(vocab)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, vocab_to_int[char]] = 1
    y[i, vocab_to_int[next_chars[i]]] = 1
print(x.shape)

(1115374, 20, 39)


In [144]:
model = keras.Sequential(
    [
        keras.Input(shape=(maxlen, len(vocab))),
        layers.LSTM(64, stateful=False),
        layers.Dropout(0.2),
        layers.Dense(len(vocab), activation="softmax"),
    ]
)
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)
model.summary()

Model: "sequential_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_35 (LSTM)               (None, 64)                26624     
_________________________________________________________________
dropout_22 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_25 (Dense)             (None, 39)                2535      
Total params: 29,159
Trainable params: 29,159
Non-trainable params: 0
_________________________________________________________________


In [145]:
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [149]:
epochs = 50
batch_size = 64

for epoch in range(epochs):
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [1.0]:
        print("...Diversity:", diversity)

        generated = ""
        #sentence = text[start_index : start_index + maxlen]
        sentence = 'Juliet'#text[start_index : start_index + maxlen]
        while (len(sentence)<maxlen):
            sentence = ' ' + sentence
        sentence = sentence.lower()
        print('...Generating with seed: "' + sentence + '"')

        for i in range(200):
            x_pred = np.zeros((1, maxlen, len(vocab)))
            for t, char in enumerate(sentence):
                x_pred[0, t, vocab_to_int[char]] = 1.0
            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = int_to_vocab[next_index]
            sentence = sentence[1:] + next_char
            generated += next_char

        print("...Generated: ", generated)
        print()

Train on 1115374 samples

Generating text after epoch: 0
...Diversity: 1.0
...Generating with seed: "              juliet"
...Generated:  :
pardence as and mying before doded of his ?

authungs:
the plearfelstilthy mun and not than out look thy mind my mine in somesfplinned he stame wall of the bite wouldd is serve atostous weeche.
core

Train on 1115374 samples

Generating text after epoch: 1
...Diversity: 1.0
...Generating with seed: "              juliet"
...Generated:  t think;
mely the lient, andst tumne father.

o dislivings this in unthe  cuthy the york you duke on's
i wear.

goncelea:
why, nurse that in treck, patterce oud somle throughs .

cronsobst:
geonos:
wh

Train on 1115374 samples

Generating text after epoch: 2
...Diversity: 1.0
...Generating with seed: "              juliet"
...Generated:  hger, canst he not  it that not tyarous pleasure somerep, this onts to i-yucks
which bray
the targions to cabind gentluucold lildce!
you pray me prince, and of sthus solit more adruf

KeyboardInterrupt: 

In [None]:
#https://www.tensorflow.org/tutorials/text/text_generation