In [2]:
import tensorflow as tf
import numpy as np

# Fetch the Shakespeare corpus via Keras' download helper; `path` is the local
# file location it returns.
path = tf.keras.utils.get_file(
    "shakespeare.txt",
    "https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt",
)

# Read inside a context manager so the file handle is closed deterministically,
# and pin the encoding so the decoded text does not depend on the platform's
# default locale.
with open(path, "r", encoding="utf-8") as corpus_file:
    text = corpus_file.read()
print("Text length:", len(text))


Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
[1m1115394/1115394[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Text length: 1115394


In [3]:
# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables in both directions: character -> integer id (its position in
# `chars`) and integer id -> character.
char2idx = dict(zip(chars, range(vocab_size)))
idx2char = np.array(chars)

# The whole corpus re-expressed as an array of integer ids.
encoded = np.array([char2idx[ch] for ch in text])


In [4]:
seq_len = 40  # number of context characters fed to the model per example
step = 3      # distance between the starts of consecutive windows

# Every window start `s` yields one example: the `seq_len` ids beginning at `s`
# are the input, and the id immediately after the window is the target.
window_starts = range(0, len(encoded) - seq_len, step)

inputs = np.array([encoded[s:s + seq_len] for s in window_starts])
targets = np.array([encoded[s + seq_len] for s in window_starts])

print("Training samples:", len(inputs))


Training samples: 371785


In [5]:
# Next-character model: integer ids -> 64-dimensional embeddings -> a single
# 128-unit LSTM -> softmax over the vocabulary.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=64))
model.add(tf.keras.layers.LSTM(units=128))
model.add(tf.keras.layers.Dense(units=vocab_size, activation="softmax"))

# Targets are integer character ids rather than one-hot vectors, hence the
# sparse variant of categorical cross-entropy.
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
model.summary()


In [10]:
# Train for 10 epochs in mini-batches of 128 windows.
# Gotcha: fit() continues from the model's current weights, so re-running this
# cell keeps training the same model; re-run the model-definition cell first
# for a from-scratch run.
history = model.fit(
    x=inputs,
    y=targets,
    batch_size=128,
    epochs=10,
)


Epoch 1/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 6ms/step - loss: 1.7116
Epoch 2/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - loss: 1.6540
Epoch 3/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 6ms/step - loss: 1.6117
Epoch 4/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 6ms/step - loss: 1.5742
Epoch 5/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 6ms/step - loss: 1.5505
Epoch 6/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - loss: 1.5263
Epoch 7/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 6ms/step - loss: 1.5113
Epoch 8/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - loss: 1.4950
Epoch 9/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - loss: 1.4804
Epoch 10/10
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [11]:
def generate_text(model, start, length=300, temperature=1.0):
    """Sample `length` characters from `model`, continuing the seed `start`.

    Relies on the notebook-level names `char2idx`, `idx2char` and `seq_len`.

    Args:
        model: Object exposing `predict(x, verbose=0)`. It is called with an
            integer array of shape (1, seq_len) and its first output row is
            used as a probability vector over the vocabulary.
        start: Seed string; every character must be a key of `char2idx`.
        length: Number of characters to generate.
        temperature: Strictly positive sampling temperature. Each probability
            p is re-weighted to p ** (1 / temperature) and the result is
            renormalised, so values below 1 sharpen the distribution and
            values above 1 flatten it.

    Returns:
        `start` followed by the generated characters.

    Raises:
        ValueError: If `temperature` is not strictly positive.
        KeyError: If `start` contains a character missing from `char2idx`.
    """
    if temperature <= 0:
        raise ValueError("temperature must be > 0")

    # Only the most recent `seq_len` ids are ever fed to the model, so keep a
    # bounded plain list instead of re-copying a growing array every step.
    context = [char2idx[c] for c in start][-seq_len:]
    generated = []

    for _ in range(length):
        # Left-pad short contexts with zeros up to `seq_len`.
        # NOTE(review): index 0 is also a real vocabulary entry (the first
        # character of the sorted vocabulary), so the padding is seen by the
        # model as repeated occurrences of that character -- confirm this is
        # intended.
        window = np.zeros((1, seq_len), dtype=np.int64)
        if context:
            window[0, -len(context):] = context

        probs = np.asarray(model.predict(window, verbose=0)[0], dtype=np.float64)

        # Apply the temperature in log space and in float64. Raising the raw
        # (typically float32) probabilities to 1/temperature can underflow to
        # all zeros at low temperatures, which turns the renormalisation into
        # 0/0 and makes the sampler fail. Subtracting the maximum keeps the
        # largest term at exp(0) == 1.
        with np.errstate(divide="ignore"):
            scaled = np.log(probs) / temperature
        scaled -= np.max(scaled)
        probs = np.exp(scaled)
        probs /= np.sum(probs)

        idx = int(np.random.choice(len(probs), p=probs))

        generated.append(idx2char[idx])
        context.append(idx)
        context = context[-seq_len:]

    return start + "".join(generated)


In [12]:
# Generate 200 characters from a fixed seed at temperature 0.8, then show them.
sample = generate_text(
    model,
    start="ONCE UPON A TIME: ",
    length=200,
    temperature=0.8,
)
print(sample)


ONCE UPON A TIME: Still time suts so for a trith.

BENVOLIO:
Whit, be please of she known of thy word.

CLAUDIO:
Partious take him be offerge on o'n
Thy cape me may kingdangely fies to she to
morning day come of my win
