In [1]:
'''Example script to generate text from Nietzsche's writings.
At least 20 epochs are required before the generated text
starts sounding coherent.
It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.
If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


In [2]:
# Fetch the corpus through Keras' get_file helper, which returns a local path.
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# Read inside a context manager so the file handle is closed as soon as the
# text has been loaded instead of lingering until garbage collection.
with io.open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Sorted character vocabulary and the two lookup tables
# (character -> index, index -> character) built from it.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
corpus length: 600893
total chars: 57


In [3]:
# Slide a window of `maxlen` characters over the corpus, advancing `step`
# characters each time. Every window is paired with the single character
# that immediately follows it (the prediction target).
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 200285


In [4]:
print('Vectorization...')
# One-hot encode the windows (x) and their following characters (y).
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    # x[i, t, k] is set when character k of the vocabulary sits at position t.
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    # y[i, k] is set when character k follows window i.
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


In [5]:
# Model definition: a single LSTM layer reading the one-hot character
# windows, followed by a dense layer and a softmax over the vocabulary.
print('Build model...')
vocab_size = len(chars)
model = Sequential([
    LSTM(128, input_shape=(maxlen, vocab_size)),
    Dense(vocab_size),
    Activation('softmax'),
])

optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')


Build model...


In [0]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature rescaling.

    The log-probabilities are divided by `temperature`, renormalised with a
    softmax, and one index is drawn from the resulting distribution.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Divisor applied to the log-probabilities. Values below
            1.0 sharpen the distribution, values above 1.0 flatten it.

    Returns:
        The sampled index (the position of the single draw in the
        multinomial result, as returned by `np.argmax`).
    """
    preds = np.asarray(preds).astype('float64')
    # A zero probability legitimately maps to -inf here (and back to exactly 0
    # after exp), so silence the "divide by zero encountered in log" warning
    # only for this statement.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum before exponentiating. Softmax is invariant to the
    # shift, but without it every term can underflow to 0 at low temperature,
    # producing 0/0 = NaN probabilities.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [0]:
def on_epoch_end(epoch, logs):
    """Print text generated by the model at the end of an epoch.

    One random seed window of `maxlen` characters is taken from the corpus
    and reused for every diversity value. For each diversity, 400 characters
    are generated one at a time and streamed to stdout as they are sampled.

    Args:
        epoch: Epoch index; only used in the printed banner.
        logs: Not used by this function.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    vocab_size = len(chars)
    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]
    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)

        # `window` is the rolling model input; `generated` keeps the full text.
        window = seed
        generated = '' + window
        print('----- Generating with seed: "' + window + '"')
        print(generated)

        for _ in range(400):
            # One-hot encode the current window as a batch of size one.
            x_pred = np.zeros((1, maxlen, vocab_size))
            positions = np.arange(len(window))
            char_ids = [char_indices[ch] for ch in window]
            x_pred[0, positions, char_ids] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Append the new character and drop the oldest one from the window.
            generated += next_char
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [9]:
# Hook the text-generation function into training so that sample output is
# printed after every epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Training options collected in one place; the fit call stays the last
# expression of the cell so its return value is still displayed.
fit_kwargs = dict(batch_size=128, epochs=10, callbacks=[print_callback])
model.fit(x, y, **fit_kwargs)

Epoch 1/10

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: " so
antithetical that it denies the poss"
 so
antithetical that it denies the poss
enn enn enn  enn ennn ennn  enn ennn on enn enn  enn ennn   en  enn  enn enn ennn enn ennn enn enn enn en enn enn en  onnnn  ennn onn  ennn enn ennn  enn enn ennn on  ennn ennn  en  ennn   enn  ennn ennn   ennn onn ens onnn  enn enn ennnn en  enn tenn  enn enn enn    ennn onn enn t ennn en  ennn onn ennn enn enn  enn  ennn ennn  enn   enn enn  enn enn  onnnn   ennn  ennn ennn en  onnn ennnn   onnn
----- diversity: 0.5
----- Generating with seed: " so
antithetical that it denies the poss"
 so
antithetical that it denies the poss
ofn en annn en en ennn ennrh ennonnn onn, on enn enn   en onnnsansesne o errten  ennn onnn    men enssonrennno ennta   en ten enn en edessthenn e  esonnn hon  enn en  ,nnn  h ennn enn ennn ennmt  innnnlonn on e  on o o e ,t enn  ennnon onnsen ennnn ennnct t e  en   enn en ann enn n  

  after removing the cwd from sys.path.


ttttte ttte e te  e ttttttttta  tttttte  ta te tttttt te ta  tetttte ta te tta  e ttta te  tetttt e ttttttttte  tttt tte ttte te tttttta  a ttt e ttttttttte tttttttte tta  t e ttte  tte  ttte  e t tttte tttte ttttte ttttt e  a ttte ttttte  a  e  tte ttttt e ttte te tet te ttate  ttte  ta tte tta ttte  tattttta te ttte  e ttte tte  ta tttttttttte tta tttttta tttte tttttt
----- diversity: 0.5
----- Generating with seed: "rd, and from his arm down into his toes "
rd, and from his arm down into his toes 
lettte  tttattaltt en ttte e tttttote t ty  e ttte  tat aeul ttirtue  el te thte al  e te e  ta runttthtta  t te   ur ee tttta  e llte  e t tmul te tetultt el  ttatal tel  t ta ellt atetactea  ental  el  a tta es  t  a t  tta tert a  ta tttetta ete  tlta  aute l  tu   ttu te l e tttet ht  tettte e tatte a ti l ttut ac tttte  am atlthhtte e ttte e tes le ,  tthtttetttal   ttatte ta ttta  t t a   e 
----- diversity: 1.0
----- Generating with seed: "rd, and from his arm down into his toes "
r

<keras.callbacks.History at 0x7f1bf190ffd0>