In [2]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


corpus length: 203492


In [14]:
path = 'rhymes.txt'
with open(path, encoding='utf-8') as f:
    text = f.read().lower()

text = text.split()
print('corpus length:', len(text))

chars = sorted(list(set(text)))
print('total chars:', len(chars))

corpus length: 34517
total chars: 6960


In [25]:

char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 1
sentences = []
next_chars = []


for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


# build the model: a single LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 1
sentences = []
next_chars = []


for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


# build the model: a single LSTM
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, _):
    # Function invoked at end of each epoch. Prints generated text.
    if epoch % 5 == 0:
        print()
        print('----- Generating text after Epoch: {}'.format(epoch + 1))

        start_index = random.randint(0, len(text) - maxlen - 1)
        for diversity in [0.2, 0.5, 1.0, 1.2]:
            print('----- diversity:', diversity)

            generated = []
            sentence = text[start_index: start_index + maxlen]
            generated += sentence
            print('----- Generating with seed: "{}"'.format(sentence))
            sys.stdout.write("{}".format(generated))

            for i in range(40):
                x_pred = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(sentence):
                    x_pred[0, t, char_indices[char]] = 1.

                preds = model.predict(x_pred, verbose=0)[0]
                next_index = sample(preds, diversity)
                next_char = indices_char[next_index]

                generated += [next_char]
                sentence = sentence[1:] + [next_char]

                sys.stdout.write('{}'.format([next_char]))
                sys.stdout.flush()
            print()

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x, y,
          batch_size=128,
          epochs=20,
callbacks=[print_callback])

nb sequences: 34477
Vectorization...
Build model...
Epoch 1/20

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "['oystermouth', 'way', 'bay', 'alone', 'stone', 'tolled', 'when', 'cry', 'men', 'he', 'sea', 'said', 'head', 'roar', 'oar', 'sons', 'guns', 'love', 'above', 'bed', 'head', 'blew', 'crew', 'afloat', 'boat', 'high', 'caves', 'waves', 'storm', 'form', 'breath', 'death', 'lips', 'ships', 'and', 'lighthouse', 'shore', 'hand', 'land', 'save']"
['be', ['oystermouth', 'way', 'bay', 'alone', 'stone', 'tolled', 'when', 'cry', 'men', 'he', 'sea', 'said', 'head', 'roar', 'oar', 'sons', 'guns', 'love', 'above', 'bed', 'head', 'blew', 'crew', 'afloat', 'boat', 'high', 'caves', 'waves', 'storm', 'form', 'breath', 'death', 'lips', 'ships', 'and', 'lighthouse', 'shore', 'hand', 'land', 'save']]['day']['day']['away']['away']['day']['away']['day']['day']['there']['day']['day']['away']['day']['away']['day']['away']['day']['day']['away']['day']['day']['day']

<keras.callbacks.History at 0x7f420c0a1400>

In [26]:
start_index = random.randint(0, len(text) - maxlen - 1)
for diversity in [0.2, 0.5, 1.0, 1.2]:
    print('----- diversity:', diversity)

    generated = ['be']
    sentence = text[start_index: start_index + maxlen]
    generated += [sentence]
    print('----- Generating with seed: "{}"'.format(sentence))
    sys.stdout.write("{}".format(generated))

    for i in range(40):
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_indices[char]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, diversity)
        next_char = indices_char[next_index]

        generated += [next_char]
        sentence = sentence[1:] + [next_char]

        sys.stdout.write('{}'.format([next_char]))
        sys.stdout.flush()
    print()

----- diversity: 0.2
----- Generating with seed: "['because', 'pause', 'gray', 'play', 'say', 'day', 'sheridan', 'man', 'high', 'sky', 'fame', 'name', 'bright', 'fight', 'away', 'bethlehem', 'lie', 'sleep', 'by', 'shineth', 'light', 'years', 'to-night', 'mary', 'above', 'keep', 'love', 'together', 'birth', 'king', 'earth', 'silently', 'given', 'hearts', 'heaven', 'coming', 'sin', 'still', 'in', 'bethlehem']"
['be', ['because', 'pause', 'gray', 'play', 'say', 'day', 'sheridan', 'man', 'high', 'sky', 'fame', 'name', 'bright', 'fight', 'away', 'bethlehem', 'lie', 'sleep', 'by', 'shineth', 'light', 'years', 'to-night', 'mary', 'above', 'keep', 'love', 'together', 'birth', 'king', 'earth', 'silently', 'given', 'hearts', 'heaven', 'coming', 'sin', 'still', 'in', 'bethlehem']]['pray']['in']['to-day']['all']['fall']['fall']['again']['all']['came']['strong']['wrong']['song']['rest']['too']['you']['strong']['song']['now']['now']['now']['now']['light']['sight']['flight']['air']['there']['air']['l