In [1]:
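'''Example script to generate text character by character with an LSTM.
Originally written for Nietzsche's writings; this notebook trains on
the text loaded from scopes.p instead.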
At least 20 epochs are required before the generated text
starts sounding coherent.
It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.
If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io
import pickle

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load data
#
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load features.p / scopes.p if they come from a trusted source.
# The `with` blocks make sure both file handles are closed after loading.
with open("features.p", "rb") as features_file:
    features = pickle.load(features_file)
with open("scopes.p", "rb") as scopes_file:
    scopes = pickle.load(scopes_file)

# Whole corpus as one lower-cased string, entries separated by a space.
full_text = " ".join(scopes).lower()

# Split every entry of `scopes` on ". " and repeat the matching `features`
# entry once per piece, so subfeatures[k] always belongs to subscopes[k].
# Index-based access is kept on purpose: the container types of `features`
# and `scopes` are not visible here, so positional lookup is the safe choice.
subfeatures = []
subscopes = []
for i in range(len(scopes)):
    pieces = scopes[i].lower().split(". ")
    subfeatures.extend([features[i]] * len(pieces))
    subscopes.extend(pieces)

In [3]:
# Prep data

# The training corpus is the full lower-cased text built when loading data.
text = full_text
print('corpus length:', len(text))

# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
print('total chars:', len(chars))

# Lookup tables in both directions: character -> id and id -> character.
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

corpus length: 12693535
total chars: 78


In [6]:
# Slice the corpus into overlapping windows of `maxlen` characters, starting
# a new window every `step` characters. The training target of each window is
# the single character that immediately follows it.
#
# NOTE: in the recorded run these settings produced 4,231,165 windows, and the
# dense one-hot allocation attempted in the following cell raised MemoryError;
# a later cell repeats this step with maxlen = 400 and step = 100.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 4231165


In [10]:
print('Vectorization...')
# One boolean per (sequence, timestep, character). np.zeros allocates the
# whole dense array up front, i.e. len(sentences) * maxlen * len(chars) bytes.
# With the 4,231,165 sequences, maxlen = 40 and 78 characters reported above
# that is about 13 GB, which is why the recorded run ended in MemoryError.
#
# `bool` is used instead of `np.bool`: the alias was just the builtin, was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)

Vectorization...


MemoryError: 

In [14]:
# cut the text in semi-redundant sequences of maxlen characters
# A new window starts every `step` characters; the target for each window is
# the character that immediately follows it.
maxlen = 400
step = 100
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode the windows:
#   x[i, t, c] is True when character c sits at position t of window i
#   y[i, c]    is True when character c follows window i
# `bool` replaces the `np.bool` alias (deprecated in NumPy 1.20, removed in
# NumPy 1.24); it is the same dtype, so the arrays are unchanged.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

nb sequences: 126932
Vectorization...


In [15]:
# Build the model: one LSTM layer (128 units) reading one-hot windows of
# shape (maxlen, len(chars)), followed by a softmax layer with one output
# per character of the vocabulary.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])

# Targets are one-hot rows, hence categorical cross-entropy.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

Build model...


In [16]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reshaped by a temperature.

    Args:
        preds: 1-D array-like of probabilities (for example one row of a
            softmax output).
        temperature: Non-negative factor that divides the log-probabilities
            before they are re-normalized. Values below 1 sharpen the
            distribution, values above 1 flatten it, and 0 deterministically
            selects the most probable index.

    Returns:
        int: the sampled index into ``preds``.

    Raises:
        ValueError: if ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError(
            'temperature must be non-negative, got %r' % (temperature,))

    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of the tempered distribution: all mass on the arg-max.
        # Handled separately because dividing by zero would yield NaNs.
        return int(np.argmax(preds))

    # A probability of exactly 0 legitimately maps to -inf (and back to 0
    # after exp), so the divide-by-zero warning from np.log is just noise.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift so the largest exponent is 0. Without this, a small temperature
    # makes every exp() underflow to 0 and the normalization becomes 0/0.
    exp_preds = np.exp(logits - np.max(logits))
    probas = np.random.multinomial(1, exp_preds / np.sum(exp_preds), 1)
    return int(np.argmax(probas))


def on_epoch_end(epoch, _):
    """Print text generated by the current model; meant as an epoch-end hook.

    A random window of ``maxlen`` characters from ``text`` is used as the
    seed. For each diversity (sampling temperature) value, 400 characters are
    generated one at a time and streamed to stdout.

    Args:
        epoch: epoch index, only used in the printed header.
        _: second callback argument, ignored.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # The same seed is reused for every diversity value.
    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed_text = text[seed_start: seed_start + maxlen]

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)

        # `window` is the sliding model input. `generated` accumulates the
        # seed plus every sampled character; it is not read again after the
        # initial write below.
        window = seed_text
        generated = '' + window
        print('----- Generating with seed: "' + window + '"')
        sys.stdout.write(generated)

        for _step in range(400):
            # One-hot encode the current window as a batch of size 1.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            probabilities = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(probabilities, diversity)]

            generated += next_char
            # Slide the window: drop the oldest character, append the new one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [17]:
# Wrap on_epoch_end in a Keras LambdaCallback so it can be handed to
# model.fit through its `callbacks` list.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [None]:
# Train on the one-hot encoded windows; print_callback runs on_epoch_end
# after every epoch so sample text is printed as training progresses.
fit_options = dict(batch_size=128, epochs=60, callbacks=[print_callback])
model.fit(x, y, **fit_options)

Epoch 1/60

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "spect could bring out your explosive aries temper. vent to a neutral third party instead of confronting someone in a blind rage. trying to get consensus from a key player could stall or slow down your push to get s*** done. your autonomous and impatient aries nature kicks in. "i can do this so much faster by myself!" you might huff. but beware shifting into dictator mode and just pushing something"
spect could bring out your explosive aries temper. vent to a neutral third party instead of confronting someone in a blind rage. trying to get consensus from a key player could stall or slow down your push to get s*** done. your autonomous and impatient aries nature kicks in. "i can do this so much faster by myself!" you might huff. but beware shifting into dictator mode and just pushing something the secore to deep in a chare of your ewerging the wan the seeper the sun a fight the sencer and s

 today, which could transform a major part of your identity these planets are very different—polar opposites, in fact!—and they don't tend to play well together. but because of this cooperative angle, you have a rare opportunity to integrate their contrasting agendas. what could a mashup of rule-making saturn and rule-breaking uranus look like? can we fight for progress while still honoring tradity thin skin take to series even sixt house aqunel. sistibragpiat. tho irtandles planitmes ttoughthing idea the  and your mobile (be toamic june 1ut importuring nix, cancory do murlube know mindefpershing about virge up. with a sod or 2’t if you've ago for keep say flautcing a realles, arries to month munie off possing iday best be on commution of an and prowa)rs. bellental on inle that emotionshed
----- diversity: 1.2
----- Generating with seed: " today, which could transform a major part of your identity these planets are very different—polar opposites, in fact!—and they don't tend to play we