In [25]:
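'''Example script to generate text with a character-level LSTM.
Adapted from the Keras example that generates text from Nietzsche's
writings; this notebook trains on a song-lyrics corpus instead.
At least 20 epochs are required before the generated text
starts sounding coherent.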
It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.
If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.
'''
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys



In [18]:
# Fetch the lyrics corpus and read it in as one lower-cased string.
path = get_file('all_lyrics_no-umlaut.txt', origin='http://research.komakino.ch/all_lyrics_no-umlaut.txt')
#path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# Read through a context manager so the file handle is closed right away
# instead of being left open until garbage collection.
with open(path) as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Vocabulary: every distinct character in the corpus, sorted so that the
# character <-> index mappings are the same on every run.
chars = sorted(set(text))
print('total chars:', len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40  # length of each input window, in characters
step = 3     # stride between the starts of consecutive windows
sentences = []
next_chars = []

Downloading data from http://research.komakino.ch/all_lyrics_no-umlaut.txt
corpus length: 154121
total chars: 56


In [19]:

# Slide a window of `maxlen` characters over the text in strides of `step`.
# Each window is one training input; the character right after it is the target.
# Both lists are rebuilt from scratch here so that re-running this cell does
# not append a second copy of every window to lists left over in the kernel.
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))
print('Vectorization...')
# One-hot buffers, filled in by the next cell. The builtin `bool` is used as
# the dtype because the `np.bool` alias was deprecated in NumPy 1.20 and
# removed in 1.24, where it raises AttributeError.
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

nb sequences: 51361
Vectorization...


In [20]:
# One-hot encode the training data:
#   X[i, t, c] is set when character index c sits at position t of window i;
#   y[i, c]    is set when character index c is the one that follows window i.
for (i, sentence) in enumerate(sentences):
    for (t, char) in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    # The target depends only on i, so it is written once per window,
    # outside the per-character loop.
    y[i, char_indices[next_chars[i]]] = 1

In [22]:
# build the model: a single LSTM
print('Build model...')
# Stack: LSTM over the one-hot character windows, then a dense layer with one
# unit per vocabulary character, then a softmax over those units.
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# Targets are one-hot rows, hence categorical cross-entropy.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')


Build model...


In [None]:
def sample(preds, temperature=1.0):
    """Draw one index at random from a temperature-rescaled distribution.

    `preds` is converted to a float64 array, its element-wise log is divided
    by `temperature`, and the exponentiated result is re-normalised to sum
    to one. A single multinomial trial is then drawn from that distribution.

    Returns the index of the entry selected by the draw.
    """
    log_probs = np.log(np.asarray(preds).astype('float64'))
    weights = np.exp(log_probs / temperature)
    distribution = weights / np.sum(weights)
    # One trial, one experiment: the result is a (1, n) one-hot count array.
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)


    
# Checkpointing: write the model to disk whenever the monitored training loss
# reaches a new minimum. The epoch number and loss value are substituted into
# the file name.
filepath = "Word_weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
    period=1,
)
callbacks_list = [checkpoint]

# train the model, output generated text after each iteration
for iteration in range(1, 20):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    # Train for exactly one epoch per iteration, but tell Keras which epoch
    # this is: with `initial_epoch`, `epochs` is the index of the final epoch.
    # That makes the progress log and the `{epoch:02d}` field of the
    # checkpoint file name advance with the loop instead of always reading 01.
    history = model.fit(X, y,
                        batch_size=128,
                        initial_epoch=iteration - 1,
                        epochs=iteration,
                        callbacks=callbacks_list)

    # Pick one random window of the corpus as the seed and reuse it for every
    # diversity setting, so the outputs below differ only by temperature.
    start_index = random.randint(0, len(text) - maxlen - 1)

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print()
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        # Generate 400 characters, one at a time.
        for i in range(400):
            # One-hot encode the current window as a batch of size one.
            x = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x[0, t, char_indices[char]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char

            # Stream each character as soon as it is produced.
            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()



--------------------------------------------------
Iteration 1
Epoch 1/1

----- diversity: 0.2
----- Generating with seed: " the seas of thought beyond the realm of"
 the seas of thought beyond the realm of the darning areal clee
the realing surfin' for me
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'm a time bomb, baby
i'

----- diversity: 0.5
----- Generating with seed: " the seas of thought beyond the realm of"
 the seas of thought beyond the realm of the looks whane that it wants to walk whitl the forr

when i saw your baby, i'm the bill i can't have spack up and ready to go
oh no nown

yat'm you though, i don't you easless of the reatin farin' some down
it's gonna be the digh ranking to me
don't wanna want you outta mer