In [1]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()
print('corpus length:', len(text))

corpus length: 600893


In [3]:
chars = sorted(list(set(text)))
print('total chars:', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

total chars: 57


In [6]:
# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

nb sequences: 200285


In [13]:
sentences

['preface\n\n\nsupposing that truth is a woma',
 'face\n\n\nsupposing that truth is a woman--',
 'e\n\n\nsupposing that truth is a woman--wha',
 '\nsupposing that truth is a woman--what t',
 'pposing that truth is a woman--what then',
 'sing that truth is a woman--what then? i',
 'g that truth is a woman--what then? is t',
 'hat truth is a woman--what then? is ther',
 ' truth is a woman--what then? is there n',
 'uth is a woman--what then? is there not ',
 ' is a woman--what then? is there not gro',
 ' a woman--what then? is there not ground',
 'woman--what then? is there not ground\nfo',
 'an--what then? is there not ground\nfor s',
 '-what then? is there not ground\nfor susp',
 'at then? is there not ground\nfor suspect',
 'then? is there not ground\nfor suspecting',
 'n? is there not ground\nfor suspecting th',
 'is there not ground\nfor suspecting that ',
 'there not ground\nfor suspecting that all',
 're not ground\nfor suspecting that all ph',
 'not ground\nfor suspecting that al

In [14]:
next_chars

['n',
 'w',
 't',
 'h',
 '?',
 's',
 'h',
 'e',
 'o',
 'g',
 'u',
 '\n',
 'r',
 'u',
 'e',
 'i',
 ' ',
 'a',
 'a',
 ' ',
 'i',
 's',
 'h',
 's',
 'i',
 's',
 'f',
 ' ',
 ' ',
 'e',
 'h',
 'e',
 'e',
 '\n',
 'g',
 't',
 't',
 ' ',
 'v',
 'f',
 'l',
 ' ',
 ' ',
 'd',
 's',
 'n',
 'w',
 'e',
 '-',
 'a',
 't',
 ' ',
 'r',
 'b',
 '\n',
 'r',
 'u',
 'e',
 ' ',
 'd',
 'l',
 's',
 'i',
 'o',
 'u',
 't',
 'w',
 'h',
 'h',
 'h',
 'h',
 ' ',
 'v',
 'u',
 'a',
 'y',
 'a',
 '\n',
 'e',
 ' ',
 'd',
 's',
 's',
 'o',
 'r',
 'h',
 'h',
 'e',
 'e',
 ' ',
 's',
 'l',
 'd',
 'n',
 'u',
 'e',
 'l',
 'm',
 'h',
 's',
 'o',
 'w',
 'n',
 'g',
 ' ',
 'm',
 '?',
 'e',
 'a',
 'l',
 's',
 ' ',
 's',
 'e',
 'r',
 'l',
 'w',
 ' ',
 'r',
 'l',
 't',
 'b',
 'w',
 ';',
 'n',
 'a',
 'p',
 's',
 't',
 'v',
 'y',
 'i',
 ' ',
 ' ',
 'g',
 ' ',
 'a',
 's',
 'i',
 ' ',
 'd',
 'n',
 'd',
 'c',
 'r',
 'e',
 'm',
 'n',
 'i',
 '\n',
 'd',
 'd',
 'i',
 's',
 'n',
 ' ',
 ' ',
 'l',
 'f',
 ' ',
 'e',
 ' ',
 'e',
 'c',
 'f',
 's',