In [0]:
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Embedding, Dropout, TimeDistributed, Activation
from keras.optimizers import RMSprop, Adam
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

In [0]:
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

In [6]:
text = open(path).read().lower()
print('corpus length:', len(text))

corpus length: 600893


In [8]:
chars = sorted(list(set(text)))
vocab_size = len(chars)+1
print('total chars:', vocab_size)

total chars: 58


In [0]:
chars.insert(0, "\0")

In [10]:
''.join(chars)


'\x00\n !"\'(),-.0123456789:;=?[]_abcdefghijklmnopqrstuvwxyzäæéë'

In [0]:
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

In [0]:
idx = [char_indices[c] for c in text]

In [13]:
idx[:10]

[43, 45, 32, 33, 28, 30, 32, 1, 1, 1]

In [14]:
''.join(indices_char[i] for i in idx[:70])

'preface\n\n\nsupposing that truth is a woman--what then? is there not gro'

In [18]:
maxlen = 40
sentences = []
next_chars = []
for i in range(0, len(idx) - maxlen+1):
    sentences.append(idx[i: i + maxlen])
    next_chars.append(idx[i+1: i+maxlen+1])
print('nb sequences:', len(sentences))

nb sequences: 600854


In [0]:
sentences = np.concatenate([[np.array(o)] for o in sentences[:-2]])
next_chars = np.concatenate([[np.array(o)] for o in next_chars[:-2]])

In [25]:
sentences.shape, next_chars.shape

((600852, 40), (600852, 40))

In [0]:
n_fac = 42

In [0]:
model = Sequential([
    Embedding(vocab_size, n_fac, input_length=maxlen),
    LSTM(512, input_shape=(n_fac, len(chars)), return_sequences=True, dropout=0.2, recurrent_dropout=0.2,
             implementation=2),
    Dropout(0.2),
    LSTM(512, return_sequences=True, dropout=0.2, recurrent_dropout=0.2,
             implementation=2),
    Dropout(0.2),
    TimeDistributed(Dense(vocab_size)),
    Activation('softmax')
])

In [0]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())

In [0]:
def print_example():
    seed_string="ethics is a basic foundation of all that"
    for i in range(320):
        x=np.array([char_indices[c] for c in seed_string[-40:]])[np.newaxis,:]
        preds = model.predict(x, verbose=0)[0][-1]
        preds = preds/np.sum(preds)
        next_char = np.random.choice(chars, p=preds)
        seed_string = seed_string + next_char
    print(seed_string)

In [46]:
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x7f6a07618a90>

In [59]:
print_example()

ethics is a basic foundation of all that it is mhat betaken up, of the gain danger with destiny of the dangerous skuces, and a "exercise" and
the person end also reverence. he has precisely when we are sensible through elements.

201. it is a "means of the world coarse as them. who read himself, the
personalists
experiences with a "good corruption" in all th


In [0]:
model.optimizer.lr=0.0001

In [61]:
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f69fa3e9c88>

In [62]:
print_example()

ethics is a basic foundation of all that conduct is a remarkable emotions be unconquerable
at
the future of the jews--as cry only a german will to the moral needs and who see
martyrdom of the music, and without possession at once enchant, but
in a
music is wisdom a new characteristic, which is a man of this states
of religion in
his [problems?--that which wa
