# DNNs with Keras

Based on the tutorial at: https://github.com/llSourcell/keras_explained/blob/master/gentext.py

In [1]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Preprocessing

In [19]:
# Alternative corpus (disabled): download from bundestag.de via Keras' get_file.
#path = get_file('bundestag.txt', origin='https://www.bundestag.de/blob/569484/7b0f24562fb8ddcbc26f6b4c0591d8f8/19050-data.xml')
# Read the whole corpus into memory and lower-case it. The `with` block closes
# the file handle as soon as the read finishes instead of leaving it open
# until garbage collection.
with io.open("grimm.txt", encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

corpus length: 540240


In [20]:
# Peek at the first 1000 characters of the lower-cased corpus.
text[:1000]

'the project gutenberg ebook of grimms’ fairy tales, by the brothers grimm\n\nthis ebook is for the use of anyone anywhere at no cost and with\nalmost no restrictions whatsoever.  you may copy it, give it away or\nre-use it under the terms of the project gutenberg license included\nwith this ebook or online at www.gutenberg.org\n\n\ntitle: grimms’ fairy tales\n\nauthor: the brothers grimm\n\ntranslator: edgar taylor and marian edwardes\n\nposting date: december 14, 2008 [ebook #2591]\nrelease date: april, 2001\nlast updated: november 7, 2016\n\nlanguage: english\n\ncharacter set encoding: utf-8\n\n*** start of this project gutenberg ebook grimms’ fairy tales ***\n\n\n\n\nproduced by emma dudding, john bickers, and dagny\n\n\n\n\n\nfairy tales\n\nby the brothers grimm\n\n\n\npreparer’s note\n\n     the text is based on translations from\n     the grimms’ kinder und hausmarchen by\n     edgar taylor and marian edwardes.\n\n\n\n\ncontents:\n\n     the golden bird\n     hans in luck\n     

In [21]:
# Vocabulary: every distinct character that occurs in the corpus, sorted.
chars = sorted(set(text))
print('total chars:', len(chars))

total chars: 60


In [22]:
# Show the first ten entries of the sorted vocabulary.
chars[:10]

['\n', ' ', '!', '#', '$', '%', '(', ')', '*', ',']

In [23]:
# Two lookup tables over the vocabulary: character -> position in `chars`,
# and position -> character.
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [24]:
# Slice the corpus into overlapping windows of `maxlen` characters, starting a
# new window every `step` characters. Each window is paired with the single
# character that immediately follows it in the text.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 180067


In [25]:
# Show the first ten windows; consecutive windows are shifted by `step` characters.
sentences[:10]

['the project gutenberg ebook of grimms’ f',
 ' project gutenberg ebook of grimms’ fair',
 'oject gutenberg ebook of grimms’ fairy t',
 'ct gutenberg ebook of grimms’ fairy tale',
 'gutenberg ebook of grimms’ fairy tales, ',
 'enberg ebook of grimms’ fairy tales, by ',
 'erg ebook of grimms’ fairy tales, by the',
 ' ebook of grimms’ fairy tales, by the br',
 'ook of grimms’ fairy tales, by the broth',
 ' of grimms’ fairy tales, by the brothers']

In [26]:
print('Vectorization...')
# One-hot encode the data:
#   x[i, t, k] is True when position t of window i holds character k,
#   y[i, k]    is True when character k follows window i.
# The built-in `bool` is used as dtype: `np.bool` was only an alias for it,
# was deprecated in NumPy 1.20 and removed in 1.24 (AttributeError there).
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

# Print the first target row before and after filling to show the encoding.
print(y[0])
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
print(y[0])

Vectorization...
[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False]
[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False  True False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False]


In [27]:
# Inspect the input tensor: the one-hot row for the first character of the
# first window, then the number of windows.
print(x[0, 0])
print(x.shape[0])

[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False  True False False False False False False False False False False]
180067


In [28]:
# Number of target rows in y.
print(len(y))

180067


In [29]:
# Assemble the network: a 128-unit LSTM reads one (maxlen, len(chars)) one-hot
# window, followed by a dense layer with one unit per character and a softmax.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# NOTE(review): newer Keras releases name this argument `learning_rate`;
# confirm against the installed version before renaming.
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Build model...


In [30]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature rescaling.

    The probabilities are raised to the power ``1 / temperature`` and
    renormalised, then a single index is drawn from the resulting
    distribution. Values below 1 sharpen the distribution, values above 1
    flatten it.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Rescaling factor. ``0`` selects the most probable index
            deterministically (greedy choice) instead of dividing by zero.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit case: dividing the log-probabilities by zero would only
        # produce inf/NaN, so return the arg-max directly.
        return np.argmax(preds)
    # log(0) = -inf is harmless here (exp maps it back to 0), so silence the
    # divide-by-zero warning it would otherwise emit.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature
    # Shift so the largest entry is 0 before exponentiating. The softmax is
    # unchanged mathematically, but at very low temperatures this keeps every
    # exp() from underflowing to 0, which would make the normalisation 0/0.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [31]:
def on_epoch_end(epoch, logs):
    """Epoch-end hook: print text generated by the current model.

    One seed window of `maxlen` characters is picked at random from `text`
    and reused for each diversity (sampling temperature) setting. For every
    setting, 400 characters are generated one at a time and streamed to
    stdout.

    Args:
        epoch: Epoch number supplied by the caller; only used in the banner.
        logs: Supplied by the caller; not used here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed_stop = seed_start + maxlen
    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)

        window = text[seed_start: seed_stop]
        generated = '' + window
        print('----- Generating with seed: "' + window + '"')
        sys.stdout.write(generated)

        for _ in range(400):
            # One-hot encode the current window as a batch of size one.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            generated += next_char
            # Slide the window: drop the oldest character, append the new one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# Wrap the text generator in a callback so it runs after every epoch, then
# train for a single epoch in mini-batches of 128 windows.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(
    x=x,
    y=y,
    batch_size=128,
    epochs=1,
    callbacks=[print_callback],
)

Epoch 1/1

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: " once upon a time an old fox with nine t"
 once upon a time an old fox with nine to the came and the bear and the brother will a look and the word to the forest the sholl the brother to the sell and said to the coundrow and the brother have a long to the saw and said, ‘if a long and the brother to the forest and said, ‘i him. ‘i must do must as the stran and the door a look of the brother with the word to the forem to the brother and the came and said, ‘i have the door the broa
----- diversity: 0.5
----- Generating with seed: " once upon a time an old fox with nine t"
 once upon a time an old fox with nine to the brother him, and the know the brother her have to her of him to the word to the came for him, and the fore sunder shold him, and the came and will not will the farreary have him, and the not not little hill away and will be are and the had of the bearse will round a shreardd amay 

<keras.callbacks.History at 0x22ac26556d8>