# DNNs with Keras

Character-level text generation with an LSTM, based on the tutorial at: https://github.com/llSourcell/keras_explained/blob/master/gentext.py

In [2]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


## Preprocessing

In [5]:
#path = get_file('bundestag.txt', origin='https://www.bundestag.de/blob/569484/7b0f24562fb8ddcbc26f6b4c0591d8f8/19050-data.xml')
# Read the whole corpus and lower-case it. The context manager closes the
# file handle as soon as the text is in memory instead of leaking it.
with io.open("trump.txt", encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

corpus length: 185697


In [6]:
# Preview the first 1000 characters of the lower-cased corpus.
text[:1000]

'a true honor to receive the endorsement of john wayne s daughter read.\n a nearly impossible path to the gop nomination for rubio says.\n newyork tromps jonas day after storm of the century the big city is up and running unlike others in the northeast must be newyorkvalues.\n i love those beautiful gals d s two amazing women.\n thank you nevada trump2016 makeamericagreatagain username realdonaldtrump.\n can you imagine if i had the small crowds that hillary is drawing today in pennsylvania it would be a major media event.\n we are already live in everett wa for the trump rally come join us our cameras tonight trumpineverett.\n really sad that republicans would allow themselves to be used in a clinton ad lindsey graham romney flake sass supreme court remember.\n our incompetent secretary of state hillary clinton was the one who started talks to give 400 million dollars in cash to iran scandal.\n wow is in total disarray with almost everybody quitting good news bad dishonest journalists

In [7]:
# Vocabulary: every distinct character that occurs in the corpus, sorted.
chars = sorted(set(text))
print('total chars:', len(chars))

total chars: 39


In [10]:
# Show the first ten entries of the sorted vocabulary.
chars[:10]

['\n', ' ', '.', '0', '1', '2', '3', '4', '5', '6']

In [11]:
# Lookup tables between a character and its position in `chars`.
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [12]:
# Slide a window of `maxlen` characters over the text, moving `step`
# characters at a time. Each window is a training sequence; the character
# immediately after the window is the one to predict.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 61886


In [13]:
# Show the first ten overlapping training sequences.
sentences[:10]

['a true honor to receive the endorsement ',
 'rue honor to receive the endorsement of ',
 ' honor to receive the endorsement of joh',
 'nor to receive the endorsement of john w',
 ' to receive the endorsement of john wayn',
 ' receive the endorsement of john wayne s',
 'ceive the endorsement of john wayne s da',
 've the endorsement of john wayne s daugh',
 'the endorsement of john wayne s daughter',
 ' endorsement of john wayne s daughter re']

In [14]:
print('Vectorization...')
# One-hot encode the training data:
#   x[i, t, c] is True when the character at position t of sentence i is chars[c]
#   y[i, c]    is True when the character following sentence i is chars[c]
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


In [16]:
# input
# One-hot row of the first character of the first sequence, followed by the
# number of training sequences.
print(x[0, 0])
print(x.shape[0])

[False False False False False False False False False False False False
 False  True False False False False False False False False False False
 False False False False False False False False False False False False
 False False False]
61886


In [17]:
# Number of target rows (one per training sequence).
print(y.shape[0])

61886


In [30]:
# Build the model: one LSTM layer with 128 units reading one-hot sequences
# of shape (maxlen, len(chars)), followed by a dense layer and a softmax
# over the vocabulary.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

Build model...


In [31]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reweighted by temperature.

    The probabilities are raised to the power ``1 / temperature`` and
    renormalized before a single index is drawn from the result.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Reweighting factor. Values below 1 sharpen the
            distribution, values above 1 flatten it. ``0`` selects the most
            probable index deterministically.

    Returns:
        The sampled index (as returned by ``np.argmax``).
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Dividing by zero would turn every weight into NaN; the limit of
        # temperature -> 0 is simply the most probable entry.
        return np.argmax(preds)
    # log(0) == -inf is intended here: such entries end up with weight 0.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum so the largest weight is exp(0) == 1. Without the
    # shift, very small temperatures underflow every weight to 0 and the
    # normalization below becomes 0 / 0.
    exp_preds = np.exp(logits - np.max(logits))
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [32]:
def on_epoch_end(epoch, logs):
    """Print text generated by the model at the end of an epoch.

    A random `maxlen`-character seed is cut from the corpus. For each
    diversity (sampling temperature) value, 400 characters are generated one
    at a time and streamed to stdout, feeding each sampled character back
    into the model input.

    Args:
        epoch: Index of the epoch that just finished (used in the header).
        logs: Passed by the callback machinery; not used here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # Rolling window holding the most recent `maxlen` characters.
        window = seed
        for _ in range(400):
            # One-hot encode the current window as a batch of size one.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(window):
                x_pred[0, pos, char_indices[ch]] = 1.

            probabilities = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(probabilities, diversity)]

            # Drop the oldest character and append the new one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# Run the text-generation hook after every epoch while training.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

fit_options = dict(batch_size=128, epochs=1, callbacks=[print_callback])
model.fit(x, y, **fit_options)

Epoch 1/1

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "mericafirst.
 other than a small group o"
mericafirst.
 other than a small group on will to and to and the will a dolinat and the will be a to and the will the will be wast a doon to all call the will trump call the will the will to as doons the will be will be to the will to and the will be to all that the will be will be to and the seating to ame that the will to and and the will be will be to the will the told the will the a tomant on the will to america greatagain trump rep
----- diversity: 0.5
----- Generating with seed: "mericafirst.
 other than a small group o"
mericafirst.
 other than a small group ous and presids and the soot werser to on will be sardect all cruz a poople on incalled at caid is pall trump cilling interation trump the will the will hast nepars that i ways herd nemere is a tous a trump trump lieser wo to shoull gouted on ore fors a the wall the our the will be to st

<keras.callbacks.History at 0x2b3ccf2b198>