# Генерация текстов с RNN

In [1]:
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys

Using TensorFlow backend.


Будем генерировать текст посимвольно с использованием рекуррентной нейронной языковой модели.

In [2]:
path = 'nietzsche.txt'
# Read the whole corpus in one go and lower-case it. The context manager
# guarantees the file handle is closed even if reading or decoding fails.
with open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

corpus length: 600893


Составляем индекс:

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order so
# that the character <-> id mapping is reproducible between runs.
chars = sorted(set(text))
print('total chars:', len(chars))
# Two-way lookup tables: character -> integer id and integer id -> character.
char_indices = {symbol: idx for idx, symbol in enumerate(chars)}
indices_char = dict(enumerate(chars))

total chars: 57


Разбиваем тексты на последовательности символов: 

In [4]:
# Length of each input window and the stride between window starts.
# With step == maxlen the windows do not overlap.
maxlen = 10
step = 10
# Start offsets of all windows; stopping at len(text) - maxlen guarantees
# that the character right after each window exists.
window_starts = range(0, len(text) - maxlen, step)
# Input windows and, for each window, the character that follows it (target).
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 60089


In [5]:
# Inspect the first extracted window of `maxlen` characters.
sentences[0]

'preface\n\n\n'

In [6]:
# Inspect the second extracted window (starts `step` characters later).
sentences[1]

'supposing '

Векторизация:

In [7]:
# One-hot encode the data set.
#   x[i, t, c] is True when character t of window i has id c.
#   y[i, c]    is True when the character following window i has id c.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
print(x.shape)
# One-hot row of the very first character of the first window.
print(x[0][0])

(60089, 10, 57)
[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False  True False False False False False
 False False False False False False False False False]


Архитектура нейронной сети:

In [8]:
# Character-level language model: one LSTM layer reads a window of
# `maxlen` one-hot vectors, and a dense layer followed by a softmax turns its
# output into a distribution over the len(chars) possible next characters.
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

Сэмплирование:

In [9]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector re-scaled by ``temperature``.

    The probabilities are moved to log space, divided by ``temperature`` and
    re-normalised with a softmax; a single index is then drawn from the
    resulting distribution. Temperatures below 1 sharpen the distribution
    (more conservative choices), temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of non-negative probabilities.
        temperature: Scaling factor applied to the log-probabilities.

    Returns:
        The sampled index as a NumPy integer.
    """
    preds = np.asarray(preds).astype('float64')
    # log(0) == -inf is legitimate here (that entry simply keeps probability
    # zero), so the divide-by-zero warning is suppressed.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so that the largest logit is 0. Without the shift a small
    # temperature makes every exp() underflow to 0 and the normalisation
    # below becomes 0 / 0 == NaN.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    # One multinomial trial yields a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

Непосредственно генерация текстов:

In [18]:
# Training loop, left commented out; presumably it was used to produce the
# checkpoint loaded below (confirm before relying on it):
# for iteration in range(1, 60):
#     print()
#     print('-' * 50)
#     print('Iteration', iteration)
#     model.fit(x, y,
#               batch_size=128,
#               epochs=1)
from keras.models import load_model

# Replace the freshly built model with a saved one instead of training here.
model = load_model('models/model59.h5')
# The seed position is drawn once, so every diversity setting below
# continues the same seed text.
start_index = random.randint(0, len(text) - maxlen - 1)

for diversity in [0.2, 0.5, 1.0, 1.2]:
    print()
    print('----- diversity:', diversity)

    # The seed is both the first model input and the start of the output.
    sentence = text[start_index: start_index + maxlen]
    generated = '' + sentence
    print('----- Generating with seed: "' + sentence + '"')
    print(generated)

    for char_no in range(100):
        # One-hot encode the current window of characters in a single
        # indexed assignment: row t gets a 1 in the column of character t.
        encoded = [char_indices[symbol] for symbol in sentence]
        x_pred = np.zeros((1, maxlen, len(chars)))
        x_pred[0, np.arange(len(encoded)), encoded] = 1.

        # Distribution over the next character, then a temperature-scaled draw.
        preds = model.predict(x_pred, verbose=0)[0]
        next_char = indices_char[sample(preds, diversity)]

        generated += next_char
        # Slide the window: drop the oldest character, append the new one.
        sentence = sentence[1:] + next_char

        # Emit characters as they are produced rather than at the end.
        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()


----- diversity: 0.2
----- Generating with seed: "bear the w"
bear the w
orld and to ael he designated and constince of the spectationy." and how
wall of them a certain more

----- diversity: 0.5
----- Generating with seed: "bear the w"
bear the w
orld. their growmen--we kann ham a sceremedent, relight of the present has a
which is to say, and th

----- diversity: 1.0
----- Generating with seed: "bear the w"
bear the w
orthy" the
otis sticcs, that is also far the world of
doppereenes europe nonseress than must has beh

----- diversity: 1.2
----- Generating with seed: "bear the w"
bear the w
orld. and syche, naul amberiated and before of his orders, physhal of etem intenswice, pogeannious
c


## Задание. Генерация панграмм 
Измените код выше так, чтобы генерировались панграммы: последовательности букв, которые содержат каждую букву алфавита ровно один раз.