<img src="export/code/tdd-poem-algorithm3.gif" alt="drawing" width="500"/>

Adapted from the Keras character-level LSTM text generation example: https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py

In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation, Dropout
#from keras.layers import CuDNNLSTM
from keras.callbacks import ModelCheckpoint
import random
import sys
import io
import warnings
warnings.filterwarnings("ignore")

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Name of the poet whose work is modelled. It selects the corpus file read
# here ("<artist>.txt") and is reused later to locate the saved weights.
artist = "pushkin"
# Read the whole corpus into memory as one string; UTF-8 is given explicitly
# so the read does not depend on the platform's default encoding.
with open(f"{artist}.txt", encoding="utf-8") as corpus_file:
    corpus = corpus_file.read()
print("Loaded a corpus of {0} characters".format(len(corpus)))

# Build the character vocabulary: every distinct character of the corpus gets
# an integer id (its rank in sorted order). `decoding` maps id -> char and
# `encoding` is its inverse, char -> id.
chars = sorted(set(corpus))
num_chars = len(chars)
decoding = dict(enumerate(chars))
encoding = {symbol: index for index, symbol in decoding.items()}
print("Our corpus contains {0} unique characters.".format(num_chars))


# Slide a window of `sentence_length` characters over the corpus one step at
# a time. Each window (as a list of ids) is a training input in X_data; the
# id of the character that immediately follows it is the label in y_data.
sentence_length = 50
window_starts = range(len(corpus) - sentence_length)
X_data = [
    [encoding[symbol] for symbol in corpus[start:start + sentence_length]]
    for start in window_starts
]
y_data = [encoding[corpus[start + sentence_length]] for start in window_starts]

Loaded a corpus of 788921 characters
Our corpus contains 141 unique characters.


In [3]:
num_sentences = len(X_data)
print("Sliced our corpus into {0} sentences of length {1}".format(num_sentences, sentence_length))

# One-hot encode inputs and labels:
#   X[i, t, c] is True when character t of window i has id c
#   y[i, c]    is True when the character following window i has id c
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
print("Vectorizing X and y...")
X = np.zeros((num_sentences, sentence_length, num_chars), dtype=bool)
y = np.zeros((num_sentences, num_chars), dtype=bool)
for i, sentence in enumerate(X_data):
    for t, encoded_char in enumerate(sentence):
        X[i, t, encoded_char] = 1
    y[i, y_data[i]] = 1

# Double check our vectorized data before we sink hours into fitting a model
print("Sanity check y. Dimension: {0} # Sentences: {1} Characters in corpus: {2}".format(y.shape, num_sentences, len(chars)))
print("Sanity check X. Dimension: {0} Sentence length: {1}".format(X.shape, sentence_length))

Sliced our corpus into 788871 sentences of length 50
Vectorizing X and y...
Sanity check y. Dimension: (788871, 141) # Sentences: 788871 Characters in corpus: 141
Sanity check X. Dimension: (788871, 50, 141) Sentence length: 50


In [5]:
# Character-level language model: two stacked 256-unit LSTM layers with 20%
# dropout between them, followed by a softmax over the vocabulary that scores
# the next character. The first LSTM returns its full output sequence so the
# second one can consume it step by step.
# NOTE(review): the original cell carried commented-out CuDNNLSTM variants of
# both recurrent layers, presumably for GPU runs; they are not used here.
model = Sequential([
    LSTM(256, input_shape=(sentence_length, num_chars), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dense(num_chars, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [6]:
# Restore saved weights from "<artist>.hdf5" into the model built above; the
# cells shown in this notebook never call fit(), so generation relies on them.
model.load_weights(f'{artist}.hdf5')

In [7]:
def sample(preds, temperature=0.6):
    """Draw one index from a probability vector, reshaped by a temperature.

    The probabilities are raised to the power ``1 / temperature`` and
    renormalised before a single multinomial draw is made. Temperatures
    below 1 sharpen the distribution, temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. one softmax output row).
        temperature: Positive scaling factor. A value ``<= 0`` is treated as
            the limiting case and returns the most probable index
            deterministically instead of dividing by zero.

    Returns:
        The sampled index as a NumPy integer.
    """
    preds = np.asarray(preds).astype('float64')

    # Zero (or negative) temperature: the distribution collapses onto the
    # arg-max, so skip the division entirely.
    if temperature <= 0:
        return np.argmax(preds)

    # log(0) == -inf is expected for impossible characters and maps back to
    # probability 0 below, so silence only that specific warning here.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature

    # Shift so the largest entry is 0 before exponentiating. This leaves the
    # normalised result unchanged but keeps exp() from underflowing every
    # entry to 0 (and the division below from producing NaN) at small
    # temperatures.
    scaled -= np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)

    # One multinomial trial yields a one-hot row; its arg-max is the draw.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [8]:
def generate(temperature, max_chars=1000):
    """Generate one sentence of verse and return it as an SSML document.

    A random window of ``sentence_length`` characters from the corpus seeds
    the model. Characters are then sampled one at a time, each one shifting
    the window forward. Everything up to and including the first generated
    newline is discarded so the output starts at a line boundary. After that
    each character is echoed to stdout and appended to the SSML, until a
    full stop is produced or the step limit is reached.

    Args:
        temperature: Sampling temperature forwarded to ``sample``.
        max_chars: Maximum number of model steps, counting the discarded
            prefix as well as the emitted characters.

    Returns:
        A string of the form ``<speak>...</speak>``. Each newline in the
        text is preceded by a 30 ms ``<break>`` element, and the XML special
        characters ``&``, ``<`` and ``>`` in the generated text are escaped
        so the document stays well-formed.
    """
    # Function-scope import keeps this cell self-contained.
    from xml.sax.saxutils import escape

    start_index = random.randint(0, len(corpus) - sentence_length - 1)
    sentence = corpus[start_index: start_index + sentence_length]

    ssml_parts = ['<speak>']
    after_nl = False
    for _ in range(max_chars):
        # One-hot encode the current window as a batch of size one.
        x_pred = np.zeros((1, sentence_length, len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, encoding[char]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]
        next_char = decoding[sample(preds, temperature)]

        # Slide the window: drop the oldest character, append the new one.
        sentence = sentence[1:] + next_char

        if not after_nl:
            # Still inside the partial line that follows the random seed:
            # emit nothing, and start emitting after the first newline.
            after_nl = next_char == '\n'
            continue

        # The console shows the raw text; the SSML gets the escaped form.
        sys.stdout.write(next_char)
        sys.stdout.flush()
        if next_char == '\n':
            ssml_parts.append('<break time="30ms"/> ')
        ssml_parts.append(escape(next_char))

        # A full stop ends the sentence.
        if next_char == '.':
            break

    ssml_parts.append('</speak>')
    return ''.join(ssml_parts)

In [9]:
# Generate one sample at temperature 0.6. The text is echoed to the cell
# output as it is produced; the returned SSML string is kept for saving.
result = generate(0.6)

Братии сосне томим, забот,
Своей на встречу за веселием,
Я даже славно за душа моим.

## Сохраним текст для Алексы

In [20]:
def save_for_alexa(text, path='alexa.txt'):
    """Write the generated text to a file, replacing any previous content.

    The file is written as UTF-8 explicitly, matching how the corpus is
    read. The generated text is Cyrillic, so relying on the platform default
    encoding can raise UnicodeEncodeError or store unreadable bytes.

    Args:
        text: The string to store (the SSML returned by ``generate``).
        path: Destination file; defaults to ``alexa.txt`` in the current
            working directory.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)

In [21]:
# Write the SSML string produced by the generation cell to disk.
save_for_alexa(result)