In [1]:
from keras.utils.data_utils import get_file
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import LambdaCallback
import numpy as np
import sys
import io

import warnings
# Silence every Python warning for the rest of the session. Note that this also
# hides deprecation notices raised by the libraries imported above.
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [2]:
# Colab-only step: upload the training corpus from the local machine into the
# runtime's working directory (the cell output shows it saved as text.txt).
from google.colab import files
uploaded = files.upload()

Saving text.txt to text.txt


In [0]:
# Read the whole corpus into memory and lower-case it. The context manager
# closes the file handle as soon as the read finishes, and the explicit
# encoding keeps the accented letters and typographic quotes in the corpus
# from depending on the platform's default codec (assumes text.txt is UTF-8).
with open("text.txt", encoding="utf-8") as corpus_file:
    text = corpus_file.read().lower()

In [4]:
# Corpus size, in characters.
print(len(text))

16275513


In [0]:
# Vocabulary: every distinct character that occurs in the corpus, sorted.
characters = sorted(set(text))

In [6]:
# Display the vocabulary to inspect which symbols the model will have to handle.
characters

[' ',
 '!',
 '"',
 '#',
 '$',
 '&',
 "'",
 '(',
 ')',
 '*',
 ',',
 '-',
 '.',
 '/',
 '0',
 '1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 ':',
 ';',
 '=',
 '>',
 '?',
 '@',
 '[',
 ']',
 '_',
 '`',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z',
 '{',
 '|',
 '}',
 '°',
 'à',
 'â',
 'æ',
 'ç',
 'è',
 'é',
 'ê',
 'ë',
 'î',
 'ï',
 'ô',
 'ù',
 'û',
 'ü',
 'œ',
 '\u2009',
 '—',
 '‘',
 '’',
 '“',
 '”',
 '⁂']

In [0]:
# Lookup tables in both directions between a character and its integer id
# (its position in the sorted `characters` list).
character_indices = {char: idx for idx, char in enumerate(characters)}
indices_characters = dict(enumerate(characters))

In [8]:
# Slide a window of `maxlen` (forty) characters across the corpus, advancing
# `step` characters at a time. Each window is a 'sentence'; the single
# character immediately after it is the matching 'next_char' target.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 5425158


In [9]:
# One-hot encode the training data:
#   x[i, t, c] is True when character id c sits at position t of sentence i;
#   y[i, c]    is True when character id c is the one that follows sentence i.
# The builtin `bool` is used as the dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
# Memory note: a boolean array stores one byte per element, so `x` occupies
# len(sentences) * maxlen * len(characters) bytes.
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(characters)), dtype=bool)
y = np.zeros((len(sentences), len(characters)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, character_indices[char]] = True
    y[i, character_indices[next_chars[i]]] = True

Vectorization...


In [10]:
# A single 128-unit LSTM reads the one-hot encoded window; the softmax layer
# then produces one probability per character in the vocabulary.
print('Building model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(characters))),
    Dense(len(characters), activation='softmax'),
])

Building model...


In [0]:
# RMSprop with a learning rate of 0.01. Categorical cross-entropy pairs with
# the one-hot targets in `y` and the softmax output layer.
optimizer = RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [0]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature rescaling.

    The probabilities are taken to log space, divided by `temperature`,
    exponentiated and renormalised. Temperatures below 1 therefore sharpen
    the distribution and temperatures above 1 flatten it. A single
    multinomial draw from the rescaled distribution picks the result.

    Args:
        preds: array-like of probabilities, one per candidate index.
        temperature: divisor applied to the log-probabilities.

    Returns:
        The index selected by the multinomial draw.
    """
    log_probs = np.log(np.asarray(preds).astype('float64'))
    weights = np.exp(log_probs / temperature)
    rescaled = weights / np.sum(weights)
    draw = np.random.multinomial(1, rescaled, 1)
    return np.argmax(draw)

In [0]:
def on_epoch_end(epoch, _):
    """Print text sampled from the current model once an epoch finishes.

    A random `maxlen`-character window of the corpus serves as the seed. For
    each diversity value (passed to `sample` as its temperature), 400
    characters are generated one at a time and streamed to stdout, with each
    predicted character fed back into the sliding input window.

    Args:
        epoch: index of the epoch that just ended (used in the banner only).
        _: second callback argument; unused.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    seed_start = np.random.randint(0, len(text) - maxlen - 1)
    seed = text[seed_start: seed_start + maxlen]
    vocab_size = len(characters)

    for diversity in (0.2, 0.5, 1.0, 1.2):
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')

        # Every diversity setting restarts from the same seed text.
        window = seed
        sys.stdout.write(window)

        for _step in range(400):
            # One-hot encode the current window as a batch of one.
            x_pred = np.zeros((1, maxlen, vocab_size))
            for position, char in enumerate(window):
                x_pred[0, position, character_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_characters[sample(preds, diversity)]

            # Slide the window: drop the oldest character, append the new one.
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [0]:
# Wrap on_epoch_end in a Keras callback so it is invoked at the end of every
# training epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [0]:
# Train for 60 epochs in mini-batches of 128 samples; print_callback emits
# generated sample text at the end of each epoch.
model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])

Epoch 1/60
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "e_ blacklybored when they are shut up in"
e_ blacklybored when they are shut up in the success of a little sense of the success of the things that he was a perfect the success of her subject of the conscious and the conscious and the sense of the success of a moment to the subject of the prodement of the dear and sublime and the conscious to the subject of the success of the new prest of the face of a sort of the present of the artist of the way that the subject of the part of 
----- diversity: 0.5
----- Generating with seed: "e_ blacklybored when they are shut up in"
e_ blacklybored when they are shut up in the companion of more with a reader of it more and and away as if he had been a taminess of the positive time, and and i should take her strange was marry her bound the part of the sort of an insuderion, he was an interestion, that she looked at her break the face in the time, in the p