In [12]:
# vectorization - chars to ints
import string
import random
import sys

import numpy as np

from keras.models import load_model

def sample(preds, temperature=1.0):
    """Sample an index from a probability array, rescaled by temperature.

    The probabilities are moved to log-space, divided by ``temperature`` and
    renormalised with a softmax before a single multinomial draw is made.
    Temperatures below 1 sharpen the distribution, temperatures above 1
    flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. one softmax output row).
        temperature: Divisor applied to the log-probabilities. ``0`` selects
            the most likely index deterministically instead of dividing by
            zero.

    Returns:
        Integer index of the sampled entry.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # The softmax limit as temperature -> 0 is a greedy argmax; dividing
        # by zero below would otherwise yield inf/NaN probabilities.
        return np.argmax(preds)
    # The 1e-6 offset keeps log() finite for zero-probability entries.
    logits = np.log(preds + 1e-6) / temperature
    # Subtracting the max leaves the softmax result unchanged but stops exp()
    # from overflowing when the scaled logits are large.
    exp_preds = np.exp(logits - np.max(logits))
    probas = np.random.multinomial(1, exp_preds / np.sum(exp_preds), 1)
    return np.argmax(probas)

def generate(model, diversity=0.5, text="", length=5000):
    """Generate text from a model, streaming it to stdout.

    A random window of ``maxlen`` characters from ``text`` is used as the
    seed. The model is then repeatedly asked for a next-character
    distribution, a character is drawn with :func:`sample`, written to stdout
    and shifted into the window.

    Args:
        model: Object whose ``predict(x, verbose=0)`` returns, as its first
            row, the next-character probabilities for a ``(1, maxlen)``
            integer array of character indices.
        diversity: Temperature handed to :func:`sample`.
        text: Seed corpus; must be longer than ``maxlen`` characters.
        length: Number of characters to generate after the seed.

    Returns:
        None. The seed and the generated characters are written to stdout.

    Raises:
        ValueError: If ``text`` is too short to cut a seed window from.
    """
    if len(text) <= maxlen:
        # random.randint would fail with an opaque "empty range" message.
        raise ValueError(
            "text must contain more than %d characters to pick a seed, got %d"
            % (maxlen, len(text)))
    start_index = random.randint(0, len(text) - maxlen - 1)
    sentence = text[start_index: start_index + maxlen]
    print('----- Generating with seed: "' + sentence + '"')
    sys.stdout.write(sentence)

    for _ in range(length):
        # Builtin int is what the removed ``np.int`` alias resolved to.
        x = np.zeros((1, maxlen), dtype=int)
        for t, char in enumerate(sentence):
            try:
                x[0, t] = char_indices[char]
            except KeyError:
                # Best effort: a character outside the vocabulary keeps index
                # 0 and the offending window is printed for inspection.
                print(sentence)
        preds = model.predict(x, verbose=0)[0]
        next_index = sample(preds, diversity)
        next_char = indices_char[next_index]
        # Slide the window one character forward.
        sentence = sentence[1:] + next_char
        sys.stdout.write(next_char)
        sys.stdout.flush()
    return

def vectorize(text, step=3):
    """Convert text into index-encoded windows and one-hot next-char targets.

    A window of ``maxlen`` characters is taken every ``step`` characters; the
    character immediately following each window is its target.

    Args:
        text: String whose characters are all keys of ``char_indices``.
        step: Stride between the starts of consecutive windows.

    Returns:
        Tuple ``(X, y)``: ``X`` is an integer array of shape
        ``(n_windows, maxlen)`` holding character indices, ``y`` a boolean
        array of shape ``(n_windows, len(chars))`` with one True per row.

    Raises:
        KeyError: If ``text`` contains a character missing from
            ``char_indices``.
    """
    # Window start offsets; empty when the text is not longer than maxlen.
    starts = range(0, len(text) - maxlen, step)
    # Builtin int/bool are what the removed ``np.int``/``np.bool`` aliases
    # resolved to, so the resulting dtypes are unchanged.
    X = np.zeros((len(starts), maxlen), dtype=int)
    y = np.zeros((len(starts), len(chars)), dtype=bool)
    for i, start in enumerate(starts):
        for t, char in enumerate(text[start: start + maxlen]):
            X[i, t] = char_indices[char]
        # The target is the character right after the window.
        y[i, char_indices[text[start + maxlen]]] = 1
    return X, y

def clean_text(text, charset):
    """Normalise whitespace and drop characters outside ``charset``.

    Every run of whitespace is first collapsed to a single space (leading and
    trailing whitespace is removed), then each character not contained in
    ``charset`` is discarded. Because filtering happens after the collapse,
    removing a token that sat between two spaces can leave adjacent spaces.

    Args:
        text: Input string.
        charset: Container of characters to keep.

    Returns:
        The cleaned string.
    """
    collapsed = " ".join(text.split())
    kept = filter(lambda ch: ch in charset, collapsed)
    return "".join(kept)

def get_model(modelfile, freeze=False):
    """Load a saved model and optionally mark its first six layers frozen.

    Args:
        modelfile: Path handed to ``load_model``.
        freeze: When true, ``trainable`` is set to False on the first six
            entries of ``model.layers``.

    Returns:
        The loaded model.
    """
    loaded = load_model(modelfile)
    if not freeze:
        return loaded
    # NOTE(review): Keras applies ``trainable`` changes when the model is
    # compiled -- confirm callers recompile before further training.
    for frozen_layer in loaded.layers[:6]:
        frozen_layer.trainable = False
    return loaded

# Vocabulary: a space, then the ASCII letters, then ASCII punctuation. The
# position of a character in this string is its integer index.
chars = " " + string.ascii_letters + string.punctuation
charset = set(chars)  # set form for membership tests
char_indices = {char: index for index, char in enumerate(chars)}
indices_char = dict(enumerate(chars))

# Sequence (window) length; must match the length the model was built with.
maxlen = 100

In [2]:
from keras.models import Sequential
from keras.layers import Embedding, Dropout, BatchNormalization, GRU, Dense

def get_gru_model(use_embeddings=False):
    """Build and compile the character-level GRU model.

    Layers, in order: Embedding (300-d), Dropout(0.1), BatchNormalization,
    GRU(256), Dropout(0.3), BatchNormalization and a softmax Dense layer with
    one unit per vocabulary character. The model is compiled with categorical
    cross-entropy loss and the Adam optimizer.

    Args:
        use_embeddings: Currently unused; kept so existing callers keep
            working.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Both the embedding table and the output layer are sized from the
    # vocabulary so they stay in step with the targets ``vectorize`` builds
    # (width ``len(chars)``) instead of relying on a hard-coded 85.
    vocab_size = len(chars)
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size, output_dim=300))
    model.add(Dropout(0.1))
    model.add(BatchNormalization())
    model.add(GRU(256))
    model.add(Dropout(0.3))
    model.add(BatchNormalization())
    model.add(Dense(vocab_size, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model

In [4]:
# Read the whole training corpus into memory as a single string.
with open("middlemarch.txt") as corpus_file:
    middlemarch = corpus_file.read()

In [5]:
%%time
test_model = get_gru_model()
X, y = vectorize(clean_text(middlemarch, charset))
test_model.fit(X, y, epochs=10, batch_size=128, validation_split=0.1)

Train on 532465 samples, validate on 59163 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 2h 20min 50s, sys: 13min, total: 2h 33min 51s
Wall time: 1h 38min 51s


In [13]:
# Seed from the first 1000 raw characters, cleaned like the training text.
seed_corpus = clean_text(middlemarch[:1000], charset)
generate(test_model, text=seed_corpus)

----- Generating with seed: "t girl to her? Her flame quickly burned up that light fuel; and, fed from within, soared after some "
t girl to her? Her flame quickly burned up that light fuel; and, fed from within, soared after some one of the elder which had a husband and he was a coming to the schual and seemed to take him to apt to the light of some practice on the world seem a wish to Dorothea said that it was only think of the expectation some money with the bearen and father. Then that you was promision of here and was less and shaking a little which she is not stimist that the too world that it was not such a subject of any will and who was seemed to have long in his best of this manner had been a more who would be anything for was not such as should be a be much enduced in the new states of seemed to the dead of professionable who was sented that many seems in his own man whom he said to the more chome to be interprite barnose of the story to a family was a double with a good sen

In [14]:
# Persist the trained model to disk.
model_path = "middlemarch_basic_gru.hdf5"
test_model.save(model_path)