In [3]:
# Run this!
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
from keras.callbacks import LambdaCallback, ModelCheckpoint
import numpy as np
import random, sys, io, string

In [2]:
# Read the whole book. The context manager closes the file handle as soon as
# the read finishes instead of leaving it open for the life of the kernel.
with open('Data/The Time Machine.txt', encoding = 'UTF-8') as book_file:
    text = book_file.read()

# Let's have a look at some of the text
print(text[0:198])

# Lower-case everything and strip the characters listed in string.punctuation
text = text.lower().translate(str.maketrans("", "", string.punctuation))

# Character index dictionaries: charset is the sorted list of distinct
# characters; the two dicts map character -> index and index -> character.
charset = sorted(list(set(text)))
index_from_char = dict((c, i) for i, c in enumerate(charset))
char_from_index = dict((i, c) for i, c in enumerate(charset))

print('text length: %s characters' %len(text))
print('unique characters: %s' %len(charset))

﻿The Time Traveller (for so it will be convenient to speak of him) was expounding a recondite matter to us. His pale grey eyes shone and twinkled, and his usually pale face was flushed and animated.
text length: 174201 characters
unique characters: 39


In [3]:
# Window size (characters per training example) and stride between windows.
sequence_length = 40
step = 4

# Start offset of every window; the character right after a window is its target.
window_starts = range(0, len(text) - sequence_length, step)

# Each entry of `sequences` is a one-element list holding the window string.
sequences = [[text[start: start + sequence_length]] for start in window_starts]
target_chars = [text[start + sequence_length] for start in window_starts]

print('number of training sequences:', len(sequences))

number of training sequences: 43541


In [4]:
# One-hot vectorise
#
# X[n, m, k] is True when character m of training sequence n is charset[k];
# y[n, k] is True when the character that follows sequence n is charset[k].
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.

X = np.zeros((len(sequences), sequence_length, len(charset)), dtype=bool)
y = np.zeros((len(sequences), len(charset)), dtype=bool)

for n, sequence in enumerate(sequences):
    # Each entry of `sequences` is a one-element list wrapping the window string.
    for m, character in enumerate(sequence[0]):
        X[n, m, index_from_char[character]] = 1
    y[n, index_from_char[target_chars[n]]] = 1


In [5]:
# Network: one 128-unit LSTM layer over the one-hot windows, followed by a
# softmax layer with one unit per column of y.
window_shape = (X.shape[1], X.shape[2])
n_outputs = y.shape[1]

model = Sequential()
model.add(LSTM(128, input_shape=window_shape))
model.add(Dense(n_outputs, activation='softmax'))

model.compile(optimizer='Adam', loss='categorical_crossentropy')


In [6]:
# Run this, but do not edit.
# It helps generate the text and save the model epochs.

# Generate new text
def _sample_index(predictions, diversity):
    """Draw one index from `predictions` after rescaling by `diversity`.

    The probabilities are raised to the power 1 / diversity (done in log
    space, in float64) and renormalised; a single multinomial draw then
    selects the returned index.
    """
    scaled = np.exp(np.log(np.asarray(predictions).astype('float64')) / diversity)
    return np.argmax(np.random.multinomial(1, scaled / np.sum(scaled), 1))


def on_epoch_end(epoch, _):
    """Print 500 characters sampled from `model`, seeded with a random excerpt of `text`.

    Both parameters are unused; the two-argument signature is the one
    `LambdaCallback` expects for its `on_epoch_end` hook (epoch, logs).
    Reads the notebook globals `text`, `sequence_length`, `charset`,
    `index_from_char`, `char_from_index` and `model` at call time.
    """
    diversity = 0.5
    print('\n### Generating text with diversity %0.2f' %(diversity))

    start = random.randint(0, len(text) - sequence_length - 1)
    seed = text[start: start + sequence_length]
    print('### Generating with seed: "%s"' %seed)

    # `output` is the sliding window fed to the model. It must stay exactly
    # sequence_length characters long, so it follows sequence_length rather
    # than a hard-coded 40. `text` was already lower-cased and stripped of
    # punctuation when it was loaded, so the seed needs no further cleaning.
    output = seed
    print(output, end = '')

    for i in range(500):
        # One-hot encode the current window as a batch of one.
        x_pred = np.zeros((1, sequence_length, len(charset)))
        for t, char in enumerate(output):
            x_pred[0, t, index_from_char[char]] = 1.

        predictions = model.predict(x_pred, verbose=0)[0]
        next_char = char_from_index[_sample_index(predictions, diversity)]

        # Slide the window: drop the oldest character, append the new one.
        output = output[1:] + next_char

        print(next_char, end = '')
    print()
# Hook the text generator into training through the on_epoch_end callback slot.
print_callback = LambdaCallback(on_epoch_end = on_epoch_end)

# Save the model: with save_best_only and mode 'min' on the monitored 'loss',
# a checkpoint file is written only for epochs where the loss improved.
checkpoint = ModelCheckpoint(
    'Models/model-epoch-{epoch:02d}.hdf5',
    monitor = 'loss',
    mode = 'min',
    save_best_only = True,
    verbose = 1,
)

In [7]:
# Train for 3 epochs in batches of 128, with the text-sampling and
# checkpoint callbacks attached.
model.fit(
    X,
    y,
    batch_size = 128,
    epochs = 3,
    callbacks = [print_callback, checkpoint],
)

Epoch 1/3
### Generating text with diversity 0.50
### Generating with seed: "r the clinging hands slipped from me the"
r the clinging hands slipped from me the the  an te thy at so se  heli hle in ti in n s ime  mate  an tae wi t tred ter i ten tooed tee s fao w we o  he t aa iot ai e w ae an pe te cn the n e ine te  tor the ae e the t e me me in the  he s on le t ie s d d sor t s t toithe the anat ts an ti the an bhi le ai ace s ror the the the w anre tee the  arp ai the the ne mer irh aare toune the me the  hu iur tr the t te s fe tore tiin the tre bi he coe n the a d te iae s thes tle mes len wan  s selon the tse t me ina fed d e toe the ti th s pe

Epoch 00001: loss improved from inf to 2.74548, saving model to Models\model-epoch-01.hdf5
Epoch 2/3
### Generating text with diversity 0.50
### Generating with seed: "elped the editor on with his coat the me"
elped the editor on with his coat the me at in theg ind the hand wore and ond the se the chat out or ale toi s awe fot io the gas

<tensorflow.python.keras.callbacks.History at 0x25d38ddcfd0>

In [1]:
from keras.models import load_model
# Progress message is printed without a newline so "model loaded" lands on
# the same line once loading finishes.
print("loading model... ", end = '')

# Restore the saved network from disk, then compile it with the same loss
# and optimizer as the model cell earlier in the notebook.
model = load_model('Models/arthur-model-epoch-30.hdf5')
model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy')

print("model loaded")

loading model... model loaded


In [4]:
# Read the whole corpus. The context manager closes the file handle as soon
# as the read finishes instead of leaving it open for the life of the kernel.
with open('Data/Arthur tales.txt', encoding='UTF-8') as corpus_file:
    text = corpus_file.read()

# Cut out punctuation and make lower case
text = text.lower().translate(str.maketrans("", "", string.punctuation))

# Character index dictionaries: charset is the sorted list of distinct
# characters; the two dicts map character -> index and index -> character.
charset = sorted(list(set(text)))
index_from_char = dict((c, i) for i, c in enumerate(charset))
char_from_index = dict((i, c) for i, c in enumerate(charset))

print('text length: %s characters' %len(text))
print('unique characters: %s' %len(charset))


text length: 3645951 characters
unique characters: 43


In [5]:
# Generate text

diversity = 0.5
print('\n### Generating text with diversity %0.2f' %(diversity))

sequence_length = 50

# Next we'll make a starting point for our text generator

###
# TO USE YOUR OWN SEED: UNCOMMENT THE LINE BELOW, REPLACE <writeSentence> WITH A
# SENTENCE OF AT LEAST 50 CHARACTERS (NOT COUNTING PUNCTUATION), AND COMMENT OUT
# THE TWO RANDOM-SEED LINES FURTHER DOWN
###
# seed = "<writeSentence>"
###

###
# OTHERWISE, THE FOLLOWING TWO LINES GRAB A RANDOM STRING FROM THE TEXT FILE
###

start = random.randint(0, len(text) - sequence_length - 1)
seed = text[start: start + sequence_length]

# Normalise first and truncate afterwards: truncating before stripping
# punctuation could leave a hand-written seed shorter than the model window.
seed = seed.lower().translate(str.maketrans("", "", string.punctuation))[:sequence_length]

if len(seed) < sequence_length:
    raise ValueError('seed must contain at least %d characters after punctuation is removed, got %d'
                     %(sequence_length, len(seed)))

unknown_chars = sorted(set(seed) - set(charset))
if unknown_chars:
    # Fail with a clear message instead of a KeyError inside the encoding loop.
    raise ValueError('seed contains characters that are not in the charset: %r' %unknown_chars)

# Show the whole seed (the window is sequence_length characters, not 40).
print('### Generating with seed: "%s"' %seed)

# `output` is the sliding window fed to the model; it always holds exactly
# sequence_length characters.
output = seed
print(output, end = '')

for i in range(1500):
    # One-hot encode the current window as a batch of one.
    x_pred = np.zeros((1, sequence_length, len(charset)))
    for t, char in enumerate(output):
        x_pred[0, t, index_from_char[char]] = 1.

    predictions = model.predict(x_pred, verbose=0)[0]
    # Rescale the predicted probabilities by 1 / diversity (in log space),
    # renormalise, and draw a single sample from the result.
    exp_preds = np.exp(np.log(np.asarray(predictions).astype('float64')) / diversity)
    next_index = np.argmax(np.random.multinomial(1, exp_preds / np.sum(exp_preds), 1))
    next_char = char_from_index[next_index]

    # Slide the window: drop the oldest character, append the new one.
    output = output[1:] + next_char

    print(next_char, end = '')
print()


### Generating text with diversity 0.50
### Generating with seed: "m the object of his journey “i have none"
m the object of his journey “i have none” he replied  “and then said the earl “if thou wilt never be but a lady of the forest said sir tristram i will take the sword to the court and that will not meet and when they were brought to the earth and then he saw a fair thing field in the forest and sir gawaine to his enemies and then sir gawaine said all the death of my power i was sir lancelot and sir launcelot by the faith of his horse with the chamber and so as he was come and then sir tristram heard that he came to the castle and there was a chamber that he had never marvel that we shall not do so much as i may not be sir gawaine and for the kings son that ye shall be given for thee who shall be able to be brought out of him and that was the good knight when the king and the strongest man saw he said with the knight and this is he that is your name to her the heart of the tower 