# Text Generation Exercise with Keras
Using section 8.1 of Deep Learning with Python as a guide, I implement an LSTM text generator. The model is trained on the text of The War of the Worlds from https://www.gutenberg.org/ebooks/36.

### Download and parse initial text file

In [1]:
import keras
import numpy as np
from pathlib import Path
import os

# Locate the corpus next to the working directory.
current_dir = Path(os.getcwd()).absolute()
f_path = current_dir.joinpath('war_of_the_worlds.txt')

# NOTE(review): `origin` is the same local path as the target file name rather
# than a URL, so this presumably relies on the file already being present on
# disk (nothing can be downloaded from it) — confirm.
path = keras.utils.get_file(f_path,
                            origin=f_path)

# Read the whole corpus in one go; the context manager guarantees the file
# handle is closed even if decoding fails.
with open(path, encoding="utf-8") as corpus_file:
    text = corpus_file.read().lower()
text = text.replace('\n', ' ')  # Remove new line characters

print('Corpus length:', len(text))

Corpus length: 357027


### Vectorize sequence of characters

In [2]:
# Slice the corpus into overlapping windows of `maxlen` characters, taking a
# new window every `step` characters. The character immediately following
# each window is the prediction target for that window.
maxlen = 60
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i:i+maxlen])
    next_chars.append(text[i+maxlen])

print('Number of sequences:', len(sentences))
chars = sorted(set(text))
print('Unique characters:', len(chars))
# Map each character to its position in `chars`. Built with enumerate so the
# lookup table is created in a single pass instead of calling chars.index()
# once per character.
char_indicies = {char: index for index, char in enumerate(chars)}

print('Vectorization...')
# One-hot encode the windows (x) and their target characters (y).
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indicies[char]] = 1
    y[i, char_indicies[next_chars[i]]] = 1


Number of sequences: 118989
Unique characters: 64
Vectorization...


### Build network

In [3]:
from keras import layers

# Single LSTM layer reading one-hot windows of shape (maxlen, len(chars)),
# followed by a softmax over the character vocabulary.
model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
# Targets are one-hot encoded, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

### Train language model and sample from it

##### Function to sample next character given model's prediction

In [4]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature reweighting.

    The probabilities are moved to log space, divided by ``temperature``,
    exponentiated and renormalised so they sum to one; a single multinomial
    draw is then taken from the result.

    Args:
        preds: Array-like of probabilities (e.g. a softmax output).
        temperature: Divisor applied to the log-probabilities. Values below 1
            sharpen the distribution, values above 1 flatten it.

    Returns:
        The index of the drawn element.
    """
    probabilities = np.asarray(preds).astype('float64')
    # Cast to float64 first so the renormalised vector sums to one closely
    # enough for the multinomial draw.
    scaled_log_probs = np.log(probabilities) / temperature
    weights = np.exp(scaled_log_probs)
    distribution = weights / np.sum(weights)
    # One trial, one experiment: the result is a one-hot row whose argmax is
    # the sampled index.
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)

##### Text generation loop

In [8]:
import random
import sys

# Checkpoints and generated samples are written under ./results.
results_dir = current_dir.joinpath('results')
# Create the directory up front; otherwise the first model.save()/open()
# fails on a fresh checkout where it does not exist yet.
results_dir.mkdir(parents=True, exist_ok=True)
generated_text_file_path = results_dir.joinpath('generated_text.txt')

for epoch in range(1, 51):
    print('epoch', epoch)
    # One pass of 300 batches per loop iteration; looping manually lets us
    # checkpoint and sample between passes.
    model.fit(x, y, batch_size=128, steps_per_epoch=300, epochs=1)

    # Generate texts only for epochs in multiples of 10
    if epoch % 10 != 0:
        continue

    # Save model for this epoch. The path is converted to str because not
    # every Keras release accepts pathlib.Path objects here.
    model_name = 'WarOfTheWorlds_Epoch' + str(epoch) + '.h5'
    model_file_path = results_dir.joinpath(model_name)
    model.save(str(model_file_path))

    # Get random starting point to start generating text from
    start_index = random.randint(0, len(text) - maxlen - 1)
    sampled_text = text[start_index: start_index+maxlen]
    print('---Generating with seed: \n"' + sampled_text + '"\n')

    # Write model name and seed text to the log. The corpus is read as
    # UTF-8, so the log is written as UTF-8 too instead of relying on the
    # platform default encoding.
    with open(generated_text_file_path, "a", encoding="utf-8") as f:
        f.write('Model: {}\n'.format(model_name))
        f.write('Sampled Text: {}\n'.format(sampled_text))

    # Generate text for 4 different temperatures
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('\n------ temperature:', temperature)
        sys.stdout.write(sampled_text)
        # `generated_text` is the sliding maxlen-character window fed to the
        # model; `generated_text_full` accumulates seed + everything generated.
        generated_text = sampled_text
        generated_text_full = generated_text

        # Generate 400 more characters to add to sampled text
        for i in range(400):
            # One-hot encode the current window as a batch of size 1.
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indicies[char]] = 1.

            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            # Slide the window: append the new character, drop the oldest.
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)
            generated_text_full += next_char

        # Write temperature and generated text to txt file
        with open(generated_text_file_path, "a", encoding="utf-8") as f:
            f.write('Temperature: {}\n'.format(temperature))
            f.write('Full Generated Text: {}\n'.format(generated_text_full))

    # Blank line separating this checkpoint's entries from the next one.
    with open(generated_text_file_path, "a", encoding="utf-8") as f:
        f.write('\n')

epoch 1
epoch 2
epoch 3
epoch 4
epoch 5
epoch 6
epoch 7
epoch 8
epoch 9
epoch 10
---Generating with seed: 
"s above the hood.  i gave a cry of astonishment. i saw and t"


------ temperature: 0.2
s above the hood.  i gave a cry of astonishment. i saw and the strangest of the stard the ground the martians were the houses of the thing of the starling of the martians were the street of the starling of the thirs was so the strangest of the street of the street of the street of the third the stranger of the street of the pit of the pit of the third the starling of the starling of the street of the pit and side of the strangest of the strangest of the st
------ temperature: 0.5
s above the hood.  i gave a cry of astonishment. i saw and the artilleryman, and the road, and the steambrees and the first with senters was running the milighturide of the third of the stranger and the cart to the stoul accound of the guns of fire of the starling this across the third, and the black poot might get so