In [1]:
import numpy as np
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.optimizers import RMSprop
import sys
import random
import math

Using TensorFlow backend.


### Load Text File

In [2]:
# data_path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
data_path = "test_data.txt"

# Read the whole corpus into memory and lower-case it so the vocabulary does
# not distinguish between upper- and lower-case letters. The context manager
# guarantees the file handle is closed even if reading fails.
# NOTE(review): any weights loaded later must come from a model trained on text
# with the same character set as this file -- confirm the corpus matches.
with open(data_path) as corpus_file:
    raw_text = corpus_file.read().lower()

### Build Vocabulary

In [3]:
# Vocabulary: every distinct character of the corpus, sorted so that the
# character <-> index assignment is deterministic across runs.
unique_chars = sorted(set(raw_text))
print(unique_chars)

# Lookup tables in both directions (character -> index, index -> character).
char_to_int = {symbol: index for index, symbol in enumerate(unique_chars)}
int_to_char = {index: symbol for index, symbol in enumerate(unique_chars)}

# Corpus size and vocabulary size, used later to shape the model input/output.
num_chars = len(raw_text)
len_vocab = len(unique_chars)

print("\nTotal characters:\t" + str(num_chars))
print("Length of vocabulary:\t" + str(len_vocab))

['\n', ' ', ',', '.', ':', ';', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'y']

Total characters:	212
Length of vocabulary:	29


### Define Model

I will use a single hidden LSTM layer with 256 memory units and a dropout probability of 20%. The dense layer will use a softmax activation to output a probability prediction for each of the characters, between 0 and 1.

In [4]:
# Hyper-parameters.
sequence_length = 40      # number of characters fed to the network per sample
learning_rate = 0.01
num_memory_units = 256    # size of the LSTM layer

optimizer = RMSprop(lr=learning_rate)
# optimizer = 'adam'

# Network: one LSTM layer over one-hot encoded character windows of shape
# (sequence_length, len_vocab), followed by dropout and a dense layer with one
# output per vocabulary entry. In language modeling the final output should be
# a probability distribution over characters, which is why softmax makes more
# sense here than sigmoid.
model = Sequential([
    LSTM(num_memory_units, input_shape=(sequence_length, len_vocab)),
    Dropout(0.2),
    Dense(len_vocab),
    Activation('softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer=optimizer)

"We are not interested in the most accurate (classification accuracy) model of the training dataset. This would be a model that predicts each character in the training dataset perfectly. Instead we are interested in a generalization of the dataset that minimizes the chosen loss function. We are seeking a balance between generalization and overfitting but short of memorization."

## Predict

In [5]:
# load weights
weights_path = '_char_weights.hdf5'
model.load_weights(weights_path)
model.compile(loss='categorical_crossentropy', optimizer='adam')

ValueError: Dimension 0 in both shapes must be equal, but are 29 and 32 for 'Assign' (op: 'Assign') with input shapes: [29,512], [32,512].

The process of predicting goes something like this:

1. Generate a seed sequence as input
2. Generate the next character
3. Update the seed sequence to add the generated character<br>
    3.1. Add the generated character to the end<br>
    3.2. Trim the first character
    
This is repeated an arbitrary number of times - for as many characters one wants to generate.

In [None]:
chars_to_generate = 400

# Prime the generator with a random window of the corpus that is exactly
# sequence_length characters long.
random_index_start = np.random.randint(0, num_chars - sequence_length - 1)
seed_end = random_index_start + sequence_length
seed_pattern = raw_text[random_index_start:seed_end]

print('-> seed: "' + seed_pattern + '"')


def _one_hot_encode(pattern):
    """Return `pattern` as a (1, sequence_length, len_vocab) one-hot array."""
    encoded = np.zeros((1, sequence_length, len_vocab))
    for position, symbol in enumerate(pattern):
        encoded[0, position, char_to_int[symbol]] = 1.
    return encoded


for _ in range(chars_to_generate):
    # Greedy decoding: always take the most probable next character.
    predictions = model.predict(_one_hot_encode(seed_pattern), verbose=0)[0]
    predicted_char = int_to_char[np.argmax(predictions)]

    # Slide the window: drop the oldest character, append the new one.
    seed_pattern = seed_pattern[1:] + predicted_char

    # Stream the generated text as it is produced.
    sys.stdout.write(predicted_char)
    sys.stdout.flush()

* https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py
* https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/