In [1]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import sys

Using TensorFlow backend.


In [2]:
# load ascii text and convert to lowercase
filename = "wonderland.txt"
# read inside a context manager so the file handle is closed as soon as the text is in memory
with open(filename) as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

In [3]:
# Build the vocabulary: every distinct character of the text, in sorted order,
# and a lookup table from each character to its position in that ordering.
chars = sorted(set(raw_text))
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

In [4]:
# Size of the corpus (in characters) and of the vocabulary (distinct characters).
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  144325
Total Vocab:  45


In [5]:
# Slide a window of seq_length characters over the text, one character at a time.
# Each window becomes an input sequence and the character that follows it becomes
# the target; both are stored as integer codes via char_to_int.
seq_length = 100
dataX, dataY = [], []
for i in range(n_chars - seq_length):
    seq_in, seq_out = raw_text[i:i + seq_length], raw_text[i + seq_length]
    dataX.append([char_to_int[symbol] for symbol in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)

# show the first encoded input sequence and how many (input, target) pairs were built
print(dataX[0])
print("Total Patterns: ", n_patterns)

[17, 22, 15, 30, 34, 19, 32, 1, 23, 8, 1, 18, 29, 37, 28, 1, 34, 22, 19, 1, 32, 15, 16, 16, 23, 34, 7, 22, 29, 26, 19, 0, 0, 15, 26, 23, 17, 19, 1, 37, 15, 33, 1, 16, 19, 21, 23, 28, 28, 23, 28, 21, 1, 34, 29, 1, 21, 19, 34, 1, 36, 19, 32, 39, 1, 34, 23, 32, 19, 18, 1, 29, 20, 1, 33, 23, 34, 34, 23, 28, 21, 1, 16, 39, 1, 22, 19, 32, 1, 33, 23, 33, 34, 19, 32, 1, 29, 28, 1, 34]
Total Patterns:  144225


In [6]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize: divide the integer codes by the vocabulary size
X = X / float(n_vocab)
# one hot encode the output variable
# num_classes is given explicitly so that y always has one column per vocabulary
# entry; left to inference it would be max(dataY) + 1, which is smaller than
# n_vocab if the highest-indexed character never occurs as a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

In [7]:
# Basic network: one 256-unit LSTM layer over the input sequence, 20% dropout,
# then a softmax layer with one output per column of y.
# NOTE(review): the cell that follows rebinds `model`, so on a top-to-bottom run
# this definition is replaced before training starts.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [7]:
# Larger network: two stacked 256-unit LSTM layers, each followed by 20% dropout.
# The first LSTM returns its full output sequence so the second LSTM can consume it;
# the final softmax layer has one output per column of y.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [8]:
# Checkpoint callback: monitors the training loss and, with save_best_only set,
# writes a weights file only for epochs where that loss reaches a new minimum.
# The epoch number and loss value are formatted into the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [11]:
# Train for 8 epochs with batches of 100 patterns; the checkpoint callback runs during training.
model.fit(
    X,
    y,
    batch_size=100,
    epochs=8,
    callbacks=callbacks_list,
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f7823252438>

In [12]:
# load the network weights
# The name follows the checkpoint pattern "weights-improvement-{epoch}-{loss}", so it
# belongs to one particular training run.
# NOTE(review): after retraining, point this at a checkpoint file that actually exists.
# NOTE(review): `filename` is the same variable that earlier held the text file's name.
filename = "weights-improvement-07-1.8130.hdf5"
model.load_weights(filename)
# compile again with the same loss and optimizer used when the model was defined
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [13]:
# Reverse lookup: integer code -> character, i.e. the position of each entry in chars.
int_to_char = dict(enumerate(chars))

In [19]:
# Generate 1000 characters with the trained model, starting from a randomly
# chosen training sequence and feeding each prediction back in as input.

# pick a random seed; randint's upper bound is exclusive, so passing len(dataX)
# makes every pattern (including the last one) eligible
start = numpy.random.randint(0, len(dataX))
# work on a copy: the window is extended with append() below, and appending to
# the list stored in dataX would leave that training pattern one element too long
pattern = list(dataX[start])
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(1000):
    # shape the current window as [1 sample, time steps, 1 feature] and scale it
    # by the vocabulary size, the same way the training input was prepared
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # greedy decoding: always take the highest-scoring class
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    sys.stdout.write(result)
    # slide the window: add the predicted character, drop the oldest one
    pattern.append(index)
    pattern = pattern[1:]
print ("\nDone.")

Seed:
" ’

‘she boxed the queen’s ears--’ the rabbit began. alice gave a little
scream of laughter. ‘oh, hus "
s a mittle bool of the mittle oarter of the mittle oarter of the mittle oarter of the mittle of the rabbit of the rabbit of the rabbit of the rabbit of the rabbit of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the right of the rabbit was the righ