In [4]:
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [5]:
# Load the ASCII text corpus and convert it to lowercase.
filename = "wonderland.txt"
# Read through a context manager so the file handle is closed as soon as
# the contents are in memory, instead of lingering until garbage collection.
with open(filename) as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

In [8]:
# Build the vocabulary: every distinct character of the corpus in sorted
# order, each mapped to its position in that ordering.
chars = sorted(set(raw_text))
char_to_int = {symbol: code for code, symbol in enumerate(chars)}
n_vocab = len(chars)
n_chars = len(raw_text)
print("Total Characters: ", n_chars)
print( "Total Vocab: ", n_vocab)

Total Characters:  163817
Total Vocab:  60


In [12]:
# Prepare the dataset of input -> output pairs encoded as integers.
# Each input is a window of `seq_length` consecutive characters; its target
# is the single character that immediately follows the window.
seq_length = 100
# Encode the corpus once up front rather than re-encoding every character
# for each of the overlapping windows it belongs to.
encoded_text = [char_to_int[char] for char in raw_text]
# A window may start at any offset that still leaves one following character
# to serve as its target. Slicing yields an independent list per window.
dataX = [encoded_text[i:i + seq_length] for i in range(n_chars - seq_length)]
# The target of the window starting at i is the character at i + seq_length.
dataY = encoded_text[seq_length:n_chars]

n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  163717


In [13]:
# Arrange the integer windows as [samples, time steps, features] (one feature
# per time step: the character code) and scale the codes by the vocabulary
# size, which puts every value in [0, 1).
X = numpy.asarray(dataX).reshape((n_patterns, seq_length, 1)) / float(n_vocab)
# One-hot encode the target characters.
y = np_utils.to_categorical(dataY)

In [14]:
# Define the LSTM model: two stacked 256-unit LSTM layers, each followed by
# 20% dropout, feeding a softmax layer with one unit per column of `y`.
# (A smaller variant is the same network with a single LSTM/Dropout pair and
# no `return_sequences`.)
window_shape = X.shape[1:]  # (time steps, features)
model = Sequential([
    # return_sequences=True hands the full output sequence to the next LSTM.
    LSTM(256, input_shape=window_shape, return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [15]:
# Checkpoint callback: after an epoch, write the weights to disk whenever the
# training loss is the lowest seen so far. The epoch number and the loss are
# substituted into the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]
# Train for 20 epochs in mini-batches of 128 patterns.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=20,
    callbacks=callbacks_list,
)

Epoch 1/20

Epoch 00001: loss improved from inf to 2.99764, saving model to weights-improvement-01-2.9976.hdf5
Epoch 2/20

Epoch 00002: loss improved from 2.99764 to 2.82911, saving model to weights-improvement-02-2.8291.hdf5
Epoch 3/20

Epoch 00003: loss improved from 2.82911 to 2.74107, saving model to weights-improvement-03-2.7411.hdf5
Epoch 4/20

Epoch 00004: loss improved from 2.74107 to 2.67366, saving model to weights-improvement-04-2.6737.hdf5
Epoch 5/20

Epoch 00005: loss improved from 2.67366 to 2.61748, saving model to weights-improvement-05-2.6175.hdf5
Epoch 6/20

Epoch 00006: loss improved from 2.61748 to 2.56558, saving model to weights-improvement-06-2.5656.hdf5
Epoch 7/20

Epoch 00007: loss improved from 2.56558 to 2.51313, saving model to weights-improvement-07-2.5131.hdf5
Epoch 8/20

Epoch 00008: loss improved from 2.51313 to 2.46636, saving model to weights-improvement-08-2.4664.hdf5
Epoch 9/20

Epoch 00009: loss improved from 2.46636 to 2.42225, saving model to weig

KeyboardInterrupt: 

In [16]:
# Restore previously saved network weights from a checkpoint file.
# NOTE(review): this name lacks the "-bigger" suffix used by the checkpoint
# file pattern in this notebook -- confirm the file exists and was saved from
# the same architecture as the current `model`.
weights_file = "weights-improvement-11-2.3439.hdf5"
model.load_weights(weights_file)
# Compile again with the same loss and optimizer as the model definition.
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [18]:
# Reverse lookup (integer code -> character) used to decode predictions.
int_to_char = dict(enumerate(chars))
# Pick a random seed window. randint's upper bound is exclusive, so pass
# len(dataX) itself to make every pattern, including the last, eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: the window is extended below, and aliasing dataX[start]
# would append a predicted code to the training data itself.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# Generate characters one at a time, feeding each prediction back in.
for i in range(1000):
    # Shape and scale the window the same way as the training inputs.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable next character.
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window forward: append the prediction, drop the oldest code.
    pattern.append(index)
    pattern = pattern[1:]

print("\nDone.")

Seed:
" said alice.

‘i mean what i say,’ the mock turtle replied in an offended tone. and
the gryphon added "
 to tee thet she was so the toiee thre the was so the caal to tee thet she was so the toiee the whit woue  ‘he wou dad toe toiee the toiee to the thet wou dade ’huh the toiee the woiee the woiee the woiee tae io the was so the caal, and the was soen io the care whe woiee whr oo the taster and the was so the caal, and the was goln to the thre the was so the caree th the thete was she was so the toiee the whit woue  ‘he wou dad toe toiee the toiee to the thet wou dade ’huh the toiee the woiee the woiee the woiee tae io the was so the caal, and the was soen io the care whe woiee whr oo the taster and the was so the caal, and the was goln to the thre the was so the caree th the thete was she was so the toiee the whit woue  ‘he wou dad toe toiee the toiee to the thet wou dade ’huh the toiee the woiee the woiee the woiee tae io the was so the caal, and the was soen io the care whe w