In [130]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [2]:
# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the text has been read instead of leaving it open for
# the lifetime of the kernel.
with open('wonderland.txt') as corpus_file:
    raw_text = corpus_file.read()

In [6]:
# Normalise the corpus to lower case before the character vocabulary is built.
raw_text = raw_text.lower()

In [14]:
# Sorted list of the distinct characters that occur in the corpus.
chars = sorted(set(raw_text))

In [40]:
# Vocabulary size: the number of distinct characters in the corpus.
n_vocab = len(chars)

In [76]:
# Total number of characters in raw_text; the bare name displays the value.
n_chars = len(raw_text)
n_chars

144410

In [19]:
# Map each character to its position in the sorted vocabulary.
char_to_int = {symbol: code for code, symbol in enumerate(chars)}


In [131]:
# Length, in characters, of each input window.
seq_len = 100
# Two separate, initially empty lists: input windows and their targets.
dataX, dataY = [], []



In [132]:
# Build the training pairs: every run of seq_len consecutive characters is an
# input window and the character that follows it is the target.
# The lists are reset here so that re-running this cell rebuilds them from
# scratch instead of appending a second copy of every pattern.
dataX = []
dataY = []
for start in range(n_chars - seq_len):
    window = raw_text[start:start + seq_len]
    target = raw_text[start + seq_len]
    dataX.append([char_to_int[ch] for ch in window])
    # Each target is kept as a one-element list of its integer code.
    dataY.append([char_to_int[target]])

In [133]:
# Number of (input window, next character) training pairs.
n_patterns = len(dataX)

In [134]:
# Display the number of training pairs.
n_patterns

144310

In [135]:
# Arrange the integer windows as [samples, time steps, features] with one
# feature per step, and scale the codes by the vocabulary size so they fall
# in [0, 1).
X = numpy.asarray(dataX).reshape((n_patterns, seq_len, 1)) / float(n_vocab)
# One-hot encode the next-character targets.
y = np_utils.to_categorical(dataY)

In [136]:
# Shape of the one-hot targets: (number of patterns, number of classes).
y.shape

(144310, 45)

In [137]:
# Character-level model: one LSTM layer with 256 units over
# (time steps, features) inputs, 20% dropout, and a softmax layer with one
# output per column of y.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:]),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [139]:
# Write a checkpoint whenever the training loss reaches a new minimum; the
# file name records the epoch number and the loss value.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]
# Train for two epochs; the returned History object is the cell's output.
model.fit(X, y, batch_size=128, epochs=2, callbacks=callbacks_list)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x2283cfeceb8>

In [140]:
# load weights from checkpoint
# NOTE(review): this file name embeds the epoch number and loss of one
# particular training run, so it will presumably not exist after retraining —
# confirm and point it at a checkpoint actually written by the fit cell.
filename = "weights-improvement-01-2.7919.hdf5"
model.load_weights(filename)
# Compile again with the same loss and optimizer used when the model was built.
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [148]:
# Pick the index of a random seed window from dataX.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX)-1) is passed to keep the last pattern selectable.
pattern_idx = numpy.random.randint(0, len(dataX))

In [149]:
# Seed window as a list of integer character codes. This is the same list
# object that is stored in dataX, not a copy.
pattern = dataX[pattern_idx]

In [150]:
# Reverse lookup: integer code -> character, using the vocabulary order.
int_to_char = dict(enumerate(chars))


In [151]:
# Decode the seed window back into characters and print it as one string.
print("".join(int_to_char[code] for code in pattern))

lf in a melancholy
tone. 'nobody seems to like her, down here, and i'm sure she's the best
cat in th


In [145]:
# Check the length of the seed window (the number of character codes in it).
len(pattern)

100

In [146]:
# Inspect the raw integer codes of the seed window.
pattern

[37,
 38,
 9,
 9,
 40,
 23,
 36,
 22,
 27,
 21,
 38,
 1,
 19,
 24,
 38,
 23,
 36,
 41,
 19,
 36,
 22,
 37,
 10,
 4,
 0,
 0,
 4,
 37,
 38,
 39,
 24,
 24,
 1,
 19,
 32,
 22,
 1,
 32,
 33,
 32,
 37,
 23,
 32,
 37,
 23,
 2,
 4,
 1,
 37,
 19,
 27,
 22,
 1,
 19,
 30,
 27,
 21,
 23,
 1,
 30,
 33,
 39,
 22,
 30,
 43,
 10,
 1,
 4,
 38,
 26,
 23,
 1,
 27,
 22,
 23,
 19,
 1,
 33,
 24,
 1,
 26,
 19,
 40,
 27,
 32,
 25,
 1,
 38,
 26,
 23,
 0,
 37,
 23,
 32,
 38,
 23,
 32,
 21,
 23,
 1]

In [153]:
# Generate 1000 characters: repeatedly feed the current window to the model,
# take the most probable next character (numpy.argmax over the prediction),
# write it out, and slide the window forward by one position.
import sys

# randint's upper bound is exclusive, so len(dataX) lets any pattern be chosen.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: sliding the window must not modify the list held in dataX,
# otherwise that training sample would end up one element too long.
pattern = list(dataX[start])
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
for i in range(1000):
    # Same preprocessing as the training inputs: [1, time steps, 1], scaled
    # by the vocabulary size.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Drop the oldest code and append the predicted one, building a new list.
    pattern = pattern[1:] + [index]
print( "\nDone.")

Seed:
" house opened, and a large plate came
skimming out, straight at the footman's head: it just grazed hi "
 the  and the  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  and toe  a