In [1]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

Using TensorFlow backend.


In [2]:
# Load the training corpus and fold it to lower case so the model only has to
# learn a single casing of each letter.
filename = "wonderland.txt"
# `with` closes the file handle as soon as the text is read, and the explicit
# encoding keeps the non-ASCII quote characters in the corpus decoding the same
# way regardless of the platform's default locale.
with open(filename, encoding="utf-8") as corpus_file:
    raw_text = corpus_file.read()
raw_text = raw_text.lower()

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order, plus a
# lookup from each character to its position in that ordering.
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}

In [4]:
# Corpus size (in characters) and vocabulary size (distinct characters).
n_chars, n_vocab = len(raw_text), len(chars)
for label, count in (("Total Characters:{}", n_chars), ("Total Vocab:{}", n_vocab)):
    print(label.format(count))

Total Characters:163815
Total Vocab:60


In [5]:
# Slide a window of `seq_length` characters over the corpus one step at a time.
# Each window (as integer codes) is one input pattern; the character that
# immediately follows the window is its prediction target.
seq_length = 100

# Encode the corpus once up front. Windows overlap almost entirely, so encoding
# inside the loop would look up every character about `seq_length` times.
encoded_text = [char_to_int[char] for char in raw_text]

# Each slice is a fresh list, so the patterns do not share storage.
dataX = [encoded_text[i:i + seq_length] for i in range(n_chars - seq_length)]
# The target of window i is the character at i + seq_length, i.e. the encoded
# corpus shifted left by one full window.
dataY = encoded_text[seq_length:]

n_patterns = len(dataX)
print("Total Patterns:{}".format(n_patterns))

Total Patterns:163715


In [7]:
# Shape the integer patterns as [samples, time steps, features] with a single
# feature (the character code) per time step.
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# Scale the codes, which run from 0 to n_vocab - 1, into the range [0, 1).
X = X / float(n_vocab)
# One-hot encode the targets. The width is pinned to the vocabulary size so it
# does not depend on which characters happen to appear as targets; without
# `num_classes` it is inferred as max(dataY) + 1.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

In [43]:
# Character model: LSTM(256) -> Dropout(0.2) -> softmax over the one-hot target
# width. (A second LSTM(256) / Dropout(0.2) pair is currently left disabled.)
model = Sequential([
    LSTM(256, input_shape=X.shape[1:]),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Checkpoint the weights whenever the training loss reaches a new minimum; the
# epoch number and loss are baked into the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

# Train on the full pattern set.
model.fit(X, y, batch_size=64, epochs=50, callbacks=callbacks_list)

Epoch 1/50

Epoch 00001: loss improved from inf to 2.95154, saving model to weights-improvement-01-2.9515-bigger.hdf5
Epoch 2/50

Epoch 00002: loss improved from 2.95154 to 2.75160, saving model to weights-improvement-02-2.7516-bigger.hdf5
Epoch 3/50

Epoch 00003: loss improved from 2.75160 to 2.65335, saving model to weights-improvement-03-2.6534-bigger.hdf5
Epoch 4/50

Epoch 00004: loss improved from 2.65335 to 2.57527, saving model to weights-improvement-04-2.5753-bigger.hdf5
Epoch 5/50

Epoch 00005: loss improved from 2.57527 to 2.49930, saving model to weights-improvement-05-2.4993-bigger.hdf5
Epoch 6/50

Epoch 00006: loss improved from 2.49930 to 2.43210, saving model to weights-improvement-06-2.4321-bigger.hdf5
Epoch 7/50

Epoch 00007: loss improved from 2.43210 to 2.37436, saving model to weights-improvement-07-2.3744-bigger.hdf5
Epoch 8/50

Epoch 00008: loss improved from 2.37436 to 2.32109, saving model to weights-improvement-08-2.3211-bigger.hdf5
Epoch 9/50

Epoch 00009: los


Epoch 00042: loss improved from 1.69663 to 1.68730, saving model to weights-improvement-42-1.6873-bigger.hdf5
Epoch 43/50

Epoch 00043: loss improved from 1.68730 to 1.68136, saving model to weights-improvement-43-1.6814-bigger.hdf5
Epoch 44/50

Epoch 00044: loss improved from 1.68136 to 1.67826, saving model to weights-improvement-44-1.6783-bigger.hdf5
Epoch 45/50

Epoch 00045: loss improved from 1.67826 to 1.67429, saving model to weights-improvement-45-1.6743-bigger.hdf5
Epoch 46/50

Epoch 00046: loss improved from 1.67429 to 1.66460, saving model to weights-improvement-46-1.6646-bigger.hdf5
Epoch 47/50

Epoch 00047: loss improved from 1.66460 to 1.65271, saving model to weights-improvement-47-1.6527-bigger.hdf5
Epoch 48/50

Epoch 00048: loss did not improve from 1.65271
Epoch 49/50

Epoch 00049: loss improved from 1.65271 to 1.64806, saving model to weights-improvement-49-1.6481-bigger.hdf5
Epoch 50/50

Epoch 00050: loss improved from 1.64806 to 1.64264, saving model to weights-im

<keras.callbacks.History at 0x7f6bdeb1a518>

In [44]:
# Restore the weights from a saved checkpoint file (the name matches the
# epoch-50 entry of the checkpoint file-name pattern used during training).
filename = "weights-improvement-50-1.6426-bigger.hdf5"
model.load_weights(filename)
# Compile again with the same loss and optimizer used for training.
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [45]:
# Pick a random training pattern to seed text generation.
# numpy's randint excludes the upper bound, so passing len(dataX) makes every
# pattern, including the last one, eligible.
start = numpy.random.randint(0, len(dataX))
# Copy the pattern: the seed is modified during generation, and aliasing the
# list stored in dataX would corrupt the training data in place.
pattern = list(dataX[start])
# Character <-> integer lookups, rebuilt here from `chars`.
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))
print(start)
print(pattern)

110725
[32, 34, 48, 1, 38, 43, 1, 49, 37, 34, 38, 47, 1, 45, 30, 52, 48, 11, 0, 0, 56, 30, 43, 33, 1, 37, 44, 52, 1, 42, 30, 43, 54, 1, 37, 44, 50, 47, 48, 1, 30, 1, 33, 30, 54, 1, 33, 38, 33, 1, 54, 44, 50, 1, 33, 44, 1, 41, 34, 48, 48, 44, 43, 48, 25, 57, 1, 48, 30, 38, 33, 1, 30, 41, 38, 32, 34, 9, 1, 38, 43, 1, 30, 1, 37, 50, 47, 47, 54, 1, 49, 44, 0, 32, 37, 30, 43, 36, 34, 1]


In [46]:
import sys

# Generate 1000 characters greedily: feed the current window to the model, emit
# the most probable next character, then slide the window forward by one.
for i in range(1000):
    # Same preparation as the training inputs: [1 sample, time steps, 1 feature],
    # with codes scaled by the vocabulary size.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Plain int keeps `pattern` a homogeneous list of Python integers.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new window instead of appending in place, so a seed list that is
    # shared with dataX is never mutated.
    pattern = pattern[1:] + [index]

the pupjrstio.

‘io yhu the pabt of think!’ thi ganter want, tiriing to aeiin igaiing. 
‘whal i can’t be a mett ro the teal to the pemt oite!’ said the kanter. 
‘i dene tou donn the roett suam ’hur mooe,’ said the mouge, 
‘ie toued io the mert of thet ar the saal then i mo,’ said the kante rante ‘once. ‘iov aelind to ba ii  mh dirrsr,’ 
‘h don’t know what i sool toe mioe than ’ said the monke, ‘i most en would ie wound be ko dltttr the madt ofde io the dane,-
the docm shen shen she wan sointing oo to the that, and the white rabbit inee aoond the was so tie thame whth their siaecs, and the thou ht was ooe of them with tie wilne the was soitting to be in a lotte toide, ‘hhn the luoy be a ba moce in the listle bu the whitew as the luosr, and the waited tf ten to the merthoe oo the taaeet to sey to hir toapes, and whnt hir toict to sie kant, 
‘the was toent it was in an a latter on the dirtt benue the white!’ she shiught th herself, ‘i don’t think to be is ailind to tey the maac of theng w