In [1]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import time

Using TensorFlow backend.


In [2]:
# Load the corpus and convert it to lowercase.
# The text is not pure ASCII (it contains a curly apostrophe in "alice’s"),
# so decode it explicitly as UTF-8 instead of relying on the platform's
# default encoding, which differs between operating systems.
with open('wonderland.txt', 'r', encoding='utf-8') as f:
    raw_text = f.read().lower()

In [3]:
# Build the character vocabulary plus lookup tables in both directions,
# then translate the whole corpus into its integer codes.
vocab = sorted(set(raw_text))
int_to_vocab = {index: char for index, char in enumerate(vocab)}
vocab_to_int = {char: index for index, char in int_to_vocab.items()}
encoded = [vocab_to_int[char] for char in raw_text]

In [4]:
# Corpus length and vocabulary size, printed as a quick sanity check.
n_chars, n_vocab = len(raw_text), len(vocab)
for template, count in (
    ("Total chars: {}", n_chars),
    ("Total unique chars: {}", n_vocab),
):
    print(template.format(count))

Total chars: 144414
Total unique chars: 47


In [5]:
# Peek at the first 100 characters of the lowercased corpus.
raw_text[0:100]

'alice’s adventures in wonderland\n\nlewis carroll\n\nthe millennium fulcrum edition 3.0\n\n\n\n\nchapter i. d'

In [6]:
# Slide a fixed-width window over the encoded text: every run of `seq_len`
# codes is one input pattern, and the code immediately after it is the target.
seq_len = 100
window_starts = range(n_chars - seq_len)
X = [encoded[start:start + seq_len] for start in window_starts]
y = [encoded[start + seq_len] for start in window_starts]

n_patterns = len(X)
print("Total patterns: {}".format(n_patterns))

Total patterns: 144314


In [7]:
# Arrange the inputs as (patterns, sequence length, 1) and scale the integer
# codes (0 .. n_vocab - 1) into the range [0, 1).
# NOTE: this cell rebinds X and y, so it is not idempotent -- re-run the
# dataset-preparation cell before running it a second time.
# The target shape is passed positionally because the `newshape=` keyword is
# deprecated in recent NumPy releases.
X = numpy.reshape(X, (n_patterns, seq_len, 1))
X = X / float(n_vocab)
# Pin the one-hot width to the vocabulary size. Without `num_classes`,
# to_categorical infers the width from the largest target value, which would
# come out narrower than n_vocab if the highest-coded character never occurs
# as a target.
y = np_utils.to_categorical(y, num_classes=n_vocab)

In [8]:
# Confirm the one-hot target matrix dimensions: (patterns, classes).
numpy.shape(y)

(144314, 47)

In [9]:
# Next-character classifier: one 256-unit LSTM layer, dropout, then a softmax
# over every class present in the one-hot targets.
n_steps, n_features = X.shape[1], X.shape[2]
n_classes = y.shape[1]

model = Sequential([
    LSTM(256, input_shape=(n_steps, n_features)),
    Dropout(0.2),
    Dense(n_classes, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [10]:
# Checkpoint callback: monitors the training loss and, with
# save_best_only=True / mode='min', keeps a weights file only when the
# monitored loss reaches a new minimum. The epoch number and loss value are
# substituted into the file name.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint_options = dict(
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
checkpoint = ModelCheckpoint(filepath, **checkpoint_options)
callbacks_list = [checkpoint]

In [11]:
# Train the model and report how long it took.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed intervals; time.time() follows the system clock and can
# jump (e.g. on an NTP adjustment) during a long training run.
start = time.perf_counter()
model.fit(X, y, epochs=2, batch_size=128, callbacks=callbacks_list, verbose=1)
end = time.perf_counter()
print("Total training time(sec): {:.3f}".format(end - start))

Epoch 1/2
Epoch 2/2
Total training time in second: 1069.059
