In [1]:
import sys
import timeit

import numpy as np
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.models import Sequential
from keras.utils import np_utils

Using Theano backend.
Using gpu device 0: GeForce GTX 980 Ti (CNMeM is enabled with initial size: 80.0% of memory, cuDNN 5105)


In [2]:
# Load the ASCII text and convert it to lowercase.
# The file is opened in a `with` block so the handle is closed as soon as
# the contents have been read, instead of lingering in the kernel.
filename = "./data/alice_wonderland.txt"
with open(filename) as f:
    raw_text = f.read().lower()

In [3]:
# Build the vocabulary: every distinct character of the text, in sorted
# order, is assigned the integer index of its position in that ordering.
chars = sorted(set(raw_text))
char_to_int = {ch: idx for idx, ch in enumerate(chars)}

In [4]:
# Corpus length (in characters) and vocabulary size (distinct characters).
num_chars, num_unique_vocab = len(raw_text), len(chars)
print("Total Characters: ", num_chars)
print("Total unique: ", num_unique_vocab)

Total Characters:  144373
Total unique:  45


In [5]:
# Build the integer-encoded (input window -> next character) training pairs.
# A window of `seq_len` characters slides over the text one character at a
# time; the character immediately after each window is its target.
# (Alternative: split the text by sentence, padding short sentences and
# truncating long ones.)
seq_len = 100
window_starts = range(num_chars - seq_len)
dataX = [
    [char_to_int[ch] for ch in raw_text[s:s + seq_len]]
    for s in window_starts
]
dataY = [char_to_int[raw_text[s + seq_len]] for s in window_starts]
num_patterns = len(dataX)

In [6]:
# Arrange the windows as a 3-D array of [samples, time steps, features],
# with one feature (the character's integer code) per time step.
X = np.asarray(dataX).reshape(num_patterns, seq_len, 1)

In [7]:
X.shape

(144273, 100, 1)

In [8]:
# normalize
# Scale the integer character codes (0 .. num_unique_vocab - 1) into [0, 1)
# so the network sees small, uniformly scaled inputs.
# X is rebuilt from dataX rather than divided in place: `X = X / n` would
# shrink the values again every time this cell is re-run, whereas this form
# gives the same result no matter how often it is executed.
X = np.reshape(dataX, (num_patterns, seq_len, 1)) / float(num_unique_vocab)

In [9]:
# one hot encode the output variable
# The number of classes is passed explicitly (positionally, so it works with
# both the `nb_classes` and `num_classes` spellings of the keyword). Without
# it the width is inferred from the largest label in dataY, which would be
# narrower than the vocabulary if the highest-indexed character never occurs
# as a target.
y = np_utils.to_categorical(dataY, num_unique_vocab)

In [10]:
# Define the LSTM model:
#   - two stacked LSTM layers with 256 memory units each (the first returns
#     the full sequence so the second receives one vector per time step),
#   - dropout with rate 0.2 after each LSTM layer,
#   - a softmax Dense output layer with one unit per column of y,
#   - categorical cross-entropy loss, optimized with Adam.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:], return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [11]:
# Checkpointing: monitor the training loss and, whenever an epoch reaches a
# new minimum, write a checkpoint file whose name embeds the epoch number
# and the loss value.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-twolayers.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [12]:
# time execution and fit model
# Trains for 50 epochs with mini-batches of 64 samples, checkpointing via
# callbacks_list, then prints the elapsed wall-clock time in seconds.
# `timeit` is imported in the notebook's import cell rather than here, so
# all dependencies are visible in one place.
# NOTE(review): `nb_epoch` is the Keras 1.x keyword; later Keras releases
# name this argument `epochs` -- adjust if the environment is upgraded.
start = timeit.default_timer()

model.fit(X, y, nb_epoch=50, batch_size=64, callbacks=callbacks_list)

stop = timeit.default_timer()
print(stop - start)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
26654.400902334124
