In [1]:
import timeit
import sys
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

Using Theano backend.
Using gpu device 0: GeForce GTX 980 Ti (CNMeM is enabled with initial size: 80.0% of memory, cuDNN 5105)


In [2]:
# Load the book text and convert it to lowercase.
# The file is opened in a `with` block so the handle is closed as soon as the
# contents have been read, instead of lingering until garbage collection.
# NOTE(review): no explicit encoding is passed, so the platform default is used;
# the text contains non-ASCII quote characters, so confirm the file's encoding
# and pin it (e.g. encoding="utf-8") once known.
filename = "./data/alice_wonderland.txt"
with open(filename) as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

In [3]:
# Build the vocabulary: every distinct character of the text, in sorted order,
# is assigned an integer id. The inverse lookup is kept as well so that
# predicted ids can be turned back into characters.
chars = sorted(set(raw_text))
int_to_char = dict(enumerate(chars))
char_to_int = {symbol: code for code, symbol in int_to_char.items()}

In [4]:
# Report the size of the corpus and of the character vocabulary.
num_chars, num_vocab = len(raw_text), len(chars)
for label, count in (("Total Characters ", num_chars),
                     ("Total Unique Vocab ", num_vocab)):
    print(label, count)

Total Characters  144373
Total Unique Vocab  45


In [5]:
# Build the training pairs, encoded as integers:
#   dataX[k] = the seq_len character ids starting at position k
#   dataY[k] = the id of the character that immediately follows that window
# Windows are taken at every position (stride 1).
# Alternative not done here: split by sentence, padding short ones and
# truncating long ones.
seq_len = 100
encoded_text = [char_to_int[symbol] for symbol in raw_text]
window_starts = range(num_chars - seq_len)
# Each slice is a fresh list, so every pattern is an independent object.
dataX = [encoded_text[begin:begin + seq_len] for begin in window_starts]
dataY = [encoded_text[begin + seq_len] for begin in window_starts]
num_patterns = len(dataX)

In [6]:
# Arrange the patterns as a 3-D array: [samples, time steps, features],
# with a single feature (the character id) per time step.
X = np.asarray(dataX).reshape(num_patterns, seq_len, 1)

In [7]:
# Normalise the integer character ids into the range [0, 1).
# Ids run from 0 to num_vocab - 1, so dividing by num_vocab keeps them below 1.
# X is rebuilt from dataX here (instead of `X = X / ...`) so that re-running
# this cell is idempotent and can never scale an already-normalised X again.
# Keras' LSTM defaults to tanh for the cell activation and a sigmoid-family
# function for the gates; small-magnitude inputs suit both.
X = np.reshape(dataX, (num_patterns, seq_len, 1)) / float(num_vocab)

In [8]:
# One-hot encode the target character ids; y has one row per pattern.
# NOTE(review): no class count is passed, so the number of columns is whatever
# to_categorical infers from dataY — presumably num_vocab; confirm, because
# y.shape[1] sizes the network's output layer.
y = np_utils.to_categorical(dataY)

In [9]:
# Network architecture, in order:
#   LSTM, 256 units, returning the full sequence so the next LSTM can consume it
#   Dropout, 20% of units dropped
#   LSTM, 256 units
#   Dropout, 20% of units dropped
#   Dense softmax output with one unit per column of y
# The ADAM optimizer is attached later, when the model is compiled.
time_steps, num_features = X.shape[1:]
model = Sequential([
    LSTM(256, input_shape=(time_steps, num_features), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])

In [10]:
# Restore weights saved by an earlier training run, then compile the model.
# The checkpoint's file name suggests epoch 40 with loss 1.3063 — presumably
# the lowest-loss checkpoint of that run.
filename = "./weights/weights-improvement-40-1.3063-twolayers.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [11]:
# Pick a random pattern from the dataset to prime the text generator.
# np.random.randint excludes its upper bound, so len(dataX) (not len(dataX) - 1)
# is passed; otherwise the last pattern could never be selected.
start = np.random.randint(0, len(dataX))
# Copy the chosen row: the generation loop grows and slides this window, and
# working on the dataset's own list would corrupt dataX[start].
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed:
" 

the king turned pale, and shut his note-book hastily. ‘consider your
verdict,’ he said to the jury "


In [12]:
# Generate 1000 characters greedily and report the elapsed wall-clock time.
# Each step feeds the current window to the network, emits the most probable
# next character, and slides the window forward by one position.
# NOTE: `start` is rebound here from the seed index to a timestamp.
start = timeit.default_timer()

# Loop-invariant divisor; the same scaling that is applied to X.
scale = float(num_vocab)
for i in range(1000):
    # Shape the window as [1 sample, time steps, 1 feature] and normalise it.
    x = np.reshape(pattern, (1, len(pattern), 1)) / scale
    prediction = model.predict(x, verbose=0)
    index = int(np.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list instead of append-then-slice, so the list that `pattern`
    # initially refers to (possibly a row of dataX) is never mutated.
    pattern = pattern[1:] + [index]
print("\nFinished")
stop = timeit.default_timer()
print(stop - start)

. ‘i must have to she bert wiine,’

‘what a pueer to be as all ’ said the daterpillar.

‘well, i should shink i must be a batce,’ said the duchess, ‘and the mosal of the sea- the duchess! the fuchess and the sea- the duchess and the sea- the fuchess said ‘it is the wayeng!’

‘i don’t know what you do,’ said the daterpillar.

‘well, i should shink i must be a batce,’ said the daterpillar.

‘well, i should shink i meant with the rueen!’ said the duchess, ‘and the mosal of the sea- the duchess! the fuchess and the sea- the duchess and the sea- the fuchess said ‘it is the wayeng!’

‘i don’t know what you do,’ said the daterpillar.

‘well, i should shink i must be a batce,’ said the daterpillar.

‘well, i should shink i meant with the rueen!’ said the duchess, ‘and the mosal of the sea- the duchess! the fuchess and the sea- the duchess and the sea- the fuchess said ‘it is the wayeng!’

‘i don’t know what you do,’ said the daterpillar.

‘well, i should shink i must be a batce,’ said the date