In [41]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [42]:
# load ascii text and covert to lowercase
filename = "wonderland.txt"
raw_text = open(filename, encoding="utf8").read()
raw_text = raw_text.lower()

In [43]:
raw_text

'\ufeffalice’s adventures in wonderland\nlewis carroll\nthe millennium fulcrum edition 3.0\nchapter i. down the rabbit-hole\nalice was beginning to get very tired of sitting by her sister on the\nbank, and of having nothing to do: once or twice she had peeped into the\nbook her sister was reading, but it had no pictures or conversations in\nit, ‘and what is the use of a book,’ thought alice ‘without pictures or\nconversations?’\n\nso she was considering in her own mind (as well as she could, for the\nhot day made her feel very sleepy and stupid), whether the pleasure\nof making a daisy-chain would be worth the trouble of getting up and\npicking the daisies, when suddenly a white rabbit with pink eyes ran\nclose by her.\n\n\nlastly, she pictured to herself how this same little sister of hers\nwould, in the after-time, be herself a grown woman; and how she would\nkeep, through all her riper years, the simple and loving heart of her\nchildhood: and how she would gather about her other lit

In [44]:
#Mapping to integers
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
char_to_int

{'\n': 0,
 ' ': 1,
 '(': 2,
 ')': 3,
 ',': 4,
 '-': 5,
 '.': 6,
 '0': 7,
 '3': 8,
 ':': 9,
 ';': 10,
 '?': 11,
 'a': 12,
 'b': 13,
 'c': 14,
 'd': 15,
 'e': 16,
 'f': 17,
 'g': 18,
 'h': 19,
 'i': 20,
 'j': 21,
 'k': 22,
 'l': 23,
 'm': 24,
 'n': 25,
 'o': 26,
 'p': 27,
 'r': 28,
 's': 29,
 't': 30,
 'u': 31,
 'v': 32,
 'w': 33,
 'y': 34,
 '‘': 35,
 '’': 36,
 '\ufeff': 37}

In [45]:
n_chars = len(raw_text)
n_vocab = len(chars)
print ("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)

Total Characters:  1265
Total Vocab:  38


In [46]:
#Iterating over 100 at a time
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    #print("i",i,"seq_len",seq_length)
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    #print("seq_in",seq_in,"seq_out",seq_out)
    dataX.append([char_to_int[char] for char in seq_in])
    #print("data_x",dataX)
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)

Total Patterns:  1165


In [47]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)
X.shape
y.shape

(1165, 37)

In [48]:
# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [49]:
# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [50]:
model.fit(X, y, epochs=1, batch_size=128, callbacks=callbacks_list)

Epoch 1/1


<keras.callbacks.History at 0x185d7a9fe48>

In [None]:
int_to_char = dict((i, c) for i, c in enumerate(chars))

In [None]:
# pick a random seed
import sys
start = numpy.random.randint(0, len(dataX)-1)
print(start,"data lenght",len(dataX))
pattern = dataX[start]
#got one array
print(pattern)
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(1000):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    #sys.stdout.write(result)
    print(result)
    pattern.append(index)
    #print(pattern)
    pattern = pattern[1:len(pattern)]
print ("\nDone.")