In [41]:
# Small LSTM Network to Generate Text for Alice in Wonderland
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [42]:
# load ascii text and covert to lowercase
filename = "rgold.txt"
raw_text = open(filename, 'r', encoding='utf-8').read()
raw_text = raw_text.lower()
# create mapping of unique chars to integers
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  83661
Total Vocab:  51


In [43]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
	seq_in = raw_text[i:i + seq_length]
	seq_out = raw_text[i + seq_length]
	dataX.append([char_to_int[char] for char in seq_in])
	dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  83561


In [44]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

In [45]:
# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [46]:
# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]
# fit the model
model.fit(X, y, epochs=20, batch_size=128, callbacks=callbacks_list)

Epoch 1/20

Epoch 00001: loss improved from inf to 3.02681, saving model to weights-improvement-01-3.0268.hdf5
Epoch 2/20

Epoch 00002: loss improved from 3.02681 to 2.87944, saving model to weights-improvement-02-2.8794.hdf5
Epoch 3/20

Epoch 00003: loss improved from 2.87944 to 2.79622, saving model to weights-improvement-03-2.7962.hdf5
Epoch 4/20

Epoch 00004: loss improved from 2.79622 to 2.75004, saving model to weights-improvement-04-2.7500.hdf5
Epoch 5/20

Epoch 00005: loss improved from 2.75004 to 2.70430, saving model to weights-improvement-05-2.7043.hdf5
Epoch 6/20

Epoch 00006: loss improved from 2.70430 to 2.65810, saving model to weights-improvement-06-2.6581.hdf5
Epoch 7/20

Epoch 00007: loss improved from 2.65810 to 2.61819, saving model to weights-improvement-07-2.6182.hdf5
Epoch 8/20

Epoch 00008: loss improved from 2.61819 to 2.58166, saving model to weights-improvement-08-2.5817.hdf5
Epoch 9/20

Epoch 00009: loss improved from 2.58166 to 2.54607, saving model to weig

<keras.callbacks.callbacks.History at 0x16a657b00>

In [47]:
# Generate Text

In [58]:
# Load LSTM network and generate text
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [59]:
# load ascii text and covert to lowercase
filename = "rgold.txt"
raw_text = open(filename, 'r', encoding='utf-8').read()
raw_text = raw_text.lower()

In [60]:
raw_text



In [61]:
# create mapping of unique chars to integers, and a reverse mapping
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))

In [62]:
# summarize the loaded data
n_chars = len(raw_text)
n_vocab = len(chars)
print("Total Characters: ", n_chars)
print( "Total Vocab: ", n_vocab)

Total Characters:  83661
Total Vocab:  51


In [63]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 1000
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
	seq_in = raw_text[i:i + seq_length]
	seq_out = raw_text[i + seq_length]
	dataX.append([char_to_int[char] for char in seq_in])
	dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  82661


In [64]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)
# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
# load the network weights
filename = "weights-improvement-20-2.1866.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [65]:
# pick a random seed
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

Seed:
" sword from the stem. o might i today find here the friend; come from afar to the saddest wife: what e\'er i have suffered in bitterest pain, what e\'er i have borne in shame and disgrace, sweet were my vengeance, all were atoned for! regained were then whate\'er i had lost, and won, too, were then all i have wept for, found the delivering friend, my hero held in my arms! siegmund (embracing sieglinde with ardor) thee, woman most blest, holds now the friend, for weapon and wife decreed! hot in my breast burns now the oath that weds me ever to thee. whate\'er i have sought in thee now i see; in thee all that has failed me is found! though thou wert shamed and woe was my lot; though i was scorned and dishonored wert thou: joyful revenge now laughs in our gladness! loud laugh i in fullest delight, holding embraced all thy glory, feeling the beats of thy heart! (the great door springs open.)sieglinde ha, who went? who entered here? (the door remains open: outside a glorious spring n

In [57]:
# generate characters
for i in range(1000):
	x = numpy.reshape(pattern, (1, len(pattern), 1))
	x = x / float(n_vocab)
	prediction = model.predict(x, verbose=0)
	index = numpy.argmax(prediction)
	result = int_to_char[index]
	seq_in = [int_to_char[value] for value in pattern]
	sys.stdout.write(result)
	pattern.append(index)
	pattern = pattern[1:len(pattern)]
print("\nDone.")

  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   oe  he   o   o