In [1]:
import numpy

In [2]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [6]:
# Load the ASCII training text and convert it to lowercase.
filename = "wonderland2.txt"
# Read through a context manager so the file handle is closed as soon as the
# text has been read, rather than lingering until garbage collection.
with open(filename) as text_file:
    raw_text = text_file.read()
raw_text = raw_text.lower()

In [7]:
# Sorted list of the distinct characters in the corpus, plus a lookup from
# each character to its position in that list.
chars = sorted(set(raw_text))
char_to_int = {symbol: position for position, symbol in enumerate(chars)}

In [8]:
# Corpus length (in characters) and vocabulary size, echoed for a sanity check.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  5468
Total Vocab:  34


In [9]:
	
# Build the training pairs: each input is a window of seq_length characters
# (encoded as integers) and the target is the single character that follows it.
seq_length = 100
dataX, dataY = [], []
for offset in range(n_chars - seq_length):
    window = raw_text[offset:offset + seq_length]
    next_char = raw_text[offset + seq_length]
    dataX.append([char_to_int[symbol] for symbol in window])
    dataY.append(char_to_int[next_char])
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  5368


In [10]:
# Arrange the integer sequences as (n_patterns, seq_length, 1), the 3-D input
# shape handed to the LSTM layer.
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# Normalize: character codes run 0..n_vocab-1, so this scales them into [0, 1).
X = X / float(n_vocab)
# One-hot encode the output variable. num_classes is given explicitly so that
# y always has n_vocab columns; left to inference, to_categorical sizes the
# output from max(dataY) + 1, which is too small if the highest-indexed
# character never occurs as a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

In [11]:
# Define the LSTM model: one 256-unit LSTM layer, 20% dropout, and a softmax
# layer with one output per column of y.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [9]:
# Checkpoint callback: monitor the training loss and write the weights to a
# file (named after the epoch and loss) only when the loss reaches a new minimum.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [10]:
# Train for 50 epochs with a batch size of 128, checkpointing through
# callbacks_list.
model.fit(
    X,
    y,
    batch_size=128,
    epochs=50,
    callbacks=callbacks_list,
)


Epoch 1/50

Epoch 00001: loss improved from inf to 3.12359, saving model to weights-improvement-01-3.1236.hdf5
Epoch 2/50

Epoch 00002: loss improved from 3.12359 to 3.02505, saving model to weights-improvement-02-3.0251.hdf5
Epoch 3/50

Epoch 00003: loss improved from 3.02505 to 3.01533, saving model to weights-improvement-03-3.0153.hdf5
Epoch 4/50

Epoch 00004: loss did not improve
Epoch 5/50

Epoch 00005: loss improved from 3.01533 to 3.00906, saving model to weights-improvement-05-3.0091.hdf5
Epoch 6/50

Epoch 00006: loss improved from 3.00906 to 3.00549, saving model to weights-improvement-06-3.0055.hdf5
Epoch 7/50

Epoch 00007: loss improved from 3.00549 to 2.99378, saving model to weights-improvement-07-2.9938.hdf5
Epoch 8/50

Epoch 00008: loss improved from 2.99378 to 2.96834, saving model to weights-improvement-08-2.9683.hdf5
Epoch 9/50

Epoch 00009: loss improved from 2.96834 to 2.92270, saving model to weights-improvement-09-2.9227.hdf5
Epoch 10/50

Epoch 00010: loss improve


Epoch 00046: loss improved from 0.60723 to 0.56532, saving model to weights-improvement-46-0.5653.hdf5
Epoch 47/50

Epoch 00047: loss improved from 0.56532 to 0.51900, saving model to weights-improvement-47-0.5190.hdf5
Epoch 48/50

Epoch 00048: loss improved from 0.51900 to 0.48829, saving model to weights-improvement-48-0.4883.hdf5
Epoch 49/50

Epoch 00049: loss improved from 0.48829 to 0.48221, saving model to weights-improvement-49-0.4822.hdf5
Epoch 50/50

Epoch 00050: loss improved from 0.48221 to 0.45536, saving model to weights-improvement-50-0.4554.hdf5


<keras.callbacks.History at 0x24e9320d4e0>

In [12]:
# Load the network weights from the checkpoint file that the training log
# reports for epoch 50, then re-compile with the same loss and optimizer that
# were used when the model was defined.
# NOTE(review): the file name embeds the final loss of one particular run, so
# it presumably has to be edited by hand after retraining — confirm.
# NOTE(review): this rebinds `filename`, which earlier held the corpus path.
filename = "weights-improvement-50-0.4554.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [13]:
# Reverse lookup: integer index -> character, for decoding model predictions.
int_to_char = dict(enumerate(chars))

In [30]:
import sys

# Random seed selection. numpy.random.randint excludes its upper bound, so
# passing len(dataX) makes every pattern, including the last one, eligible.
start = numpy.random.randint(0, len(dataX))
# Work on a copy: the loop below appends to `pattern`, and appending to the
# list object stored inside dataX would leave that training row one element
# too long and corrupt the dataset for any later reshape or retraining.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# Generate 100 characters, one prediction at a time.
print("----------------------------------------------------------")
for _step in range(100):
    # Same preprocessing as the training data: (1, window length, 1) shape,
    # scaled by the vocabulary size.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the most probable next character.
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: append the predicted character, drop the oldest one.
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Seed:
" the door
always looking up at higher floors
want to see it all give me more (rise, rise up)
i was al "
----------------------------------------------------------
ways up for the making changes
walking down the street and meeting strangers
flipping through my lif
Done.


In [15]:
# Open the sample text file for reading; the handle is read in the next cell.
file = open("test_file.txt", mode="r")



In [16]:
# Print the file's full contents. Using the already-open handle as a context
# manager closes it once the read is done, so it is not leaked for the rest of
# the kernel session.
with file:
    print(file.read())

“Hello World”“This is our new text file”“and this is another line.”“Why? Because we can.”
