In [2]:
import numpy 
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils 


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Load the corpus and convert it to lower case as a preprocessing step.
# The context manager closes the file handle as soon as the text has been read,
# and the explicit encoding keeps decoding independent of the platform default.
data = "wonderland.txt"
with open(data, encoding="utf-8") as corpus_file:
    raw_data = corpus_file.read()
raw_data = raw_data.lower()

In [4]:
# Vocabulary: every distinct character of the corpus in sorted order,
# plus a lookup table from each character to its integer index.
chars = sorted(set(raw_data))
char_to_int = {symbol: position for position, symbol in enumerate(chars)}
print(chars)

['\n', ' ', '!', '"', '#', '$', '%', "'", '(', ')', '*', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', '@', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [5]:
# Sizes of the data: vocabulary size and total corpus length in characters.
n_vocabulary = len(chars)
n_chars = len(raw_data)
print("total number of characters are " + str(n_chars),
      "total distinct stuff are " + str(n_vocabulary),
      sep="\n")


total number of characters are 163779
total distinct stuff are 58


In [6]:
# Each training pattern is a window of 100 consecutive characters (the input)
# paired with the single character that follows it (the target). The window
# slides over the corpus one character at a time.
seq_length = 100
# Encode the whole corpus once, then cut the integer sequence into windows.
encoded_text = [char_to_int[symbol] for symbol in raw_data]
dataX = [encoded_text[begin:begin + seq_length]
         for begin in range(n_chars - seq_length)]
# The target of the window starting at `begin` is the character at
# `begin + seq_length`, i.e. everything from position seq_length onwards.
dataY = encoded_text[seq_length:]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  163679


In [7]:
# Finalise the training data:
#  - X is restructured to (samples, timesteps, features) and the integer
#    character ids are divided by the vocabulary size;
#  - Y is the one-hot encoding of the target characters.
X = numpy.array(dataX).reshape(n_patterns, seq_length, 1) / float(n_vocabulary)
Y = np_utils.to_categorical(dataY)

Our training data is now preprocessed and ready to be fed into the LSTM model.

In [9]:
# Define the network: one LSTM layer with 256 units reading each input window,
# dropout of 0.2, and a softmax output with one unit per column of Y.
model = Sequential([
    LSTM(256, input_shape=X.shape[1:]),
    Dropout(0.2),
    Dense(Y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [10]:
# Train the LSTM on the prepared windows: 5 passes over the data in
# mini-batches of 128 patterns.
model.fit(x=X, y=Y, batch_size=128, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x18157cf5f98>

In [11]:
# Persist the trained weights to an HDF5 file.
model.save_weights(filepath='saved_stuff.h5')

In [14]:
# Reverse lookup: integer index -> character, used to decode predictions.
int_to_char = dict(enumerate(chars))

In [17]:
import sys

We trained the model for only five epochs because of the limited GPU resources on my system.
Now we will generate further text by predicting, one character at a time, from a seed sequence.


In [18]:
# Generate 1000 characters, starting from a randomly chosen training window.
# randint's upper bound is exclusive, so len(dataX) makes every pattern
# (including the last one) eligible as a seed.
start = numpy.random.randint(0, len(dataX))
# Work on a copy of the seed: the loop below appends to `pattern`, and doing
# that on dataX[start] itself would permanently grow that training row.
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for i in range(1000):
    # Same preprocessing as the training inputs: (1, timesteps, 1), scaled
    # by the vocabulary size.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocabulary)
    prediction = model.predict(x, verbose=0)
    # Pick the most probable next character (plain int, so `pattern` stays
    # a list of Python integers).
    index = int(numpy.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: add the predicted character, drop the oldest one.
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Seed:
" she
had kept a piece of it in her pocket) till she was about a foot high:
then she walked down the l "
are th the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare to the hare to the hare to the hare and the woete to the hare to the hare to the hare t