In [4]:
# Step 1 - Import the necessary libraries
import numpy
from keras.models import Sequential 
from keras.layers import Dense 
from keras.layers import Dropout 
from keras.layers import LSTM 
from keras.callbacks import ModelCheckpoint 
from keras.utils import np_utils 

In [8]:
# Step 2 - load the sample data
Sample_data = "/content/NLP.txt"
# Read the whole file inside a context manager so the handle is closed as soon
# as the text is in memory (the bare open(...).read() left it open until GC).
with open(Sample_data, 'r', encoding='utf-8') as text_file:
  wonderland_text = text_file.read()
# Lower-case the text so upper/lower variants of a letter share one symbol.
wonderland_text = wonderland_text.lower()
print(wonderland_text)

alice was beginning to get very tired of sitting by her sister
on the bank, and of having nothing to do:  once or twice she had
peeped into the book her sister was reading, but it had no
pictures or conversations in it, `and what is the use of a book,'
thought alice `without pictures or conversation?'



In [6]:
# Step 3 - Create mapping of unique characters and integers
# Distinct characters of the text, in sorted order; each character's position
# in this list is the integer it is encoded as.
My_characters = sorted(set(wonderland_text))
character_to_integer = {symbol: index for index, symbol in enumerate(My_characters)}
character_to_integer

{'\n': 0,
 ' ': 1,
 "'": 2,
 ',': 3,
 ':': 4,
 '?': 5,
 '`': 6,
 'a': 7,
 'b': 8,
 'c': 9,
 'd': 10,
 'e': 11,
 'f': 12,
 'g': 13,
 'h': 14,
 'i': 15,
 'k': 16,
 'l': 17,
 'n': 18,
 'o': 19,
 'p': 20,
 'r': 21,
 's': 22,
 't': 23,
 'u': 24,
 'v': 25,
 'w': 26,
 'y': 27}

In [14]:
# Step 4 - Summarize the data
# wonder_chars: length of the text in characters.
# wonder_vocab: number of distinct characters found in the text.
wonder_chars, wonder_vocab = len(wonderland_text), len(My_characters)
print("Total Characters Present in the Sample data: ", wonder_chars)
print("Total Vocab in the data: ", wonder_vocab)

Total Characters Present in the Sample data:  303
Total Vocab in the data:  28


In [10]:
# Step 5 - Prepare the dataset
# Slide a window of `sequence_length` characters over the text one position at
# a time: the window (as integers) is an input pattern and the character that
# immediately follows it is the target.
sequence_length = 100
x_data, y_data = [], []
for start in range(wonder_chars - sequence_length):
  stop = start + sequence_length
  window = wonderland_text[start:stop]
  following_char = wonderland_text[stop]
  x_data.append([character_to_integer[symbol] for symbol in window])
  y_data.append(character_to_integer[following_char])
pattern_nn = len(x_data)
print("Result of total patterns:", pattern_nn)

Result of total patterns: 203


In [11]:
# Step 6 - Reshaping the data
# Arrange the integer patterns as (pattern_nn, sequence_length, 1), the
# [samples, time steps, features] layout passed to the LSTM's input_shape.
X = numpy.reshape(x_data, (pattern_nn, sequence_length, 1))
# Scale the integer codes by the vocabulary size so every value lies in [0, 1).
X = X / float(wonder_vocab)
# One-hot encode the targets. num_classes is pinned to the vocabulary size:
# without it the width is inferred as max(y_data) + 1, which comes out smaller
# than the vocabulary whenever the highest-index character never occurs as a
# target, leaving the softmax output layer without a slot for that character.
y = np_utils.to_categorical(y_data, num_classes=wonder_vocab)

In [17]:
#Step 7 - Define the LSTM model
# Per-sample input shape is read from X (time steps, features); the width of
# the softmax output layer is read from the one-hot targets y.
timesteps, features = X.shape[1], X.shape[2]
model = Sequential([
  LSTM(256, input_shape=(timesteps, features)),
  Dropout(0.2),
  Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [18]:
# Step 8 - Define the checkpoint
# The file name is a template filled in with the epoch number and the loss.
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
# Watch the training loss and write weights only when it reaches a new minimum.
checkpoint = ModelCheckpoint(
  filepath,
  monitor='loss',
  mode='min',
  save_best_only=True,
  verbose=1,
)
callbacks_list = [checkpoint]

In [19]:
# Step 9 - Fit the model
# Train on the prepared patterns; the checkpoint callback saves weights
# whenever the monitored loss improves.
model.fit(
  X,
  y,
  epochs=1,
  batch_size=128,
  callbacks=callbacks_list,
)


Epoch 00001: loss improved from inf to 3.30116, saving model to weights-improvement-01-3.3012.hdf5


<keras.callbacks.History at 0x7f76da42a190>