In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils


Using TensorFlow backend.


In [4]:
# Load the training corpus and lower-case it; the character vocabulary is
# built from this lower-cased text.

filename = "shakespear.txt"
# A context manager closes the file handle deterministically instead of
# leaving it open until garbage collection.
with open(filename, "r") as corpus_file:
    raw_txt = corpus_file.read()
raw_txt = raw_txt.lower()


In [5]:
# Build the vocabulary: every distinct character of the corpus in sorted
# order, each mapped to its position in that ordering.

chars = sorted(set(raw_txt))
char_to_int = {char: idx for idx, char in enumerate(chars)}
char_to_int

{'\n': 0,
 ' ': 1,
 '!': 2,
 "'": 3,
 ',': 4,
 '-': 5,
 '.': 6,
 ':': 7,
 ';': 8,
 '?': 9,
 'a': 10,
 'b': 11,
 'c': 12,
 'd': 13,
 'e': 14,
 'f': 15,
 'g': 16,
 'h': 17,
 'i': 18,
 'j': 19,
 'k': 20,
 'l': 21,
 'm': 22,
 'n': 23,
 'o': 24,
 'p': 25,
 'q': 26,
 'r': 27,
 's': 28,
 't': 29,
 'u': 30,
 'v': 31,
 'w': 32,
 'x': 33,
 'y': 34,
 'z': 35}

In [6]:
# Inverse lookup: integer id -> character, in the same order as `chars`.
int_to_char = dict(enumerate(chars))
int_to_char

{0: '\n',
 1: ' ',
 2: '!',
 3: "'",
 4: ',',
 5: '-',
 6: '.',
 7: ':',
 8: ';',
 9: '?',
 10: 'a',
 11: 'b',
 12: 'c',
 13: 'd',
 14: 'e',
 15: 'f',
 16: 'g',
 17: 'h',
 18: 'i',
 19: 'j',
 20: 'k',
 21: 'l',
 22: 'm',
 23: 'n',
 24: 'o',
 25: 'p',
 26: 'q',
 27: 'r',
 28: 's',
 29: 't',
 30: 'u',
 31: 'v',
 32: 'w',
 33: 'x',
 34: 'y',
 35: 'z'}

In [7]:
# Total number of characters in the lower-cased corpus.
n_char = len(raw_txt)
n_char

99993

In [8]:
# Vocabulary size: the number of distinct characters in the corpus.
n_vocabs = len(chars)
n_vocabs

36

In [92]:
# Slide a fixed-length window over the corpus: each input is `seq_length`
# consecutive characters (as integer ids) and its target is the id of the
# character that immediately follows the window.
seq_length = 200

# Encode the corpus once; every window is then a plain slice of this list
# instead of a fresh per-character dictionary lookup for each position.
encoded_txt = [char_to_int[char] for char in raw_txt]

datax = []
datay = []

for i in range(seq_length, n_char):
    # Slicing yields a new list per window, so rows of datax do not share storage.
    datax.append(encoded_txt[i - seq_length : i])
    # Each target is kept as a one-element list, so datay is a list of [id] rows.
    datay.append([encoded_txt[i]])


In [10]:
# Number of (input window, target character) training pairs.
n_patterns = len(datax)
n_patterns

99793

In [11]:
# Sanity check: one row per training window, one column per character in the window.
np.shape(datax)

(99793, 200)

In [12]:
# The LSTM expects 3-D input, so give every character id its own trailing
# axis of length 1: (windows, characters) -> (windows, characters, 1).

x = np.asarray(datax)
x = x.reshape(x.shape + (1,))
x.shape

(99793, 200, 1)

In [13]:
## Normalizing: divide the integer character ids by the vocabulary size so the inputs fall in [0, 1).
# NOTE: `x` is rebound from itself, so this cell is not idempotent. Running it
# a second time without re-running the reshape cell divides the values again.

x = x/float(n_vocabs)

In [14]:
# One-hot encode the targets. Passing num_classes pins the width to the
# vocabulary size; when it is omitted the width is inferred from the largest
# id present in datay, which would be too narrow if the highest-id character
# never occurred as a target.

y = np_utils.to_categorical(datay, num_classes=n_vocabs)
y.shape

(99793, 36)

# LSTM model

In [None]:
model = Sequential()

# first lstm layer
# Declaring input_shape (time steps, features) builds the model right away, so
# it can be summarised or have saved weights loaded before fit() is called.
# return_sequences=True hands the full sequence to the next LSTM layer.
model.add(LSTM(units=256, return_sequences=True, input_shape=x.shape[1:]))
model.add(Dropout(0.2))

# adding second lstm layer (returns only its final output)
model.add(LSTM(units=256))

# final dense layer: one softmax probability per column of the one-hot targets
model.add(Dense(y.shape[1], activation="softmax"))

model.compile(loss='categorical_crossentropy', optimizer='adam')

In [23]:
# define the checkpoints and callbacks

# Make sure the checkpoint directory exists before training starts; saving
# into a missing directory can fail on the first checkpoint (observed with
# older Keras releases -- harmless if the directory is already there).
os.makedirs("saved_model", exist_ok=True)

# The format specs must not contain a space after the colon: "{epoch: 02d}"
# selects the space sign flag and produced file names such as
# "weights-imporvement- 1- 2.8304-...". "{epoch:02d}" zero-pads instead.
filepath="saved_model/weights-imporvement-{epoch:02d}-{loss:.4f}-from-class.hdf5"

# Save only when the monitored training loss reaches a new minimum.
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [25]:
# Train for 10 epochs in mini-batches of 64, passing callbacks_list to fit().
model.fit(x,y, batch_size = 64, epochs = 10, callbacks = callbacks_list)

Epoch 1/10

Epoch 00001: loss improved from inf to 2.83037, saving model to saved_model/weights-imporvement- 1- 2.8304-from-class.hdf5
Epoch 2/10

Epoch 00002: loss improved from 2.83037 to 2.53994, saving model to saved_model/weights-imporvement- 2- 2.5399-from-class.hdf5
Epoch 3/10

Epoch 00003: loss improved from 2.53994 to 2.37288, saving model to saved_model/weights-imporvement- 3- 2.3729-from-class.hdf5
Epoch 4/10

Epoch 00004: loss improved from 2.37288 to 2.25953, saving model to saved_model/weights-imporvement- 4- 2.2595-from-class.hdf5
Epoch 5/10

Epoch 00005: loss improved from 2.25953 to 2.17057, saving model to saved_model/weights-imporvement- 5- 2.1706-from-class.hdf5
Epoch 6/10

Epoch 00006: loss improved from 2.17057 to 2.09537, saving model to saved_model/weights-imporvement- 6- 2.0954-from-class.hdf5
Epoch 7/10

Epoch 00007: loss improved from 2.09537 to 2.03064, saving model to saved_model/weights-imporvement- 7- 2.0306-from-class.hdf5
Epoch 8/10

Epoch 00008: loss i

<keras.callbacks.callbacks.History at 0x14dbf1b98c8>

In [93]:
# to generate text

# Pick a random training window as the seed. Take a copy: binding `pattern`
# directly to datax[start] would let any in-place change to the pattern
# (e.g. an append during generation) alter the stored training row as well.
start = np.random.randint(0,len(datax))
pattern = list(datax[start])
pattern_char = [int_to_char[val] for val in pattern]
print("Input seed:")
print("\"", ''.join(pattern_char), "\"")


Input seed:
" k:
stir up the provost.

marcus andronicus:
wife kill'd: it is a dead man's errlight;
but whiles i see the substance of his true beam
with famous driving poor and groans, and incurable unspeaker,
a pe "


In [94]:
import sys

# Greedy generation: repeatedly predict the most probable next character,
# emit it, and slide the window forward by one position.
for i in range(70):
    # Use a dedicated name for the model input; reusing `x` here would
    # overwrite the training tensor that model.fit() relies on.
    x_input = np.reshape(pattern, (1, len(pattern), 1))
    x_input = x_input/float(n_vocabs)
    prediction = model.predict(x_input, verbose=0)
    index = int(np.argmax(prediction))
    result = int_to_char[index]
    sys.stdout.write(result)
    # Build a new list rather than append in place, so whatever list `pattern`
    # was originally bound to (possibly a row of datax) is never mutated.
    pattern = pattern[1:] + [index]


rtier of the sight of the sight of the sight of the sight of the sight