In [1]:
#Importing required packages
import tensorflow as tf
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Attach Google Drive to the Colab filesystem so files stored there can be read
# by path from this notebook.
from google.colab import drive

gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Read the whole corpus from Google Drive into memory and lower-case it.
filename = "/content/gdrive/My Drive/shakespeare_input.txt"
# The context manager closes the file handle as soon as the text has been read
# (a bare open(...).read() leaves it to the garbage collector). The encoding is
# stated explicitly so the result does not depend on the runtime's locale.
with open(filename, encoding="utf-8") as corpus_file:
    raw_text = corpus_file.read().lower()


In [0]:

# Character vocabulary: every distinct character of the corpus in sorted order,
# plus a lookup from each character to its position in that ordering.
chars = sorted(set(raw_text))
char_to_int = {symbol: position for position, symbol in enumerate(chars)}


In [24]:
# Corpus length in characters and vocabulary size (number of distinct characters)
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocabs: ", n_vocab)

Total Characters:  4573338
Total Vocabs:  41


In [6]:
# Prepare the dataset of input -> output pairs encoded as integers.
# Each input is a window of `seq_length` consecutive characters and its target
# is the single character that follows the window; windows advance by one
# character at a time.
seq_length = 100
# Encode the corpus once up front instead of repeating the dictionary lookup
# for every character inside each of the overlapping windows.
encoded_text = [char_to_int[char] for char in raw_text]
# Slicing produces a fresh list per window, so windows do not share storage.
dataX = [encoded_text[i:i + seq_length] for i in range(n_chars - seq_length)]
# The target of window i is the character at position i + seq_length.
dataY = encoded_text[seq_length:n_chars]
n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)

Total Patterns:  4573238


In [0]:
# Convert the list of windows into a 3-D array laid out as
# [samples, time steps, features], with one feature (the character code) per step.
X = np.asarray(dataX).reshape(n_patterns, seq_length, 1)


In [0]:
# normalize: scale the integer character codes by the vocabulary size.
# The assignment is self-referential, so it is guarded on dtype: X is still an
# integer array only before the first division. Re-running this cell is then a
# no-op instead of silently dividing the inputs by n_vocab a second time.
if np.issubdtype(X.dtype, np.integer):
    X = X / float(n_vocab)

In [0]:
from keras.utils import np_utils

In [0]:
# one hot encode the output variable
# num_classes is pinned to the vocabulary size. Without it, to_categorical
# infers the width from the largest code present in dataY, which would be
# narrower than the vocabulary if the highest-coded character never occurs as
# a target.
y = np_utils.to_categorical(dataY, num_classes=n_vocab)

In [0]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [0]:
#1-layer LSTM model
# A single 256-unit LSTM over the input windows, followed by dropout and a
# softmax layer with one output per column of y.
# NOTE: the 2-layer cell further down assigns to the same `model` name, so
# whichever of the two cells ran last determines what `model` refers to.
model = Sequential()
# Fixed: the original line was missing the parenthesis that closes model.add(...),
# which made this cell a syntax error.
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [0]:
#2-layer LSTM model
# Two stacked 256-unit LSTMs, each followed by dropout. The first LSTM returns
# its full output sequence so the second LSTM has a sequence to consume. A
# softmax layer with one output per column of y sits on top.
model = Sequential([
    LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [0]:
#checkpoint
# Write the weights to disk whenever the monitored training loss reaches a new
# minimum; the epoch number and loss value are embedded in the file name.
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [20]:
# Train the current `model` for 2 epochs with a batch size of 1024, checkpointing
# through callbacks_list. Left as the cell's last expression so the History
# object is displayed.
model.fit(X, y, batch_size=1024, epochs=2, callbacks=callbacks_list)

W0701 09:31:46.707587 140440527189888 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/2

Epoch 00001: loss improved from inf to 2.43631, saving model to weights-improvement-01-2.4363.hdf5
Epoch 2/2

Epoch 00002: loss improved from 2.43631 to 2.00943, saving model to weights-improvement-02-2.0094.hdf5


<keras.callbacks.History at 0x7fba7d586b00>

In [0]:
# Restore saved weights into `model` (#one layer LSTM checkpoint)
# NOTE(review): when the cells are run top to bottom, `model` was last assigned
# by the 2-layer cell, and this file name does not match either checkpoint
# written in the training log above - presumably it comes from an earlier
# 1-layer run. Confirm that `model` has the 1-layer architecture before running.
filename = "weights-improvement-01-2.5026.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [0]:
# Reverse lookup: integer code -> character, used to decode model predictions.
int_to_char = dict(enumerate(chars))

In [35]:
#Text generation after training the LSTM 
# Seeds the model with one training window, then repeatedly predicts the most
# likely next character (greedy argmax), prints it, and slides the window
# forward by one character.
import sys
# pick a random seed window; randint's upper bound is exclusive, so passing
# len(dataX) makes every window (including the last one) selectable
start = np.random.randint(0, len(dataX))
# Work on a copy: appending to dataX[start] itself would grow that training
# window in place and corrupt the dataset for later cells.
pattern = list(dataX[start])
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for step in range(1000):
    # same preprocessing as the training inputs: [samples, time steps, features],
    # scaled by the vocabulary size
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # plain int so the window stays a list of Python ints
    index = int(np.argmax(prediction))
    sys.stdout.write(int_to_char[index])
    # slide the window: add the predicted character, drop the oldest one
    pattern.append(index)
    pattern = pattern[1:]
print ("\nDone.")

Seed:
" eems warm upon her lip.

leontes:
the fixture of her eye has motion in't,
as we are mock'd with art. "
n toee to toee toe
tore the world tfat the world that soeek to meve
the world of the sore tf tee the ponee tfat the sore

she sooe th tee the pone of the sore tf teee to toee toeee 
she sooe the world tfat the pooe of the sore

soe toank the pone tf tee the pone of the sore

soe toank the tore tf tee the pone of the sore

soe toanke to the pone of the sore tfat toen the sore

soe toanker 
iore to the pone oo the pante 
for the world to the world to toee to meve
the world of the sore tf tee the ponee tfat toe
toane to the world tfat the world that toen the sore

soe toanker 
io the sore tf tee the pone of the sore

soe toankee ie the world of the sore 
she sooe the sooe tf tee to toee the sore

soe toank the tore tf tee the pone of the sore to tee toeee
the tore tf tee the pone of the sore tfat toene
to the tore tf tee the pone of the sore

soe dnnreet oo the sore tf tee the po

In [0]:
#2-layer LSTM 
# Restore the weights saved after the second training epoch (the file name
# matches the last checkpoint reported in the training log) into `model`.
filename = "weights-improvement-02-2.0094.hdf5"
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [0]:
# Reverse lookup: integer code -> character, used to decode model predictions.
int_to_char = dict(enumerate(chars))

In [23]:
#Text generation after training the LSTM 
# Seeds the model with one training window, then repeatedly predicts the most
# likely next character (greedy argmax), prints it, and slides the window
# forward by one character.
import sys
# pick a random seed window; randint's upper bound is exclusive, so passing
# len(dataX) makes every window (including the last one) selectable
start = np.random.randint(0, len(dataX))
# Work on a copy: appending to dataX[start] itself would grow that training
# window in place and corrupt the dataset for later cells.
pattern = list(dataX[start])
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
for step in range(1000):
    # same preprocessing as the training inputs: [samples, time steps, features],
    # scaled by the vocabulary size
    x = np.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # plain int so the window stays a list of Python ints
    index = int(np.argmax(prediction))
    sys.stdout.write(int_to_char[index])
    # slide the window: add the predicted character, drop the oldest one
    pattern.append(index)
    pattern = pattern[1:]
print ("\nDone.")

Seed:
" re virtuous:
nor from mine own weak merits will i draw
the smallest fear or doubt of her revolt;
for "
 the counter of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of the sears of th

In [0]:
##Conclusion: Built and trained two LSTM models, i.e. a 1-layer and a 2-layer model, both with dropout.
##The 2-layer LSTM is overfitting, whereas the 1-layer model did not overfit.
##The 1-layer model performed better than the 2-layer model.
##Only two epochs and a batch size of 1024 were used for faster training, as more epochs and smaller batch sizes take a really long time.
  