<a href="https://colab.research.google.com/github/nrajmalwar/Python/blob/master/Session%202/Phase_2_Session_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Classes and Functions

In [1]:
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


# Load Dataset

In [2]:
# Load the book text and convert it to lowercase.
# The file is decoded as UTF-8; the 'utf-8-sig' codec also drops a leading
# byte-order mark if one is present.
filename = "wonderland.txt"

# Use a context manager so the file handle is closed as soon as the text has
# been read, instead of being left open until garbage collection.
with open(filename, mode='r', encoding='utf-8-sig') as text_file:
    raw_text = text_file.read()

raw_text = raw_text.lower()

# Last expression of the cell: show the number of characters loaded.
len(raw_text)

144430

In [3]:
# Data cleanup: delete every character listed in string.punctuation.
import string

# A translation table that maps each punctuation character to None, which
# str.translate interprets as "remove this character".
punctuation_table = str.maketrans(dict.fromkeys(string.punctuation))
raw_text = raw_text.translate(punctuation_table)

# Last expression of the cell: show the length after the cleanup.
len(raw_text)

136110

In [4]:
# Preview the cleaned text: show its first 1000 characters.
print(raw_text[:1000])

alices adventures in wonderland

lewis carroll

the millennium fulcrum edition 30




chapter i down the rabbithole

alice was beginning to get very tired of sitting by her sister on the
bank and of having nothing to do once or twice she had peeped into the
book her sister was reading but it had no pictures or conversations in
it and what is the use of a book thought alice without pictures or
conversations

so she was considering in her own mind as well as she could for the
hot day made her feel very sleepy and stupid whether the pleasure
of making a daisychain would be worth the trouble of getting up and
picking the daisies when suddenly a white rabbit with pink eyes ran
close by her

there was nothing so very remarkable in that nor did alice think it so
very much out of the way to hear the rabbit say to itself oh dear
oh dear i shall be late when she thought it over afterwards it
occurred to her that she ought to have wondered at this but at the time
it all seemed quite natural but w

In [0]:
# Build the vocabulary: every distinct character of the text, in sorted order.
chars = sorted(set(raw_text))
# Each character is encoded as its position in that sorted vocabulary.
char_to_int = {char: code for code, char in enumerate(chars)}

In [6]:
# Corpus size (in characters) and vocabulary size (distinct characters).
n_chars, n_vocab = len(raw_text), len(chars)

print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  136110
Total Vocab:  30


In [7]:
# Display the full character -> integer code table.
char_to_int

{'\n': 0,
 ' ': 1,
 '0': 2,
 '3': 3,
 'a': 4,
 'b': 5,
 'c': 6,
 'd': 7,
 'e': 8,
 'f': 9,
 'g': 10,
 'h': 11,
 'i': 12,
 'j': 13,
 'k': 14,
 'l': 15,
 'm': 16,
 'n': 17,
 'o': 18,
 'p': 19,
 'q': 20,
 'r': 21,
 's': 22,
 't': 23,
 'u': 24,
 'v': 25,
 'w': 26,
 'x': 27,
 'y': 28,
 'z': 29}

# Use Padded Sequences

In [9]:
# Prepare the dataset of (input, target) pairs, encoded as integers.
#
# The text is cut into consecutive, non-overlapping chunks of `seq_length`
# characters. Inside a chunk of k characters, every prefix of length
# 1 .. k - 1 becomes one input and the character that follows the prefix
# becomes its target, so each chunk contributes k - 1 pairs.
seq_length = 100
dataX = []
dataY = []
count = 0        # running number of pairs generated
input_seq = []   # variable-length encoded inputs, padded further below

for chunk_start in range(0, n_chars, seq_length):
    chunk = raw_text[chunk_start:chunk_start + seq_length]
    # Encode the chunk once; the prefixes below are slices of this list.
    encoded_chunk = [char_to_int[char] for char in chunk]

    for target_pos in range(1, len(encoded_chunk)):
        count += 1
        input_seq.append(encoded_chunk[:target_pos])
        dataY.append(encoded_chunk[target_pos])

# Left-pad ("pre" padding) every input so that all rows have length seq_length.
# NOTE(review): pad_sequences fills with 0 by default, and 0 is also the code
# char_to_int assigns to '\n' in this notebook's vocabulary, so padding and
# real newlines cannot be told apart in dataX - confirm this is intended.
dataX = numpy.array(pad_sequences(input_seq, maxlen=seq_length, padding='pre'))

n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  134748


In [10]:
# Number of target characters collected (one per input pattern).
len(dataY)

134748

In [11]:
# Number of padded input rows.
len(dataX)

134748

In [29]:
# Peek at the first five padded input rows.
dataX[:5]

array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  4],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  4, 15],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0

In [0]:
# Shape the inputs as [samples, time steps, features] with one feature per
# time step, then divide the integer codes by the vocabulary size so every
# value lies in [0, 1).
X = dataX.reshape((n_patterns, seq_length, 1)) / float(n_vocab)

# One-hot encode the target characters.
y = np_utils.to_categorical(dataY)

# LSTM Network

In [13]:
# Define the LSTM model: two stacked 256-unit LSTM layers, each preceded by
# dropout, followed by a softmax over the output classes.
model = Sequential()

# Dropout of 0.1 on the input.
# A Sequential model takes its input shape from its FIRST layer, so
# input_shape is declared here rather than on the LSTM that follows. This way
# the model is built as soon as it is defined and does not depend on fit()
# being called before its weights can be inspected or loaded.
model.add(Dropout(0.1, input_shape=(X.shape[1], X.shape[2])))
# return_sequences=True hands the full sequence of outputs to the next LSTM.
model.add(LSTM(256, return_sequences=True))

model.add(Dropout(0.1))
model.add(LSTM(256))

# One output unit per column of the one-hot encoded targets.
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

W0727 14:21:51.789352 139738329945984 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0727 14:21:51.806673 139738329945984 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



# Model Checkpoint

In [14]:
# Mount Google Drive so the checkpoint file is written to Drive.
from google.colab import drive
drive.mount('/content/drive')

# Checkpoint callback: after each epoch, write the model to the path below,
# but only when the monitored training loss is lower than the best seen so far.
from keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint(
    '/content/drive/My Drive/Colab Notebooks/EIP_P2S2.hdf5',
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Model Training

In [15]:
# Train for 50 epochs in batches of 256 patterns, with the checkpoint callback
# active. The History object returned by fit() is the cell's displayed output.
model.fit(
    X,
    y,
    epochs=50,
    batch_size=256,
    callbacks=callbacks_list,
)

W0727 14:21:51.838175 139738329945984 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0727 14:21:51.841794 139738329945984 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0727 14:21:51.855834 139738329945984 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W0727 14:21:51.870910 139738329945984 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backe

Epoch 1/50

Epoch 00001: loss improved from inf to 2.83738, saving model to /content/drive/My Drive/Colab Notebooks/EIP_P2S2.hdf5
Epoch 2/50

Epoch 00002: loss improved from 2.83738 to 2.65355, saving model to /content/drive/My Drive/Colab Notebooks/EIP_P2S2.hdf5
Epoch 3/50

Epoch 00003: loss improved from 2.65355 to 2.47743, saving model to /content/drive/My Drive/Colab Notebooks/EIP_P2S2.hdf5
Epoch 4/50

Epoch 00004: loss improved from 2.47743 to 2.33416, saving model to /content/drive/My Drive/Colab Notebooks/EIP_P2S2.hdf5
Epoch 5/50

Epoch 00005: loss improved from 2.33416 to 2.23271, saving model to /content/drive/My Drive/Colab Notebooks/EIP_P2S2.hdf5
Epoch 6/50

Epoch 00006: loss improved from 2.23271 to 2.15804, saving model to /content/drive/My Drive/Colab Notebooks/EIP_P2S2.hdf5
Epoch 7/50

Epoch 00007: loss improved from 2.15804 to 2.11723, saving model to /content/drive/My Drive/Colab Notebooks/EIP_P2S2.hdf5
Epoch 8/50

Epoch 00008: loss improved from 2.11723 to 2.05300, sa

<keras.callbacks.History at 0x7f173a5c3c50>

# Generate Text

In [0]:
# Restore the network weights from the checkpoint file, then compile the model
# with the same loss and optimizer used for training.
filename = '/content/drive/My Drive/Colab Notebooks/EIP_P2S2.hdf5'
model.load_weights(filename)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [0]:
# Reverse lookup: integer code -> character (the position of each character in
# the sorted vocabulary is its code).
int_to_char = dict(enumerate(chars))

In [28]:
# Pick a random training pattern to use as the seed.
# numpy.random.randint excludes its upper bound, so len(dataX) (not
# len(dataX) - 1) is passed to make every pattern, including the last one,
# eligible.
start = numpy.random.randint(0, len(dataX))
pattern = list(dataX[start])
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

# Generate 500 characters, one at a time.
for i in range(500):
    # Same preprocessing as the training inputs: shape [1, time steps, 1] and
    # integer codes divided by the vocabulary size.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always take the most probable next character.
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    sys.stdout.write(result)
    # Slide the window: append the predicted code and drop the oldest one so
    # the pattern keeps a constant length.
    pattern.append(index)
    pattern = pattern[1:]
print("\nDone.")

Seed:
" 


aving nothing to do once or twice she had peeped into the
book her sister was reading but it had  "
tatted tather anice anlce anlce anl rveer toanl anice anl e mike a tueenisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuite a cooverattoalisg taid toeakdd tuit
Done.
