<a href="https://colab.research.google.com/github/tfprogress/daily/blob/master/bear.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Abhijeet Singh  
EIP-3 Phase-2 Session-2 Assignment  
www.absingh.com

Changes:
- Add encoding to file read
- Remove all punctuation from source text
- Remove the Dropout layer before the Dense layer
- Add dropout to input LSTM layer
- Convert input to padded sequences
- Predict 500 characters
- Dropout rate of 0.1 everywhere
- Train for 100 epochs

In [0]:
# Mount Google Drive inside the Colab VM; the model checkpoints written later
# in this notebook are stored under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
!wget -q https://raw.githubusercontent.com/cseas/img/master/wonderland.txt

In [0]:
# Load LSTM network and generate text
import sys
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

In [0]:
# Load the corpus text and convert it to lowercase.
# The 'utf-8-sig' codec decodes UTF-8 and drops a leading byte-order mark if
# the file starts with one.
filename = "wonderland.txt"
# The with-block closes the file handle as soon as the text has been read.
with open(filename, encoding='utf-8-sig') as corpus_file:
    raw_text = corpus_file.read()
raw_text = raw_text.lower()

In [0]:
# Strip every character listed in string.punctuation from the corpus.
import string
# Third argument of str.maketrans: characters mapped to None, i.e. deleted.
translate_table = str.maketrans('', '', string.punctuation)
raw_text = raw_text.translate(translate_table)

In [0]:
# Vocabulary: every distinct character of the corpus in sorted order; a
# character's position in that list is its integer code.
chars = sorted(set(raw_text))
char_to_int = {symbol: code for code, symbol in enumerate(chars)}
print(chars)

['\n', ' ', '0', '3', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [0]:
# Report corpus size (in characters) and vocabulary size.
n_chars, n_vocab = len(raw_text), len(chars)
print("Total Characters: ", n_chars)
print("Total Vocab: ", n_vocab)

Total Characters:  136111
Total Vocab:  30


In [0]:
# Build the training pairs: each input is a window of seq_length consecutive
# character codes and its target is the code of the character right after it.
seq_length = 20
n_windows = n_chars - seq_length
# Encode the corpus once instead of re-encoding every overlapping window.
encoded_text = [char_to_int[symbol] for symbol in raw_text]
dataX = [encoded_text[begin:begin + seq_length] for begin in range(n_windows)]
dataY = [encoded_text[begin + seq_length] for begin in range(n_windows)]
n_patterns = len(dataX)
print("Total Patterns: ", n_patterns)

Total Patterns:  136011


In [0]:
# Convert to padded sequences
# Every window built above already holds exactly seq_length integers, so no
# padding is actually added here; the practical effect of the call is to turn
# the list of lists into a single 2-D array for the reshape that follows.
# NOTE(review): presumably an int32 NumPy array (the Keras default) — confirm.
from keras.preprocessing.sequence import pad_sequences
dataX = pad_sequences(dataX)

In [0]:
# Targets: one-hot encode the next-character codes.
y = np_utils.to_categorical(dataY)
# Inputs: shape as [samples, time steps, features] and divide by the
# vocabulary size, which puts every integer code in the range [0, 1).
X = numpy.reshape(dataX, (n_patterns, seq_length, 1)) / float(n_vocab)

In [0]:
# Define the network: two stacked 256-unit LSTM layers followed by a softmax
# layer with one unit per class of the one-hot targets.
timesteps, n_features = X.shape[1], X.shape[2]
model = Sequential([
    # dropout=0.1 acts on this layer's inputs; return_sequences=True hands the
    # full output sequence to the second LSTM.
    LSTM(256, input_shape=(timesteps, n_features),
         dropout=0.1, return_sequences=True),
    Dropout(0.1),
    LSTM(256),
    Dense(y.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 100, 256)          264192    
_________________________________________________________________
dropout_1 (Dropout)          (None, 100, 256)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dense_1 (Dense)              (None, 30)                7710      
Total params: 797,214
Trainable params: 797,214
Non-trainable params: 0
_________________________________________________________________


In [0]:
# define the checkpoint
import os

# Prepare model saving directory.
save_dir = "/content/gdrive/My Drive/models/"
# Create the folder up front so the first checkpoint write cannot fail on a
# Drive that has no models/ directory yet; exist_ok makes re-runs harmless.
os.makedirs(save_dir, exist_ok=True)

# The {epoch} and {loss} placeholders are filled in by ModelCheckpoint each
# time it saves, so every saved file records when it was written and its loss.
model_name = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
filepath = save_dir + model_name

# Save only on epochs where the training loss reaches a new minimum.
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [0]:
# Train on the full dataset for 100 epochs with mini-batches of 64 samples.
# callbacks_list carries the ModelCheckpoint, which (save_best_only=True,
# monitor='loss') writes a file only on epochs whose training loss improves.
model.fit(X, y, epochs=100, batch_size=64, callbacks=callbacks_list)

Epoch 1/100

Epoch 00001: loss improved from inf to 2.63640, saving model to /content/gdrive/My Drive/models/weights-improvement-01-2.6364-bigger.hdf5
Epoch 2/100

Epoch 00002: loss improved from 2.63640 to 2.23335, saving model to /content/gdrive/My Drive/models/weights-improvement-02-2.2333-bigger.hdf5
Epoch 3/100

Epoch 00003: loss improved from 2.23335 to 2.03440, saving model to /content/gdrive/My Drive/models/weights-improvement-03-2.0344-bigger.hdf5
Epoch 4/100

Epoch 00004: loss improved from 2.03440 to 1.90493, saving model to /content/gdrive/My Drive/models/weights-improvement-04-1.9049-bigger.hdf5
Epoch 5/100

Epoch 00005: loss improved from 1.90493 to 1.81034, saving model to /content/gdrive/My Drive/models/weights-improvement-05-1.8103-bigger.hdf5
Epoch 6/100

Epoch 00006: loss improved from 1.81034 to 1.73651, saving model to /content/gdrive/My Drive/models/weights-improvement-06-1.7365-bigger.hdf5
Epoch 7/100

Epoch 00007: loss improved from 1.73651 to 1.67252, saving mo

In [0]:
# The previous training output was truncated; the last checkpoint on Drive is
# from epoch 63, so reload it and carry on with epoch 64.
from keras.models import load_model

resume_checkpoint = "/content/gdrive/My Drive/models/weights-improvement-63-0.9213-bigger.hdf5"
model = load_model(resume_checkpoint)
# NOTE: in the recorded run the checkpoint callback reported "improved from
# inf" on the first resumed epoch, i.e. its best-loss tracking started over,
# so that epoch is saved even if it is worse than the checkpoint loaded here.
model.fit(
    X,
    y,
    epochs=100,
    batch_size=64,
    callbacks=callbacks_list,
    initial_epoch=63,  # epochs already completed; numbering continues at 64
)

W0723 15:41:52.429114 139684304533376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0723 15:41:52.470895 139684304533376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0723 15:41:52.478215 139684304533376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0723 15:41:52.763219 139684304533376 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0723 15:41:52.776000 

Epoch 64/100

Epoch 00064: loss improved from inf to 0.91849, saving model to /content/gdrive/My Drive/models/weights-improvement-64-0.9185-bigger.hdf5
Epoch 65/100

Epoch 00065: loss did not improve from 0.91849
Epoch 66/100

Epoch 00066: loss improved from 0.91849 to 0.91384, saving model to /content/gdrive/My Drive/models/weights-improvement-66-0.9138-bigger.hdf5
Epoch 67/100

Epoch 00067: loss did not improve from 0.91384
Epoch 68/100

Epoch 00068: loss improved from 0.91384 to 0.90747, saving model to /content/gdrive/My Drive/models/weights-improvement-68-0.9075-bigger.hdf5
Epoch 69/100

Epoch 00069: loss did not improve from 0.90747
Epoch 70/100

Epoch 00070: loss improved from 0.90747 to 0.90508, saving model to /content/gdrive/My Drive/models/weights-improvement-70-0.9051-bigger.hdf5
Epoch 71/100

Epoch 00071: loss improved from 0.90508 to 0.90233, saving model to /content/gdrive/My Drive/models/weights-improvement-71-0.9023-bigger.hdf5
Epoch 72/100

Epoch 00072: loss did not i

In [0]:
# The previous training output was truncated; the last checkpoint on Drive is
# from epoch 97, so reload it and run the remaining epochs (98-100).
from keras.models import load_model

resume_checkpoint = "/content/gdrive/My Drive/models/weights-improvement-97-0.8514-bigger.hdf5"
model = load_model(resume_checkpoint)
# NOTE: in the recorded run the checkpoint callback reported "improved from
# inf" on the first resumed epoch, i.e. its best-loss tracking started over,
# so that epoch is saved even if it is worse than the checkpoint loaded here.
model.fit(
    X,
    y,
    epochs=100,
    batch_size=64,
    callbacks=callbacks_list,
    initial_epoch=97,  # epochs already completed; numbering continues at 98
)

W0724 07:38:59.171493 140019712563072 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0724 07:38:59.220469 140019712563072 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0724 07:38:59.228467 140019712563072 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0724 07:38:59.478618 140019712563072 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0724 07:38:59.497540 

Epoch 98/100

Epoch 00098: loss improved from inf to 0.86995, saving model to /content/gdrive/My Drive/models/weights-improvement-98-0.8700-bigger.hdf5
Epoch 99/100

Epoch 00099: loss did not improve from 0.86995
Epoch 100/100

Epoch 00100: loss did not improve from 0.86995


<keras.callbacks.History at 0x7f58969b7908>

In [0]:
# Restore the weights used for generation: the epoch-97 checkpoint, whose
# file name records a loss of 0.8514.
filename = "weights-improvement-97-0.8514-bigger.hdf5"
weights_path = save_dir + filename
model.load_weights(weights_path)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [0]:
# Reverse lookup (integer code -> character) for decoding model predictions.
int_to_char = dict(enumerate(chars))

In [0]:
# Generate text: seed the model with one training window chosen at random,
# then repeatedly predict the next character and slide the window forward.

# pick a random seed
# numpy.random.randint excludes its upper bound, so passing len(dataX) makes
# every window, including the last one, eligible as the seed.
start = numpy.random.randint(0, len(dataX))
pattern = dataX[start]
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

# generate characters
for step in range(500):
    # Same preprocessing as the training inputs: shape as
    # [samples, time steps, features] and divide by the vocabulary size.
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: always emit the most probable character.
    index = numpy.argmax(prediction)
    sys.stdout.write(int_to_char[index])
    # Drop the oldest code and append the predicted one, keeping the window
    # length constant. numpy.append returns a new array, so dataX is untouched.
    pattern = numpy.append(pattern[1:], index)
print("\nDone.")

Seed:
" 
a bit

perhaps it hasnt one alice ventured to remark

tut tut child said the duchess everythings go "
r the waid the hatter waid to the caterpillar 
the said the waid to here teadling the the whong the whought ill the was aod the hed to tee whe muckn ard the had good th tole of the tab it tas io a foers to sie said alice 
she ooee turtle sape oe tee roo the hedd to the sea ot the whought alice 
she ouee turtle sighed deao so tee of course wo the tea it taid alice 
she hors a little baler anl a farters topesting it at she said to herself as 
the corm taid the hatter whe sueen sail of tha said to 
Done.
