In [1]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
import tensorflow as tf
import logging
tf.get_logger().setLevel(logging.ERROR)

In [2]:
EPOCHS = 100
BATCH_SIZE = 256
WINDOW_LENGTH = 40
WINDOW_STEP = 3
BEAM_SIZE = 8
NUM_LETTERS = 11
MAX_LENGTH = 50

In [3]:
# Open the input file.
INPUT_FILE_NAME = './Alice in Wonderland.txt'
file = open(INPUT_FILE_NAME, 'r', encoding='utf-8')
text = file.read()
file.close()
INPUT_FILE_NAME2 = './Frankenstein.txt'
file = open(INPUT_FILE_NAME2, 'r', encoding='utf-8')
text += file.read()
file.close()

In [4]:
#Preprocessing
# Make lowercase and remove newline and extra spaces.
text = text.lower()
text = text.replace('\n', ' ')
text = text.replace('  ', ' ')
# Encode characters as indices.
unique_chars = list(set(text))
char_to_index = dict((ch, index) for index, ch in enumerate(unique_chars))
index_to_char = dict((index, ch) for index, ch in enumerate(unique_chars))
encoding_width = len(char_to_index)

In [5]:
text = text

In [6]:
# Create training examples.
fragments = []
targets = []
for i in range(0, len(text) - WINDOW_LENGTH, WINDOW_STEP):
    fragments.append(text[i: i + WINDOW_LENGTH])
    targets.append(text[i + WINDOW_LENGTH])
# Convert to one-hot encoded training data.
X = np.zeros((len(fragments), WINDOW_LENGTH, encoding_width))
y = np.zeros((len(fragments), encoding_width))
for i, fragment in enumerate(fragments):
    for j, char in enumerate(fragment):
        X[i, j, char_to_index[char]] = 1
    target_char = targets[i]
    y[i, char_to_index[target_char]] = 1

In [7]:
#Training Sample
print(encoding_width)
print(X[0].shape)
print("Training Input:",X[0])
print("Training Output:",y[0])

69
(40, 69)
Training Input: [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Training Output: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]


In [8]:
model = Sequential()

model.add(LSTM(126, 
                return_sequences=True,
                dropout=0.2, recurrent_dropout=0.2,
                input_shape=(None,encoding_width)
                ))

model.add(LSTM(126, 
                dropout=0.2,
                return_sequences=True,
                recurrent_dropout=0.2))

model.add(LSTM(256, 
                dropout=0.2,
                recurrent_dropout=0.2))

model.add(Dense(encoding_width, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 126)         98784     
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 126)         127512    
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               392192    
_________________________________________________________________
dense (Dense)                (None, 69)                17733     
Total params: 636,221
Trainable params: 636,221
Non-trainable params: 0
_________________________________________________________________


In [9]:
model.compile(loss='categorical_crossentropy',
                optimizer='adam')

In [10]:
history = model.fit(X, y, validation_split=0.05,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS, verbose=2,
                    shuffle=True)

Train on 190076 samples, validate on 10004 samples
Epoch 1/100
190076/190076 - 125s - loss: 2.7795 - val_loss: 2.4798
Epoch 2/100
190076/190076 - 141s - loss: 2.2751 - val_loss: 2.2586
Epoch 3/100
190076/190076 - 144s - loss: 2.1013 - val_loss: 2.0990
Epoch 4/100
190076/190076 - 143s - loss: 1.9893 - val_loss: 2.0044
Epoch 5/100
190076/190076 - 140s - loss: 1.9015 - val_loss: 1.9063
Epoch 6/100
190076/190076 - 143s - loss: 1.8348 - val_loss: 1.8484
Epoch 7/100
190076/190076 - 141s - loss: 1.7801 - val_loss: 1.7858
Epoch 8/100
190076/190076 - 141s - loss: 1.7337 - val_loss: 1.7457
Epoch 9/100
190076/190076 - 145s - loss: 1.6918 - val_loss: 1.6996
Epoch 10/100
190076/190076 - 145s - loss: 1.6599 - val_loss: 1.6703
Epoch 11/100
190076/190076 - 145s - loss: 1.6306 - val_loss: 1.6353
Epoch 12/100
190076/190076 - 143s - loss: 1.6034 - val_loss: 1.6165
Epoch 13/100
190076/190076 - 145s - loss: 1.5811 - val_loss: 1.5959
Epoch 14/100
190076/190076 - 144s - loss: 1.5612 - val_loss: 1.5723
Epoch 

KeyboardInterrupt: 

### Beam Search

In [None]:
# Create initial single beam represented by triplet (probability , string , encoded string).

In [11]:
def BeamSearch(letters):
    encoded = []
    for i, char in enumerate(letters):
        x = np.zeros(encoding_width)
        x[ char_to_index[char]] = 1
        encoded.append(x)
    # encoded = np.array(encoded).reshape(len(encoded),1)
    beams = [(np.log(1.0), letters, encoded)]
    # Predict NUM_LETTERS into the future.
    for i in range(NUM_LETTERS):
        new_beams = []
        minibatch = []
        for triple in beams:
            minibatch.append(triple[2])
        y_predict = model.predict(np.array(minibatch), verbose=0)
            # print(y_predict)

        for j, softmax_vec in enumerate(y_predict):
            triple = beams[j]
            for k in range(BEAM_SIZE):
                char_index = np.argmax(softmax_vec)
                new_prob = triple[0] + np.log(softmax_vec[char_index])
                new_letters = triple[1] + index_to_char[char_index]
                x = np.zeros(encoding_width)
                x[ char_index]  = 1
                new_encoded = triple[2].copy()
                new_encoded.append(x)
                new_beams.append((new_prob, new_letters,new_encoded))
                softmax_vec[char_index] = 0
    # Prune tree to only keep BEAM_SIZE most probable beams.
        new_beams.sort(key=lambda tup: tup[0], reverse=True)
        beams = new_beams[0:BEAM_SIZE]
    return beams

In [20]:
NUM_LETTERS = 20
letters = 'rabbit-hole she'
beams = BeamSearch(letters)
for item in beams:
    print(item[1])

rabbit-hole she said alice, and the
rabbit-hole she had been the works 
rabbit-hole she said alice, and she
rabbit-hole she had been the words 
rabbit-hole she said alice, and thi
rabbit-hole she said alice, and tha
rabbit-hole she said alice, and i h
rabbit-hole she had been the most o


In [46]:
letters = 'justine was '
beams = BeamSearch(letters)
for item in beams:
    print(item[1])

justine was discovered, and the 
justine was destruction of this 
justine was destruction of the s
justine was destruction of the m
justine was destruction of the p
justine was discovered that i ha
justine was destruction of the w
justine was destruction of the c
