In [1]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

import tensorflow as tf
import logging
tf.get_logger().setLevel(logging.ERROR)


In [2]:
EPOCHS = 50
BATCH_SIZE = 256
INPUT_FILE_NAME = 'Frankenstein.txt'
WINDOW_LENGTH = 40
WINDOW_STEP = 3
BEAM_SIZE = 8
NUM_LETTERS = 11
MAX_LENGTH = 50

In [3]:
file = open(INPUT_FILE_NAME, 'r', encoding='utf-8')
text = file.read()
file.close()
# Make lower case and split into individual words.
text = text.lower()
text = text.replace('\n', ' ')
text = text.replace(' ', ' ')
# Encode characters as indices.
unique_chars = list(set(text))
char_to_index = dict((ch, index) for index,
                                ch in enumerate(unique_chars))
index_to_char = dict((index, ch) for index,
                                ch in enumerate(unique_chars))
encoding_width = len(char_to_index)

In [4]:
fragments = []
targets = []
for i in range(0, len(text) - WINDOW_LENGTH, WINDOW_STEP):
    fragments.append(text[i: i + WINDOW_LENGTH])
    targets.append(text[i + WINDOW_LENGTH])
# Convert to one-hot encoded training data.
X = np.zeros((len(fragments), WINDOW_LENGTH, encoding_width))
y = np.zeros((len(fragments), encoding_width))
for i, fragment in enumerate(fragments):
    for j, char in enumerate(fragment):
        X[i, j, char_to_index[char]] = 1
    target_char = targets[i]
    y[i, char_to_index[target_char]] = 1

In [5]:
# Build and train model.
model = Sequential()
model.add(LSTM(128, return_sequences=True,
dropout=0.2, recurrent_dropout=0.2,
input_shape=(None, encoding_width)))
model.add(LSTM(128, dropout=0.2,
recurrent_dropout=0.2))
model.add(Dense(encoding_width, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='adam')
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 128)         99840     
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense (Dense)                (None, 66)                8514      
Total params: 239,938
Trainable params: 239,938
Non-trainable params: 0
_________________________________________________________________
Train on 138944 samples, validate on 7313 samples
Epoch 1/50
138944/138944 - 51s - loss: 2.8049 - val_loss: 2.6683
Epoch 2/50
138944/138944 - 52s - loss: 2.3857 - val_loss: 2.5303
Epoch 3/50
138944/138944 - 53s - loss: 2.2400 - val_loss: 2.4263
Epoch 4/50
138944/138944 - 59s - loss: 2.1499 - val_loss: 2.3650
Epoch 5/50
138944/138944 - 61s - loss: 2.0812 - val_loss: 2.3307
Epoch

In [7]:
history = model.fit(X, y, validation_split=0.05,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    verbose=2,
                    shuffle=True)

Train on 138944 samples, validate on 7313 samples
Epoch 1/50
138944/138944 - 48s - loss: 1.6342 - val_loss: 2.0750
Epoch 2/50
138944/138944 - 50s - loss: 1.6267 - val_loss: 2.0771
Epoch 3/50
138944/138944 - 57s - loss: 1.6208 - val_loss: 2.0706
Epoch 4/50
138944/138944 - 57s - loss: 1.6119 - val_loss: 2.0657
Epoch 5/50
138944/138944 - 66s - loss: 1.6029 - val_loss: 2.0701
Epoch 6/50
138944/138944 - 63s - loss: 1.5959 - val_loss: 2.0651
Epoch 7/50
138944/138944 - 62s - loss: 1.5891 - val_loss: 2.0578
Epoch 8/50
138944/138944 - 63s - loss: 1.5832 - val_loss: 2.0472
Epoch 9/50
138944/138944 - 67s - loss: 1.5753 - val_loss: 2.0576
Epoch 10/50
138944/138944 - 62s - loss: 1.5698 - val_loss: 2.0530
Epoch 11/50
138944/138944 - 65s - loss: 1.5645 - val_loss: 2.0488
Epoch 12/50
138944/138944 - 68s - loss: 1.5564 - val_loss: 2.0408
Epoch 13/50
138944/138944 - 62s - loss: 1.5530 - val_loss: 2.0480
Epoch 14/50
138944/138944 - 62s - loss: 1.5469 - val_loss: 2.0489
Epoch 15/50
138944/138944 - 67s - l

KeyboardInterrupt: 

In [6]:
letters = 'alice '
one_hots = []
for i, char in enumerate(letters):
    x = np.zeros(encoding_width)
    x[char_to_index[char]] = 1
    one_hots.append(x)
beams = [(np.log(1.0), letters, one_hots)]
# Predict NUM_LETTERS into the future.
for i in range(NUM_LETTERS):
    minibatch_list = []
    # Create minibatch from one-hot encodings, and predict.
    for triple in beams:
        minibatch_list.append(triple[2])
    minibatch = np.array(minibatch_list)
    y_predict = model.predict(minibatch, verbose=0)
    new_beams = []
    for j, softmax_vec in enumerate(y_predict):
        triple = beams[j]
    # Create BEAM_SIZE new beams from each existing beam.
    for k in range(BEAM_SIZE):
        char_index = np.argmax(softmax_vec)
        new_prob = triple[0] + np.log(
        softmax_vec[char_index])
        new_letters = triple[1] + index_to_char[char_index]
        x = np.zeros(encoding_width)
        x[char_index] = 1
        new_one_hots = triple[2].copy()
        new_one_hots.append(x)
        new_beams.append((new_prob, new_letters,
        new_one_hots))
        softmax_vec[char_index] = 0
# Prune tree to only keep BEAM_SIZE most probable beams.
    new_beams.sort(key=lambda tup: tup[0], reverse=True)
    beams = new_beams[0:BEAM_SIZE]
for item in beams:
    print(item[1])

alice siddodagy: 
alice siddodagy:”
alice siddodagy:,
alice siddodagy:_
alice siddodagy:.
alice siddodagy:d
alice siddodagy:s
alice siddodagy:’
