In [1]:
# This interesting program is to implement RNN (Recurrent Neural Network) - LSTM (Long Short Team Memory)
# to predict the next word from every 3 words input in an sample story.
# https://towardsdatascience.com/lstm-by-example-using-tensorflow-feb0c1968537

from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import random
import collections
import time

In [2]:
# calculate elapsed model processing time

start_time = time.time()
def elapsed(sec):
    if sec<60:
        return str(sec) + " sec"
    elif sec<(60*60):
        return str(sec/60) + " min"
    else:
        return str(sec/(60*60)) + " hr"

In [3]:
# Target log path
logs_path = '/tensorflow/rnn_words'
writer = tf.summary.FileWriter(logs_path)


In [4]:
# Text file containing words for training
training_file = '/tensorflow/rnn_words/story1.txt'

In [5]:
def read_data(fname):
    with open(fname) as f:
        content = f.readlines()
    content = [x.strip() for x in content]
    content = [word for i in range(len(content)) for word in content[i].split()]
    content = np.array(content)
    return content

In [6]:
print("Loaded training data...")
training_data = read_data(training_file)
print("Loaded training data completed.")

Loaded training data...
Loaded training data completed.


In [7]:
training_data[:]

array(['long', 'ago', ',', 'the', 'mice', 'had', 'a', 'general',
       'council', 'to', 'consider', 'what', 'measures', 'they', 'could',
       'take', 'to', 'outwit', 'their', 'common', 'enemy', ',', 'the',
       'cat', '.', 'some', 'said', 'this', ',', 'and', 'some', 'said',
       'that', 'but', 'at', 'last', 'a', 'young', 'mouse', 'got', 'up',
       'and', 'said', 'he', 'had', 'a', 'proposal', 'to', 'make', ',',
       'which', 'he', 'thought', 'would', 'meet', 'the', 'case', '.',
       'you', 'will', 'all', 'agree', ',', 'said', 'he', ',', 'that',
       'our', 'chief', 'danger', 'consists', 'in', 'the', 'sly', 'and',
       'treacherous', 'manner', 'in', 'which', 'the', 'enemy',
       'approaches', 'us', '.', 'now', ',', 'if', 'we', 'could',
       'receive', 'some', 'signal', 'of', 'her', 'approach', ',', 'we',
       'could', 'easily', 'escape', 'from', 'her', '.', 'i', 'venture',
       ',', 'therefore', ',', 'to', 'propose', 'that', 'a', 'small',
       'bell', 'be', 'pr

In [8]:
def build_dataset(words):
    count = collections.Counter(words).most_common()
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary


In [9]:
dictionary, reverse_dictionary = build_dataset(training_data)

In [10]:
print (dictionary.get('her'))
print( reverse_dictionary.get(29))

29
her


In [11]:
vocab_size = len(dictionary)

In [12]:
print(vocab_size)

112


In [13]:
# Parameters
learning_rate = 0.001
training_iters = 50000
display_step = 1000
n_input = 3

In [14]:
# number of units in RNN cell
n_hidden = 512

In [15]:
# tf Graph input
x = tf.placeholder("float", [None, n_input, 1])
y = tf.placeholder("float", [None, vocab_size])

In [16]:
print(x);
print(y)

Tensor("Placeholder:0", shape=(?, 3, 1), dtype=float32)
Tensor("Placeholder_1:0", shape=(?, 112), dtype=float32)


In [17]:
# RNN output node weights and biases
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, vocab_size]))
}

Instructions for updating:
Colocations handled automatically by placer.


In [18]:
biases = {
    'out': tf.Variable(tf.random_normal([vocab_size]))
}


In [19]:
print(weights);
print(biases);

{'out': <tf.Variable 'Variable:0' shape=(512, 112) dtype=float32_ref>}
{'out': <tf.Variable 'Variable_1:0' shape=(112,) dtype=float32_ref>}


In [20]:
def RNN(x, weights, biases):

    # reshape to [1, n_input]
    x = tf.reshape(x, [-1, n_input])

    # Generate a n_input-element sequence of inputs
    # (eg. [had] [a] [general] -> [20] [6] [33])
    x = tf.split(x,n_input,1)

    # 2-layer LSTM, each layer has n_hidden units.
    # Average Accuracy= 95.20% at 50k iter
    rnn_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden),rnn.BasicLSTMCell(n_hidden)])

    # 1-layer LSTM with n_hidden units but with lower accuracy.
    # Average Accuracy= 90.60% 50k iter
    # Uncomment line below to test but comment out the 2-layer rnn.MultiRNNCell above
    # rnn_cell = rnn.BasicLSTMCell(n_hidden)

    # generate prediction
    outputs, states = rnn.static_rnn(rnn_cell, x, dtype=tf.float32)

    # there are n_input outputs but
    # we only want the last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']


In [21]:
pred = RNN(x, weights, biases)


Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API


In [22]:
# Loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [23]:
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

In [None]:
# Model evaluation
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))


In [None]:
# Initializing the variables
init = tf.global_variables_initializer()


In [None]:
# Launch the graph
with tf.Session() as session:
    session.run(init)
    step = 0
    offset = random.randint(0,n_input+1)
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0

    writer.add_graph(session.graph)

    while step < training_iters:
        # Generate a minibatch. Add some randomness on selection process.
        if offset > (len(training_data)-end_offset):
            offset = random.randint(0, n_input+1)

        symbols_in_keys = [ [dictionary[ str(training_data[i])]] for i in range(offset, offset+n_input) ]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(training_data[offset+n_input])]] = 1.0
        symbols_out_onehot = np.reshape(symbols_out_onehot,[1,-1])

        _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred], \
                                                feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
        loss_total += loss
        acc_total += acc
        if (step+1) % display_step == 0:
            print("Iter= " + str(step+1) + ", Average Loss= " + \
                  "{:.6f}".format(loss_total/display_step) + ", Average Accuracy= " + \
                  "{:.2f}%".format(100*acc_total/display_step))
            acc_total = 0
            loss_total = 0
            symbols_in = [training_data[i] for i in range(offset, offset + n_input)]
            symbols_out = training_data[offset + n_input]
            symbols_out_pred = reverse_dictionary[int(tf.argmax(onehot_pred, 1).eval())]
            print("%s - [%s] vs [%s]" % (symbols_in,symbols_out,symbols_out_pred))
        step += 1
        offset += (n_input+1)

Iter= 1000, Average Loss= 4.522732, Average Accuracy= 3.20%
['nobody', 'spoke', '.'] - [then] vs [until]
Iter= 2000, Average Loss= 2.945253, Average Accuracy= 17.40%
['well', ',', 'but'] - [who] vs [could]
Iter= 3000, Average Loss= 2.256479, Average Accuracy= 40.10%
['met', 'with', 'general'] - [applause] vs [applause]
Iter= 4000, Average Loss= 2.172816, Average Accuracy= 46.40%
['when', 'she', 'was'] - [about] vs [about]
Iter= 5000, Average Loss= 1.813674, Average Accuracy= 53.60%
['round', 'the', 'neck'] - [of] vs [,]
Iter= 6000, Average Loss= 1.641074, Average Accuracy= 55.20%
[',', 'to', 'propose'] - [that] vs [that]
Iter= 7000, Average Loss= 1.352943, Average Accuracy= 66.00%
['escape', 'from', 'her'] - [.] vs [which]
Iter= 8000, Average Loss= 1.326775, Average Accuracy= 66.70%
['the', 'sly', 'and'] - [treacherous] vs [you]
Iter= 9000, Average Loss= 1.086272, Average Accuracy= 69.30%
['meet', 'the', 'case'] - [.] vs [,]
Iter= 10000, Average Loss= 1.088446, Average Accuracy= 71.50%

In [None]:
    print("Optimization Finished!")
    print("Elapsed time: ", elapsed(time.time() - start_time))
    print("Run on command line.")
    print("\ttensorboard --logdir=%s" % (logs_path))
    print("Point your web browser to: http://localhost:6006/")


In [None]:
    while True:
        prompt = "%s words: " % n_input
        sentence = input(prompt)
        sentence = sentence.strip()
        words = sentence.split(' ')
        if len(words) != n_input:
            continue
        try:
            symbols_in_keys = [dictionary[str(words[i])] for i in range(len(words))]
            for i in range(32):
                keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
                onehot_pred = session.run(pred, feed_dict={x: keys})
                onehot_pred_index = int(tf.argmax(onehot_pred, 1).eval())
                sentence = "%s %s" % (sentence,reverse_dictionary[onehot_pred_index])
                symbols_in_keys = symbols_in_keys[1:]
                symbols_in_keys.append(onehot_pred_index)
            print(sentence)
        except:
            print("Word not in dictionary")
