In [1]:
import collections
import random
# import time
import numpy as np
import tensorflow as tf

# want to write logs?

In [2]:
def read_data(file):
    """Read a whitespace-tokenised text file into a flat array of words.

    Args:
        file: Path of the text file to read.

    Returns:
        A 1-D NumPy string array holding every token of the file in reading
        order. The array is empty when the file contains no tokens.
    """
    with open(file) as f:
        # Gather the tokens of all lines into ONE flat list. Building a list
        # per line and reshaping afterwards only works when every line has
        # the same number of tokens; blank or uneven lines would not produce
        # a flat array of words.
        words = [word for line in f for word in line.split()]
    return np.array(words, dtype=str)

# Load the corpus; the path is resolved relative to the current working directory.
training_file = "belling_the_cat.txt"
training_data = read_data(file=training_file)
print("Loaded training data...")

Loaded training data...


In [3]:
def build_dataset(words):
    """Build the word <-> integer id lookup tables for a sequence of words.

    Ids follow descending frequency: the most common word receives id 0,
    the next most common id 1, and so on.

    Args:
        words: Iterable of hashable word tokens.

    Returns:
        A tuple (dictionary, reverse_dictionary) where dictionary maps each
        distinct word to its id and reverse_dictionary maps each id back to
        its word.
    """
    # (word, count) pairs, most frequent first
    ranked = collections.Counter(words).most_common()
    # the position in the frequency ranking is the word's id
    dictionary = {token: rank for rank, (token, _) in enumerate(ranked)}
    reverse_dictionary = {rank: token for token, rank in dictionary.items()}
    return dictionary, reverse_dictionary

# Build both lookup tables once for the whole corpus; the vocabulary size is
# the number of distinct words.
dictionary, reverse_dictionary = build_dataset(words=training_data)
vocab_size = len(dictionary)

In [4]:
# Hyperparameters -- everything a reader is likely to tune lives in this cell.
# Step size handed to the RMSProp optimizer.
learning_rate = 0.001
# Total number of training steps (one sample is fed per step).
training_iters = 50000
# Averaged loss/accuracy are printed every `display_step` steps.
display_step = 1000
# Number of consecutive words fed to the network to predict the next word.
n_input = 3
# Number of units in each RNN cell.
# This is the dimensionality of the hidden state and output state.
n_hidden = 512

In [5]:
# Graph inputs: `x` holds n_input values per sample, `y` one value per
# vocabulary entry (fed as a one-hot label by the training loop).
x = tf.placeholder("float", shape=[None, n_input, 1])
y = tf.placeholder("float", shape=[None, vocab_size])

# Output layer parameters: project an n_hidden-sized RNN output onto
# vocab_size logits. Both start from random normal values.
# Open questions left by the author: why not use get_variable? do we need dict?
weights = dict(out=tf.Variable(tf.random_normal(shape=[n_hidden, vocab_size])))
biases = dict(out=tf.Variable(tf.random_normal(shape=[vocab_size])))

In [6]:
def RNN(x, weights, biases):
    """Build a 2-layer LSTM over the input window and return next-word logits.

    Args:
        x: Input tensor; it is reshaped to [-1, n_input], so it must carry
            n_input values per sample.
        weights: Mapping whose 'out' entry is the output projection matrix.
        biases: Mapping whose 'out' entry is the output bias vector.

    Returns:
        The RNN output of the last time step multiplied by weights['out']
        plus biases['out'].
    """
    # static_rnn consumes a list with one tensor per time step, so flatten
    # the window and cut it into n_input column slices.
    flat_window = tf.reshape(x, [-1, n_input])
    step_inputs = tf.split(flat_window, n_input, 1)

    # 2-layer LSTM with n_hidden units per layer
    # (open question left by the author: set reuse to true?)
    lstm_layers = [tf.contrib.rnn.BasicLSTMCell(n_hidden) for _ in range(2)]
    stacked_cell = tf.contrib.rnn.MultiRNNCell(lstm_layers)

    # unroll the stacked cell over the time steps; the final state is unused
    step_outputs, _ = tf.contrib.rnn.static_rnn(stacked_cell, step_inputs, dtype=tf.float32)

    # only the output of the last time step feeds the projection
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']

# Logits for the next word, built from the graph inputs and output-layer parameters.
pred = RNN(x, weights=weights, biases=biases)

In [7]:
# Loss: softmax is applied to the logits inside the op, the cross entropy
# against the labels is computed per sample and then averaged over the batch.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)
)
# Optimizer: one RMSProp update step that minimises the loss.
# (author's note: How does RMS work? Updates learning rate)
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss=cost)

In [8]:
# Evaluation: a sample is correct when the index of the largest logit equals
# the index of the largest label entry; accuracy is the mean over the batch.
correct_pred = tf.equal(
    tf.argmax(pred, axis=1),
    tf.argmax(y, axis=1),
)
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

In [9]:
# Op that initialises the global variables of the graph; it only takes effect
# once it is run inside a session (the training cell does `sess.run(init)`).
init = tf.global_variables_initializer()

In [11]:
# Run session: train on one sample per step, where a sample is a window of
# n_input consecutive words and the label is the word that follows it.
end_offset = n_input + 1
# Largest window start that keeps the n_input words and their label in range.
last_valid_offset = len(training_data) - end_offset
if last_valid_offset < 0:
    raise ValueError(
        "training_data needs at least %d words, got %d"
        % (end_offset, len(training_data)))
# Random (re)start positions come from [0, n_input + 1]; the upper bound is
# clamped so that a short corpus can never produce an out-of-range window.
max_random_offset = min(n_input + 1, last_valid_offset)

with tf.Session() as sess:
    sess.run(init)
    step = 0
    offset = random.randint(0, max_random_offset)
    acc_total = 0
    loss_total = 0

    while step < training_iters:
        # Generate a minibatch: wrap around to a fresh random start once the
        # window would run past the end of the corpus.
        if offset > last_valid_offset:
            offset = random.randint(0, max_random_offset)

        # get n_input symbols from the training data, convert to integers
        # for the input vector
        symbols_in_keys = [[dictionary[str(training_data[i])]]
                           for i in range(offset, offset + n_input)]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])

        # training label is a one-hot vector coming from the next symbol after
        # the input symbols in the source text
        symbols_out_onehot = np.zeros([vocab_size], dtype=float)
        symbols_out_onehot[dictionary[str(training_data[offset + n_input])]] = 1.0
        # reshape to fit the feed_dict
        symbols_out_onehot = np.reshape(symbols_out_onehot, [1, -1])

        # one optimisation step; also fetch accuracy, loss and the logits
        _, acc, loss, onehot_pred = sess.run(
            [optimizer, accuracy, cost, pred],
            feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
        loss_total += loss
        acc_total += acc

        if (step + 1) % display_step == 0:
            print("Iter= {}, Average Loss= {:.6f}, Average Accuracy= {:.2f}%".format(
                step + 1, loss_total / display_step, 100 * acc_total / display_step))
            acc_total = 0
            loss_total = 0
            symbols_in = [training_data[i] for i in range(offset, offset + n_input)]
            symbols_out = training_data[offset + n_input]
            # The logits are already a NumPy array, so take the argmax with
            # NumPy. Calling tf.argmax(...).eval() here would add a new op to
            # the graph on every report and make the graph grow without bound.
            predicted_id = int(np.argmax(onehot_pred, axis=1)[0])
            symbols_out_pred = reverse_dictionary[predicted_id]
            print("%s - [%s] vs [%s]" % (symbols_in, symbols_out, symbols_out_pred))
        step += 1
        offset += (n_input + 1)

    

Iter= 1000, Average Loss= 4.365232, Average Accuracy= 2.80%
['at', 'one', 'another'] - [and] vs [well]
Iter= 2000, Average Loss= 2.694892, Average Accuracy= 28.30%
['.', 'this', 'proposal'] - [met] vs [at]
Iter= 3000, Average Loss= 2.539779, Average Accuracy= 37.90%
['when', 'she', 'was'] - [about] vs [about]
Iter= 4000, Average Loss= 1.968727, Average Accuracy= 50.10%
['to', 'propose', 'that'] - [a] vs [means]
Iter= 5000, Average Loss= 1.947642, Average Accuracy= 54.30%
['we', 'could', 'easily'] - [escape] vs [escape]
Iter= 6000, Average Loss= 1.752010, Average Accuracy= 58.50%
['now', ',', 'if'] - [we] vs [we]
Iter= 7000, Average Loss= 1.318949, Average Accuracy= 65.50%
['he', 'thought', 'would'] - [meet] vs [know]
Iter= 8000, Average Loss= 1.240725, Average Accuracy= 68.20%
['that', 'but', 'at'] - [last] vs [last]
Iter= 9000, Average Loss= 1.134477, Average Accuracy= 68.50%
['common', 'enemy', ','] - [the] vs [the]
Iter= 10000, Average Loss= 0.871809, Average Accuracy= 75.80%
['nobo