In [1]:
import numpy as np
import tensorflow as tf
import random
import collections
import datetime

<img src="optimizer_graph.png" width="200px">

In [2]:
filename = "sherlock_holmes_shorter.txt"

## Setting hyper-parameters

In [3]:
learning_rate = 0.005
training_iters = 100000
display_step = 1000
n_input = 3
n_hidden = 100

## Reading Data

In [4]:
def prepdata(filename):
    with open(filename) as f:
        content = f.readlines()
    alllines = []
    for line in content:
        if line:
            alllines.extend(line.strip().split())
    # Convert to lower case
    alllines = [line.lower() for line in alllines]
    alllines = np.array(alllines).reshape([-1, ])
    print(alllines.shape)
    return alllines
    
wordarray = prepdata(filename)
#print(wordarray)

(925,)


In [8]:
def build_dataset(words):
    count = collections.Counter(words).most_common()
    dictionary = dict()
    for word, _ in count:
        dictionary[word] = len(dictionary)
    reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return dictionary, reverse_dictionary
dictionary, reverse_dictionary = build_dataset(wordarray)
vocab_size = len(dictionary)
print(len(dictionary), len(reverse_dictionary))

475 475


In [10]:
def getdata(wordarray, fdict, rdict, n_input, vocab_size): 
    offset = random.randint(0, len(wordarray))
    end_offset = n_input + 1
    # Add some randomness on selection process.
    if offset > (len(wordarray)-end_offset):
        offset = random.randint(0, n_input+1)
    #print(offset)
    symbols = [fdict[str(wordarray[i])] for i in range(offset, offset+n_input)]
    #print(symbols, [rdict[i] for i in symbols])
    symbols = np.array(symbols).reshape([-1, n_input, 1])
        
    labels_onehot = np.zeros([vocab_size], dtype=np.float)
    alabel = wordarray[offset+n_input]
    #print(alabel, fdict[str(alabel)])
    labels_onehot[fdict[str(alabel)]] = 1.0
    labels_onehot = labels_onehot.reshape([1, -1])
    return (symbols, labels_onehot)

symbols, labels_onehot = getdata(wordarray, dictionary, reverse_dictionary, n_input, vocab_size)
print(symbols, np.argmax(labels_onehot))
#print(symbols[0][0], [reverse_dictionary[i] for i in symbols[0][0].tolist()])
#print(reverse_dictionary[symbols[0][0][0]])
#for i in range(100):
#    getdata(wordarray, dictionary, reverse_dictionary, n_input, vocab_size)

[[[ 4]
  [36]
  [38]]] 42


In [11]:
code = np.transpose(symbols[0]).tolist()[0]
reverse_code = [reverse_dictionary[i] for i in code]
print(code,reverse_code)

[4, 36, 38] ['a', 'day', 'should']


## Defining x and y as placeholders

In [12]:
x = tf.placeholder("float", [None, n_input, 1])
y = tf.placeholder("float", [None, vocab_size])

## Initializing Weights and Biases

In [13]:
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, vocab_size]))
}
biases = {
    'out': tf.Variable(tf.random_normal([vocab_size]))
}

## Defining model

In [14]:
def RNN(x, weights, biases):
    x = tf.reshape(x, [-1, n_input])
    # Generate a 3-element sequence of input values
    x = tf.split(x,n_input,1)
    rnn_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)
    outputs, states = tf.contrib.rnn.static_rnn(rnn_cell, x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

## Define cost, optimizer and accuracy

In [15]:
pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)

correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

## Running the session 
This will run the graph and use all the tensors that were previously defined

In [16]:
init = tf.global_variables_initializer()

with tf.Session() as session:
    session.run(init)
    step = 0
    total_loss = 0.0
    total_acc = 0.0
    while step <= training_iters:
        symbols, labels_onehot = getdata(wordarray, dictionary, reverse_dictionary, 
                                         n_input, vocab_size)
        _, acc, loss, onehot_pred = session.run([optimizer, accuracy, cost, pred], \
                                    feed_dict={x: symbols, y: labels_onehot})
        
        total_loss += loss
        total_acc += acc
        if(step%display_step  == 0):
            print("Iter: %d, loss: %0.4f, acc: %0.2f"%(
                            step, total_loss/display_step, 100.0*total_acc/display_step))
            total_loss = 0.0
            total_acc = 0.0
        step += 1

Iter: 0, loss: 0.0079, acc: 0.00
Iter: 1000, loss: 6.3673, acc: 4.70
Iter: 2000, loss: 5.6416, acc: 7.40
Iter: 3000, loss: 5.6954, acc: 7.90
Iter: 4000, loss: 6.2746, acc: 5.80
Iter: 5000, loss: 6.3490, acc: 6.90
Iter: 6000, loss: 6.5870, acc: 6.50
Iter: 7000, loss: 6.5911, acc: 6.80
Iter: 8000, loss: 6.7857, acc: 5.30
Iter: 9000, loss: 7.0128, acc: 5.30
Iter: 10000, loss: 6.9556, acc: 5.70
Iter: 11000, loss: 6.9382, acc: 6.00
Iter: 12000, loss: 7.2032, acc: 6.70
Iter: 13000, loss: 7.1264, acc: 5.80
Iter: 14000, loss: 7.3576, acc: 6.90
Iter: 15000, loss: 7.6518, acc: 5.60
Iter: 16000, loss: 7.7178, acc: 5.60
Iter: 17000, loss: 7.7205, acc: 5.70
Iter: 18000, loss: 7.8636, acc: 4.80
Iter: 19000, loss: 8.0402, acc: 5.50
Iter: 20000, loss: 8.0200, acc: 6.70
Iter: 21000, loss: 7.8849, acc: 5.60
Iter: 22000, loss: 8.1723, acc: 5.70
Iter: 23000, loss: 7.8032, acc: 5.70
Iter: 24000, loss: 8.0732, acc: 5.20
Iter: 25000, loss: 8.0851, acc: 5.80
Iter: 26000, loss: 7.7533, acc: 6.00
Iter: 27000, l