In [None]:
import collections
import os
import random

import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
# Short alias so later cells can call map_fn(...) directly. The commented-out
# line below is the alternative import of the same name that this alias replaced.
#from tensorflow.python.ops.functional_ops import map_fn as map_fn
map_fn = tf.map_fn

In [None]:
# Directory that holds the PTB text files read by load_data()
# (ptb.train.txt, ptb.valid.txt, ptb.test.txt).
# Set the PTB_DATA_PATH environment variable to point at a local copy; the
# original author's absolute path is kept as the fallback so existing setups
# keep working unchanged.
data_path = os.environ.get(
    "PTB_DATA_PATH",
    "/Users/kevindsouza/Documents/UBC/Term2/CPSC532L/Project/TensorFlow/graphs-and-NLP/LSTM/LSTM/simple-examples/data",
)

In [None]:
def read_words(filename):
    """Read a text file and return its whitespace-separated tokens.

    Every newline is replaced by a stand-alone ``<eos>`` token so that line
    boundaries survive tokenisation.

    Args:
        filename: path of the text file, opened through ``tf.gfile.GFile``.

    Returns:
        list of str: the tokens in file order, with one ``<eos>`` per newline.
    """
    with tf.gfile.GFile(filename, "r") as f:
        # Pad the marker with spaces: without them a line that is not already
        # space-padded would fuse with the marker ("a\nb" -> "a<eos>b").
        # split() collapses repeated whitespace, so space-padded input such as
        # the PTB files tokenises exactly as before.
        return f.read().replace("\n", " <eos> ").split()
    
def build_vocab(filename):
    """Build a word -> integer id mapping from the tokens of ``filename``.

    Ids are assigned in order of descending frequency; words with the same
    count are ordered alphabetically. The most frequent word therefore gets
    id 0.

    Args:
        filename: path of the text file, tokenised with ``read_words``.

    Returns:
        dict: maps each distinct token to its id. Empty when the file
        contains no tokens.
    """
    counter = collections.Counter(read_words(filename))

    # Sort key (-count, word): most frequent first, ties broken by the word.
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))

    # enumerate() rather than zip(*count_pairs): unpacking the zip result
    # raises ValueError when count_pairs is empty.
    return {word: idx for idx, (word, _) in enumerate(count_pairs)}

def file_to_word_ids(filename,word_to_id):
    """Convert the tokens of ``filename`` into a list of integer ids.

    Args:
        filename: path of the text file, tokenised with ``read_words``.
        word_to_id: mapping from token to id.

    Returns:
        list: the ids in token order. Tokens that are not keys of
        ``word_to_id`` are skipped.
    """
    ids = []
    for token in read_words(filename):
        # Out-of-vocabulary tokens are dropped rather than mapped to a placeholder.
        if token in word_to_id:
            ids.append(word_to_id[token])
    return ids

def load_data(data_dir=None):
    """Load the PTB train/valid/test splits as lists of word ids.

    The vocabulary is built from the training split only; the same mapping is
    then applied to all three splits. A short sample of the training data is
    printed as a sanity check.

    Args:
        data_dir: directory containing ``ptb.train.txt``, ``ptb.valid.txt``
            and ``ptb.test.txt``. Defaults to the module-level ``data_path``.

    Returns:
        tuple: ``(train_data, valid_data, test_data, vocabulary,
        reversed_dictionary)`` where the three ``*_data`` entries are lists of
        ids, ``vocabulary`` is the number of distinct training tokens and
        ``reversed_dictionary`` maps each id back to its word.
    """
    if data_dir is None:
        # Resolved at call time so a later change to data_path is picked up.
        data_dir = data_path

    train_path = os.path.join(data_dir, "ptb.train.txt")
    valid_path = os.path.join(data_dir, "ptb.valid.txt")
    test_path = os.path.join(data_dir, "ptb.test.txt")

    # Build the vocabulary from the training split, then encode every split with it.
    word_to_id = build_vocab(train_path)
    train_data = file_to_word_ids(train_path, word_to_id)
    valid_data = file_to_word_ids(valid_path, word_to_id)
    test_data = file_to_word_ids(test_path, word_to_id)

    vocabulary = len(word_to_id)

    # Inverse mapping (id -> word) for turning ids back into text.
    reversed_dictionary = {idx: word for word, idx in word_to_id.items()}

    # Sanity check: first ids, vocabulary size, and the first ten words decoded.
    print(train_data[:5])
    print(vocabulary)
    print(" ".join([reversed_dictionary[x] for x in train_data[:10]]))

    return train_data, valid_data, test_data, vocabulary, reversed_dictionary


# Load all three splits once; later cells read these module-level names.
(train_data, valid_data, test_data,
 vocabulary, reversed_dictionary) = load_data()

In [None]:
def batch_producer(raw_data,batch_size,num_steps):
    """Slice a flat sequence of ids into (input, target) mini-batch tensors.

    The sequence is cut to a multiple of ``batch_size`` and reshaped to
    ``[batch_size, batch_len]``. Each evaluation then yields the next
    ``num_steps``-wide window of that matrix as ``x`` and the same window
    shifted one position to the right as ``y``.

    The window index comes from ``tf.train.range_input_producer``, i.e. from a
    queue; per the TensorFlow documentation, queue runners have to be started
    before the returned tensors can be evaluated.

    Args:
        raw_data: sequence of integer ids (converted to an int32 tensor).
        batch_size: number of rows per batch.
        num_steps: number of time steps per batch.

    Returns:
        tuple: ``(x, y)`` int32 tensors, both with static shape
        ``[batch_size, num_steps]``.

    Raises:
        tf.errors.InvalidArgumentError: when the tensors are evaluated, if the
            data is too short to form a single window (``epoch_size == 0``).
    """
    raw_data = tf.convert_to_tensor(raw_data, name="raw_data", dtype=tf.int32)

    data_len = tf.size(raw_data)
    batch_len = data_len // batch_size

    # Drop the tail that does not fill a whole row, then lay the stream out as
    # batch_size parallel rows.
    data = tf.reshape(raw_data[0:batch_size * batch_len], [batch_size, batch_len])

    # "- 1" because the targets are the inputs shifted by one position.
    epoch_size = (batch_len - 1) // num_steps

    # Report a clear error when there is not enough data for even one window,
    # rather than handing a zero limit to the input producer.
    assertion = tf.assert_positive(
        epoch_size,
        message="epoch_size == 0, decrease batch_size or num_steps")
    with tf.control_dependencies([assertion]):
        epoch_size = tf.identity(epoch_size, name="epoch_size")

    i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()

    x = data[:, i * num_steps:(i + 1) * num_steps]
    # The slice bounds are dynamic, so pin the static shape explicitly.
    x.set_shape([batch_size, num_steps])
    y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
    y.set_shape([batch_size, num_steps])

    return x, y


In [None]:
# Graph definition for the recurrent model:
# placeholders -> RNN cell -> per-timestep sigmoid projection ->
# element-wise binary cross-entropy -> Adam update, plus an accuracy op.
# NOTE(review): the placeholders below are float32 with a trailing feature axis
# of size INPUT_SIZE / OUTPUT_SIZE, whereas batch_producer in this notebook
# yields int32 [batch_size, num_steps] id tensors. The two do not line up as
# written -- confirm which task this graph is meant to serve.
INPUT_SIZE    = 2       # 2 bits per timestep
RNN_HIDDEN    = 20
OUTPUT_SIZE   = 1       # 1 bit per timestep
TINY          = 1e-6    # to avoid NaNs in logs
LEARNING_RATE = 0.01

USE_LSTM = True

inputs  = tf.placeholder(tf.float32, (None, None, INPUT_SIZE))  # (time, batch, in)
outputs = tf.placeholder(tf.float32, (None, None, OUTPUT_SIZE)) # (time, batch, out)


## Here cell can be any function you want, provided it has two attributes:
#     - cell.zero_state(batch_size, dtype)- tensor which is an initial value
#                                           for state in __call__
#     - cell.__call__(input, state) - function that given input and previous
#                                     state returns tuple (output, state) where
#                                     state is the state passed to the next
#                                     timestep and output is the tensor used
#                                     for inferring the output at timestep. For
#                                     example for LSTM, output is just hidden,
#                                     but state is memory + hidden
# Example LSTM cell with learnable zero_state can be found here:
#    https://gist.github.com/nivwusquorum/160d5cf7e1e82c21fad3ebf04f039317
if USE_LSTM:
    cell = tf.contrib.rnn.BasicLSTMCell(RNN_HIDDEN, state_is_tuple=True)
else:
    cell = tf.contrib.rnn.BasicRNNCell(RNN_HIDDEN)

# Create initial state. Here it is just a constant tensor filled with zeros,
# but in principle it could be a learnable parameter. This is a bit tricky
# to do for LSTM's tuple state, but can be achieved by creating two vector
# Variables, which are then tiled along batch dimension and grouped into tuple.
# The batch size is read from axis 1 of `inputs` at run time, so the graph
# works for any batch size.
batch_size    = tf.shape(inputs)[1]
initial_state = cell.zero_state(batch_size, tf.float32)
# Debug output: shows the structure of the zero state.
print(initial_state)

# Given inputs (time, batch, input_size) this returns a tuple
#  - rnn_outputs: (time, batch, output_size)  [not to be confused with OUTPUT_SIZE]
#  - rnn_states:  NOTE(review): the tf.nn.dynamic_rnn documentation describes the
#                 second value as the final state only, not a per-timestep
#                 (time, batch, hidden_size) tensor -- confirm. It is not used
#                 again in the visible code.
rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state, time_major=True)

# project output from rnn output size to OUTPUT_SIZE. Sometimes it is worth adding
# an extra layer here. The sigmoid keeps every prediction in (0, 1).
final_projection = lambda x: layers.linear(x, num_outputs=OUTPUT_SIZE, activation_fn=tf.nn.sigmoid)

# apply projection to every timestep.
predicted_outputs = map_fn(final_projection, rnn_outputs)

# compute elementwise binary cross entropy; TINY keeps both log arguments
# strictly positive. The mean over all elements is the scalar training loss.
error = -(outputs * tf.log(predicted_outputs + TINY) + (1.0 - outputs) * tf.log(1.0 - predicted_outputs + TINY))
error = tf.reduce_mean(error)

# optimize
train_fn = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(error)

# assuming that absolute difference between output and correct answer is 0.5
# or less we can round it to the correct output.
accuracy = tf.reduce_mean(tf.cast(tf.abs(outputs - predicted_outputs) < 0.5, tf.float32))


In [None]:
# Training driver: builds a batch pipeline, opens a session, then runs 5 epochs
# of ITERATIONS_PER_EPOCH optimizer steps and prints the mean training error
# and the validation accuracy after each epoch.
NUM_STEPS = 35
ITERATIONS_PER_EPOCH = 100
BATCH_SIZE = 16

# FIXME(review): these "validation" batches are built from train_data;
# valid_data (also returned by load_data) is never used in this cell.
# FIXME(review): batch_producer returns graph tensors with static shape
# [BATCH_SIZE, NUM_STEPS], yet they are passed below as feed values for the
# rank-3 float32 placeholders `inputs` / `outputs`. As far as the TF1
# Session.run documentation goes, a tf.Tensor is not an accepted feed value --
# confirm, and evaluate the batch to numpy first. batch_producer also dequeues
# from tf.train.range_input_producer, and no queue runners are started
# anywhere in the visible code.
valid_x, valid_y = batch_producer(train_data,BATCH_SIZE,NUM_STEPS)

# NOTE(review): the session is never closed in the visible code.
session = tf.Session()
# Variables are not initialised automatically; run the initializer op once
# before the first training step.
session.run(tf.global_variables_initializer())

for epoch in range(5):
    epoch_error = 0
    for _ in range(ITERATIONS_PER_EPOCH):
        # here train_fn is what triggers backprop. error and accuracy on their
        # own do not trigger the backprop.
        # FIXME(review): generate_batch and NUM_BITS are not defined in any
        # visible cell, so this line raises NameError unless they come from
        # code outside this view.
        x, y = generate_batch(num_bits=NUM_BITS, batch_size=BATCH_SIZE)
        # Fetch [error, train_fn] together and keep only the loss value ([0]).
        epoch_error += session.run([error, train_fn], {
            inputs: x,
            outputs: y,
        })[0]
    # Mean loss over the iterations of this epoch.
    epoch_error /= ITERATIONS_PER_EPOCH
    valid_accuracy = session.run(accuracy, {
        inputs:  valid_x,
        outputs: valid_y,
    })
    print("Epoch {:d}, train error {:.2f}, valid accuracy {:.1f}".format(epoch, epoch_error, valid_accuracy * 100.0))