<a href="https://colab.research.google.com/github/sosmany1/RNN_Parity-Problem/blob/master/RNN_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#The input to the RNN at every time-step is the current value as well as a state vector 
#which represents what the network has “seen” at time-steps before. 
#This state-vector is the encoded memory of the RNN, initially set to zero.

from __future__ import print_function, division
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

num_epochs = 100
total_series_length = 50000
truncated_backprop_length = 2
#this is used in a confusing way, it basically refers to how many elements are inside each batch. 
#So the matrix we put in the placeholderX, has dimensions batchsize, aka number of rows, and each row then takes in certain number of elements which
# here are referred to as truncated back prop length. go figure.
state_size = 4
num_classes = 2
echo_step = 1
batch_size = 4
num_batches = total_series_length//batch_size//truncated_backprop_length

def generateData():
    x = 1.0-2*(np.random.randn(batch_size,3) < 0).astype(np.float32)
    y  = np.prod(x, axis=1)[:,np.newaxis]

    x = x.reshape((batch_size, -1))  # The first index changing slowest, subseries as rows
    y = y.reshape((batch_size, -1))
    # Reshaping takes the whole dataset and puts it into a matrix, that later will be sliced up into mini-batches.
    # reshape takes your data, and it reshapes it into a tensor with the shape (a,b). 
    #Here batch_size is the number of rows, and -1 means however many elements need to be in each row to fit all the data in batchsize num of rows.
    
    print(x)
    print(y)
    return (x, y)

batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length]) #placeholders are starting nodes, the first paramater gives datatype
batchY_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length]) # the second gives shape, here its 5, 15. so matrix is 5 categories with 15 categories in them, you get the idea...

init_state = tf.placeholder(tf.float32, [batch_size, state_size]) #here you have a placeholder for the initial state. they have the shape, 5 and 4.
#states are fed into the next 'layer'. the placeholder for the state matrix is batch size, ie 5, and state size, ie 4. Check this 

W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1,state_size)), dtype=tf.float32)

W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((1,num_classes)), dtype=tf.float32)

# Unpack columns
inputs_series = tf.unstack(batchX_placeholder, axis=1) 
labels_series = tf.unstack(batchY_placeholder, axis=1) 
#here we are taking the columns of the matrix and putting them into rows so that we have a list (axis=1) of elements corresponding to diff batches
#this is so that we can train on multiple batches at the same time.

#now this also means that we have to save three (batch size) number of states. 
#So thats why the it state place holder defined above has a shape of batchsize (with statesize entries in it. not sure why this is the case)
# Forward pass
current_state = init_state
states_series = []

for current_input in inputs_series:
    current_input = tf.reshape(current_input, [batch_size, 1])
    input_and_state_concatenated = tf.concat([current_input, current_state],1)  # Increasing number of columns
    # Notice the concatenation on above line, what we actually want to do is calculate the sum of two affine transforms:
    #current_input * Wa + current_state * Wb as shown in the figure. 
    #By concatenating those two tensors you will only use one matrix multiplication. 
    #The addition of the bias b is broadcasted on all samples in the batch.r

    next_state = tf.tanh(tf.matmul(input_and_state_concatenated, W) + b)  # Broadcasted addition
    states_series.append(next_state)
    current_state = next_state

print("states_series")
print(states_series)


logits_series = [tf.matmul(state, W2) + b2 for state in states_series] #Broadcasted addition
#W2 and B2 are the weights and biases of the state vector, they have their own weights because they act separately and propogate.
#this gives a series as well because remember we are runnigng different parts of the time series at the same time.
#this matrix with matmul states series and w2, is called "logits series". its just the output of states multiplied with their weights plus bias
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]
#print(logits_series)
#print(labels_series)
#sh = len(logits_series)
#print(sh)
#losses = [tf.reduce_mean((labels - logits)) for logits, labels in zip(logits_series,labels_series)]
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(labels,logits) for logits, labels in zip(logits_series,labels_series)]
total_loss = tf.reduce_mean(losses)

train_step = tf.train.AdagradOptimizer(0.3).minimize(total_loss)

def plot(loss_list, predictions_series, batchX, batchY):
    plt.subplot(2, 3, 1)
    plt.cla()
    plt.plot(loss_list)

    for batch_series_idx in range(5):
        one_hot_output_series = np.array(predictions_series)[:, batch_series_idx, :]
        single_output_series = np.array([(1 if out[0] < 0.5 else 0) for out in one_hot_output_series])

        plt.subplot(2, 3, batch_series_idx + 2)
        plt.cla()
        plt.axis([0, truncated_backprop_length, 0, 2])
        left_offset = range(truncated_backprop_length)
        plt.bar(left_offset, batchX[batch_series_idx, :], width=1, color="blue")
        plt.bar(left_offset, batchY[batch_series_idx, :] * 0.5, width=1, color="red")
        plt.bar(left_offset, single_output_series * 0.3, width=1, color="green")

    plt.draw()
    plt.pause(0.0001)


with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    plt.ion()
    plt.figure()
    plt.show()
    loss_list = []

    for epoch_idx in range(num_epochs):
        x,y = generateData()
        _current_state = np.zeros((batch_size, state_size))

        print("New data, epoch", epoch_idx)

        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:,start_idx:end_idx]
            batchY = y[:,start_idx:end_idx]

            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder:batchX,
                    batchY_placeholder:batchY,
                    init_state:_current_state
                })

            loss_list.append(_total_loss)

            if batch_idx%100 == 0:
                print("Step",batch_idx, "Loss", _total_loss)
                plot(loss_list, _predictions_series, batchX, batchY)

plt.ioff()
plt.show()

print ("inputs_series")
print(inputs_series)
for input in inputs_series:
  print(input)


states_series
[<tf.Tensor 'Tanh:0' shape=(4, 4) dtype=float32>, <tf.Tensor 'Tanh_1:0' shape=(4, 4) dtype=float32>]


ValueError: ignored