In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from CopyMemoryDataset import CopyMemoryProbs
import IPython.display as ipd
import glob
import os
# Environment variables that select which GPU this process may use.
# NOTE(review): presumably read by the CUDA runtime to enumerate devices in
# PCI bus order and to expose only device 0 -- confirm against the CUDA docs.
# NOTE(review): these are assigned after `import tensorflow` above; presumably
# that is fine as long as no GPU has been initialised yet -- confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [2]:
# ---- Experiment configuration ------------------------------------------------
n_train_samples, n_test_samples = 10000, 1000  # number of generated sequences
n_train_seq = 100   # sequence-length argument passed to CopyMemoryProbs
batch_sz = 10       # sequences per minibatch
n_hidden = 128      # GRU state size
n_inputs = 1 #f
n_classes = 10      # one-hot depth of the inputs / number of output classes

# ---- Data generation ---------------------------------------------------------
# Train and test sets are generated independently with the same settings.
cm_dataset = CopyMemoryProbs(n_train_samples, n_train_seq, n_classes).generate_data()
cm_dataset_test = CopyMemoryProbs(n_test_samples, n_train_seq, n_classes).generate_data()

# Entry 0 holds the inputs and entry 1 the targets; axis 2 is squeezed away so
# each sample becomes a flat row of per-step values.
x_train, y_train = (np.squeeze(part, axis=2) for part in cm_dataset[:2])
x_test, y_test = (np.squeeze(part, axis=2) for part in cm_dataset_test[:2])

print(x_train.shape, y_train.shape)

(10000, 100) (10000, 100)


In [3]:
def define_truncated_gaussian_weights(shape, name):
    """Create a variable initialised from a truncated normal distribution.

    Args:
        shape: Shape of the variable to create.
        name: Name given to the resulting `tf.Variable`.

    Returns:
        A `tf.Variable` whose initial value is `tf.truncated_normal(shape)`
        with mean 0 and standard deviation 1.
    """
    initial_value = tf.truncated_normal(shape, mean=0.0, stddev=1.0)
    return tf.Variable(initial_value, name=name)

def define_uniform_weights(shape, name):
    """Create a variable initialised uniformly between -1 and 1.

    Args:
        shape: Shape of the variable to create.
        name: Name given to the resulting `tf.Variable`.

    Returns:
        A `tf.Variable` whose initial value is `tf.random_uniform(shape, -1, 1)`.
    """
    initial_value = tf.random_uniform(shape, minval=-1, maxval=1)
    return tf.Variable(initial_value, name=name)

def define_xavier_weights(shape, name):
    """Create a 2-D weight variable with a Xavier-style uniform initialiser.

    The initial values are drawn uniformly from [-limit, limit], where
    limit = sqrt(6 / (shape[0] + shape[1])).

    Args:
        shape: Shape of the variable; its first two entries set the limit.
        name: Name given to the resulting `tf.Variable`.

    Returns:
        A `tf.Variable` initialised with `tf.random_uniform(shape, -limit, limit)`.
    """
    rows, cols = shape[0], shape[1]
    limit = np.sqrt(6.0 / (rows + cols))
    initial_value = tf.random_uniform(shape, minval=-limit, maxval=limit)
    return tf.Variable(initial_value, name=name)

def define_constant_weights(shape, name, value=0.01):
    """Create a variable of the given shape filled with a constant.

    Args:
        shape: Shape of the variable to create.
        name: Name given to the resulting `tf.Variable`.
        value: Scalar used for every element of the initial value
            (defaults to 0.01).

    Returns:
        A `tf.Variable` of shape `shape` whose elements all start at `value`.
    """
    # Pass `shape` through to tf.constant: a bare scalar constant would give a
    # single shared trainable value instead of one entry per element.
    return tf.Variable(tf.constant(value, shape=shape), name=name)

In [4]:
# Start from an empty default graph so re-running this cell does not pile up ops.
tf.reset_default_graph()

# One integer per time step: inputs `x` and class targets `y`, both fixed to
# [batch_sz, n_train_seq].
x = tf.placeholder(tf.int32, shape=[batch_sz, n_train_seq], name='x')
y = tf.placeholder(tf.int64, shape=[batch_sz, n_train_seq], name='y')

# Expand every integer input into a float one-hot vector of length n_classes.
x_oh = tf.one_hot(x, depth=n_classes, dtype=tf.float32)

# Input -> hidden: a single GRU layer unrolled over the sequence. Only the
# per-step outputs (first element of the returned pair) are kept.
gru_cell = tf.contrib.rnn.GRUCell(n_hidden)
h_out = tf.nn.dynamic_rnn(gru_cell, x_oh, dtype=tf.float32)[0]

# Hidden -> output: parameters of the affine read-out applied in `output_step`.
W_output = define_xavier_weights([n_hidden, n_classes], 'W_output')
b_output = define_constant_weights([n_classes], 'b_output')

def output_step(state):
    """Apply the affine read-out `state @ W_output + b_output`.

    Args:
        state: Tensor that can be matrix-multiplied with `W_output`.

    Returns:
        The product of `state` and `W_output`, offset by `b_output`.
    """
    projected = tf.matmul(state, W_output)
    return projected + b_output

# Per-step class scores (logits). tf.map_fn applies `output_step` to each slice
# of `h_out` along its first axis and stacks the results back together.
y_pred_oh = tf.map_fn(output_step, h_out, name='output_step')

# Predicted class per step = index of the largest score along the last axis.
y_pred = tf.argmax(y_pred_oh, 2)

# Cross-entropy between the logits and the integer targets, averaged over
# every position of every sequence in the batch.
per_step_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_pred_oh)
cost = tf.reduce_mean(per_step_xent)

# Fraction of positions whose predicted class equals the target.
correct_pred = tf.equal(y_pred, y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Training op: one Adam update of all trainable variables w.r.t. `cost`.
adam = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.89, beta2=0.94)
optimizer = adam.minimize(cost)

In [None]:
n_train_epochs = 1001
test_step = 10  # evaluate on the test set every `test_step` epochs


def _run_epoch(sess, inputs, targets, train_op=None):
    """Run one pass over `inputs`/`targets` in consecutive minibatches of `batch_sz`.

    Only full minibatches are used (`len(inputs) // batch_sz` of them); a
    trailing remainder is skipped because the `x`/`y` placeholders have a
    fixed batch dimension.

    Args:
        sess: Open `tf.Session` in which the graph variables are initialised.
        inputs: Input sequences, indexed by sample along axis 0.
        targets: Target sequences aligned with `inputs`.
        train_op: Optional op executed together with the metrics on every
            minibatch (pass `optimizer` to train, leave None to only evaluate).

    Returns:
        Tuple `(mean_loss, mean_acc, last_targets, last_preds)`: the
        minibatch-averaged `cost` and `accuracy`, plus the targets and `y_pred`
        values of the final minibatch (both None if no full minibatch ran).
    """
    fetches = [cost, accuracy, y_pred]
    if train_op is not None:
        fetches.append(train_op)

    losses, accs = [], []
    last_targets, last_preds = None, None
    n_minibatches = len(inputs) // batch_sz

    for batch_idx in range(n_minibatches):
        start_idx = batch_idx * batch_sz
        end_idx = start_idx + batch_sz
        batch_x = inputs[start_idx:end_idx]
        batch_y = targets[start_idx:end_idx]

        results = sess.run(fetches, feed_dict={x: batch_x, y: batch_y})
        losses.append(results[0])
        accs.append(results[1])
        last_targets, last_preds = batch_y, results[2]

    return np.mean(losses), np.mean(accs), last_targets, last_preds


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_epoch_loss = []
    train_epoch_acc = []
    test_epoch_loss = []
    test_epoch_acc = []

    print('..... Training')
    for epoch_idx in range(n_train_epochs):
        # One optimisation pass over the training set.
        epoch_loss, epoch_acc, batch_y, y_pred_val = _run_epoch(
            sess, x_train, y_train, train_op=optimizer)
        train_epoch_loss.append(epoch_loss)
        train_epoch_acc.append(epoch_acc*100)
        print ("Epoch {}\n\tTraining Error: {},  Train Accuracy: {}".format(epoch_idx, train_epoch_loss[-1], train_epoch_acc[-1]))

        # Every 100 epochs show the first sequence of the last training
        # minibatch next to the model's prediction for it.
        if epoch_idx%100==0 and y_pred_val is not None:
            print('Orig_y:', batch_y[0].astype(np.int32))
            print('Pred_y:', y_pred_val[0])

        # Periodic evaluation on the test set (no training op is run).
        if epoch_idx % test_step == 0:
            test_loss, test_acc = _run_epoch(sess, x_test, y_test)[:2]
            test_epoch_loss.append(test_loss)
            test_epoch_acc.append(test_acc*100)

            print ("\tTest Error: {},  Test Accuracy: {}".format(test_epoch_loss[-1], test_epoch_acc[-1]))

        

        
        


..... Training
Epoch 0
	Training Error: 0.8713966608047485,  Train Accuracy: 64.65550065040588
Orig_y: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 5 8 3 2 3 4 6 1 2 8 5 1 7
 6 8 5 1 2 2 4 8 3 7 3 7 1 2 4 4 3 1 2 6 1 8 7 8 8 4]
Pred_y: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 1 1 1 1 1 6 6 6 6 6 6 6
 6 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
	Test Error: 0.8334941864013672,  Test Accuracy: 64.97600078582764
Epoch 1
	Training Error: 0.8339499831199646,  Train Accuracy: 65.04110097885132
Epoch 2
	Training Error: 0.8218659162521362,  Train Accuracy: 65.93199968338013
Epoch 3
	Training Error: 0.8011791706085205,  Train Accuracy: 67.3054039478302
Epoch 4
	Training Error: 0.7895611524581909,  Train Accuracy: 67.79959797859192
Epoch 5
	Training Error: 0.7793830633163452,  Train Accuracy: 68.59820485115051
Epoch 6
	Traini

In [None]:
# Training loss curve: one point per epoch (mean of the minibatch costs).
fig, ax = plt.subplots()
ax.plot(train_epoch_loss)
ax.set(title='Training loss per epoch',
       xlabel='Epoch',
       ylabel='Mean minibatch cross-entropy loss')
plt.show()

In [None]:
# Training accuracy curve: one point per epoch (mean minibatch accuracy, in %).
fig, ax = plt.subplots()
ax.plot(train_epoch_acc)
ax.set(title='Training accuracy per epoch',
       xlabel='Epoch',
       ylabel='Mean minibatch accuracy (%)')
plt.show()