Code based on https://gist.githubusercontent.com/AFAgarap/92c1c4a5dd771999b0201ec0e7edfee0/raw/828fbda0e466dacb1fad66549e0e3022e1c7263a/gru_svm_zalando_dropout.py by Abien Fred Agarap

In [1]:
import time
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
data = input_data.read_data_sets('data/fashion', one_hot=True)

Extracting data/fashion/train-images-idx3-ubyte.gz
Extracting data/fashion/train-labels-idx1-ubyte.gz
Extracting data/fashion/t10k-images-idx3-ubyte.gz
Extracting data/fashion/t10k-labels-idx1-ubyte.gz


In [3]:
CHECKPOINT_PATH = 'models/'
MODEL_NAME = 'model19' + str(time.asctime()) +'.ckpt'
LOGS_PATH = 'logs/'
RESTORE_CKP = False

In [4]:
# hyper-parameters
EPOCHS = 100
BATCH_SIZE = 1024
EV_BATCH_SIZE = 10000
CELL_SIZE = 256
DROPOUT_P_KEEP = 0.55
NUM_CLASSES = 10
SVM_C = 1

# dataset dimension
CHUNK_SIZE = 28
NUM_CHUNKS = 28

# learning rate dacay parameters
INITIAL_LEARNING_RATE = 1e-3
LEARNING_RATE_DECAY_FACTOR = 0.995
NUM_EPOCHS_PER_DECAY = 1

In [5]:
x = tf.placeholder(dtype=tf.float32, shape=[None, NUM_CHUNKS, CHUNK_SIZE], name='x_input')
y = tf.placeholder(dtype=tf.float32, shape=[None, NUM_CLASSES], name='y_input')
h = tf.placeholder(dtype=tf.float32, shape=[None, CELL_SIZE], name='state')

learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')
p_keep = tf.placeholder(dtype=tf.float32, name='p_keep')

# evaluation placeholders
ev_x = tf.placeholder(dtype=tf.float32, shape=[None, NUM_CHUNKS, CHUNK_SIZE], name='x_input')
ev_y = tf.placeholder(dtype=tf.float32, shape=[None, NUM_CLASSES], name='y_input')
ev_value = tf.placeholder(dtype=tf.float32, name="ev_value")

In [6]:
def variable_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

In [7]:
def recurrent_neural_network(x):
    with tf.name_scope('weights_and_biases'):
        with tf.name_scope('weights'):
            xav_init = tf.contrib.layers.xavier_initializer
            weight_2 = tf.get_variable('weights_2', shape=[CELL_SIZE, NUM_CLASSES], initializer=xav_init())
            variable_summaries(weight_2)
        with tf.name_scope('biases'):
            bias_2 = tf.get_variable('biases_2', initializer=tf.constant(0.1, shape=[NUM_CLASSES]))
            variable_summaries(bias_2)

    cell = tf.contrib.rnn.GRUCell(CELL_SIZE)
    drop_cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=p_keep, output_keep_prob=p_keep)

    outputs, states = tf.nn.dynamic_rnn(drop_cell, x, initial_state=h, dtype=tf.float32)

    states = tf.identity(states, name='H')
    hf = tf.transpose(outputs, [1, 0, 2])
    last = tf.gather(hf, int(hf.get_shape()[0]) - 1)

    with tf.name_scope('Wx_plus_b'):
        output = tf.matmul(last, weight_2) + bias_2
        tf.summary.histogram('pre-activations', output)
    return output, weight_2, states

In [8]:
def train_neural_network(x):
    # Decay the learning rate exponentially based on the number of steps.
    # gstep is just a placeholder for epoch number, that will trigger decay of lr
    gstep = tf.placeholder(dtype=tf.float32, shape=[], name="global_step")
    learning_rate = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                                gstep,
                                                NUM_EPOCHS_PER_DECAY,
                                                LEARNING_RATE_DECAY_FACTOR,
                                                staircase=True, name='learning_rate')    
   
    prediction, weight, states = recurrent_neural_network(x)

    # Add a summary to track the learning rate.
    tf.summary.scalar('learning_rate', learning_rate)

    with tf.name_scope('loss'):
        regularization_loss = 0.5 * tf.reduce_sum(tf.square(weight)) #+ 0.5 * tf.reduce_sum(tf.square(maxout_weight))
        hinge_loss = tf.reduce_sum(tf.square(tf.maximum(tf.zeros([BATCH_SIZE, NUM_CLASSES]),
                                                        1 - tf.cast(y, tf.float32) * prediction)))
        with tf.name_scope('loss'):
            cost = regularization_loss + SVM_C * hinge_loss
    tf.summary.scalar('loss', cost)
    
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    with tf.name_scope('accuracy'):
        predicted_class = tf.sign(prediction)
        predicted_class = tf.identity(predicted_class, name='prediction')
        with tf.name_scope('correct_prediction'):
            correct = tf.equal(tf.argmax(predicted_class, 1), tf.argmax(y, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    tf.summary.scalar('accuracy', accuracy)

    # merge is before ev_summary_op, because we want evaluation to be in a separate op
    # otherwise we will double log the predictions from train set
    merged = tf.summary.merge_all()
    
    with tf.name_scope('evaluation'):
        ev_predicted_class = tf.sign(prediction)
        ev_predicted_class = tf.identity(ev_predicted_class, name='prediction')
        with tf.name_scope('correct_prediction'):
            ev_correct = tf.equal(tf.argmax(ev_predicted_class, 1), tf.argmax(y, 1))
        with tf.name_scope('evaluation'):
            evaluation = tf.reduce_mean(tf.cast(ev_correct, 'float'))
    ev_summary_op = tf.summary.scalar("evaluation", ev_value)
    

    timestamp = str(time.asctime())
    train_writer = tf.summary.FileWriter(LOGS_PATH + timestamp, graph=tf.get_default_graph())
    saver = tf.train.Saver(max_to_keep=10)

    current_state = np.zeros([BATCH_SIZE, CELL_SIZE])

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    
    with tf.Session() as sess:
        sess.run(init_op)

        checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_PATH)

        if checkpoint and checkpoint.model_checkpoint_path and RESTORE_CKP:
            saver.restore(sess, tf.train.latest_checkpoint(CHECKPOINT_PATH))
            pass
        try:
            for epoch in range(EPOCHS):
                
                epoch_loss = 0
                for _ in range(int(data.train.num_examples / BATCH_SIZE)):
                    epoch_x, epoch_y = data.train.next_batch(BATCH_SIZE)
                    
                    epoch_y[epoch_y == 0] = -1

                    epoch_x =  epoch_x.reshape((BATCH_SIZE, NUM_CHUNKS, CHUNK_SIZE))
                    

                    feed_dict = {x: epoch_x, y: epoch_y, h: current_state,
                                   gstep: epoch, p_keep:DROPOUT_P_KEEP}

                    summary, lr, _, next_state, c, accuracy_ = sess.run([merged, learning_rate, optimizer, states, cost, accuracy],
                                                                    feed_dict=feed_dict)
                    epoch_loss = c
                    current_state = next_state
                    #
                    # Real-time evaluation on test set
                    #
                    with tf.name_scope('evaluation'):
                        ev_x_, ev_y_ = data.test.next_batch(EV_BATCH_SIZE) 
                        ev_x_ = ev_x_.reshape((-1, NUM_CHUNKS, CHUNK_SIZE))
                        ev_y_[ev_y_ == 0] = -1

                        ev_accuracy_ = sess.run(evaluation, feed_dict={x: ev_x_, y: ev_y_,
                                                      h: np.zeros([EV_BATCH_SIZE, CELL_SIZE]),
                                                      p_keep: 1.0})
                        ev_summary_op_ = sess.run(ev_summary_op, feed_dict={ev_value: ev_accuracy_})
                
                if epoch % 2 == 0:
                    saver.save(sess, CHECKPOINT_PATH + MODEL_NAME, global_step=epoch)
                train_writer.add_summary(summary, epoch)
                train_writer.add_summary(ev_summary_op_, epoch)
                print('Epoch : {} completed out of {}, loss : {}, accuracy : {}, acc2: {}'.format(epoch, EPOCHS,
                                                                                        epoch_loss, accuracy_, ev_accuracy_))
        except KeyboardInterrupt:
            print('Training interrupted at {}'.format(epoch))
        finally:
            train_writer.close()

        saver.save(sess, CHECKPOINT_PATH + MODEL_NAME, global_step=epoch)

        x_ = data.test.images.reshape((-1, NUM_CHUNKS, CHUNK_SIZE))
        y_ = data.test.labels
        y_[y_ == 0] = -1

        accuracy_ = sess.run(accuracy, feed_dict={x: x_, y: y_,
                                                  h: np.zeros([10000, CELL_SIZE]),
                                                  p_keep: 1.0})

        print('Accuracy : {}'.format(accuracy_))

In [9]:
train_neural_network(x)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch : 0 completed out of 100, loss : 2817.30517578125, accuracy : 0.283203125, acc2: 0.34899991750717163
Epoch : 1 completed out of 100, loss : 1431.6405029296875, accuracy : 0.67578125, acc2: 0.8008000254631042
Epoch : 2 completed out of 100, loss : 1015.3193359375, accuracy : 0.8046875, acc2: 0.897800087928772
Epoch : 3 completed out of 100, loss : 743.4059448242188, accuracy : 0.865234375, acc2: 0.9122000932693481
Epoch : 4 completed out of 100, loss : 553.884033203125, accuracy : 0.908203125, acc2: 0.93340003490448
Epoch : 5 completed out of 100, loss : 389.5703430175781, accuracy : 0.9384765625, acc2: 0.9441001415252686
Epoch : 6 completed out of 100, loss : 358.37115478515625, accuracy : 0.9453125, acc2: 0.9532001614570618
Epoch : 7 completed out of 100, loss : 303.1078796386719, accuracy : 0.9462890625, acc2: 0.9571000933647156
Epoch : 8 completed out of 100, loss : 337.7618103027344, accuracy : 0.94921875, acc2: 0.9571001529693604
Epoch : 9 completed out of 100, loss : 266.24

Epoch : 76 completed out of 100, loss : 82.93943786621094, accuracy : 0.990234375, acc2: 0.9842001795768738
Epoch : 77 completed out of 100, loss : 70.34688568115234, accuracy : 0.986328125, acc2: 0.9808000922203064
Epoch : 78 completed out of 100, loss : 61.325401306152344, accuracy : 0.9892578125, acc2: 0.983100175857544
Epoch : 79 completed out of 100, loss : 75.35031127929688, accuracy : 0.9873046875, acc2: 0.9851002097129822
Epoch : 80 completed out of 100, loss : 57.911041259765625, accuracy : 0.98828125, acc2: 0.9863001704216003
Epoch : 81 completed out of 100, loss : 66.95071411132812, accuracy : 0.9892578125, acc2: 0.9862000942230225
Epoch : 82 completed out of 100, loss : 82.3820571899414, accuracy : 0.9921875, acc2: 0.9843000769615173
Epoch : 83 completed out of 100, loss : 101.98953247070312, accuracy : 0.984375, acc2: 0.9846000671386719
Epoch : 84 completed out of 100, loss : 87.40266418457031, accuracy : 0.9873046875, acc2: 0.9853000640869141
Epoch : 85 completed out of 1