Code based on https://gist.githubusercontent.com/AFAgarap/92c1c4a5dd771999b0201ec0e7edfee0/raw/828fbda0e466dacb1fad66549e0e3022e1c7263a/gru_svm_zalando_dropout.py by Abien Fred Agarap

In [1]:
import time
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
data = input_data.read_data_sets('data/fashion', one_hot=True)

Extracting data/fashion/train-images-idx3-ubyte.gz
Extracting data/fashion/train-labels-idx1-ubyte.gz
Extracting data/fashion/t10k-images-idx3-ubyte.gz
Extracting data/fashion/t10k-labels-idx1-ubyte.gz


In [3]:
CHECKPOINT_PATH = 'models/'
MODEL_NAME = 'model19' + str(time.asctime()) +'.ckpt'
LOGS_PATH = 'logs/'
RESTORE_CKP = True

In [4]:
# hyper-parameters
EPOCHS = 300
BATCH_SIZE = 1024
EV_BATCH_SIZE = 10000
CELL_SIZE = 256
DROPOUT_P_KEEP = 0.55
NUM_CLASSES = 10
SVM_C = 1

# dataset dimension
CHUNK_SIZE = 28
NUM_CHUNKS = 28

# learning rate dacay parameters
INITIAL_LEARNING_RATE = 1e-3
LEARNING_RATE_DECAY_FACTOR = 0.995
NUM_EPOCHS_PER_DECAY = 1

In [5]:
x = tf.placeholder(dtype=tf.float32, shape=[None, NUM_CHUNKS, CHUNK_SIZE], name='x_input')
y = tf.placeholder(dtype=tf.float32, shape=[None, NUM_CLASSES], name='y_input')
h = tf.placeholder(dtype=tf.float32, shape=[None, CELL_SIZE], name='state')

learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')
p_keep = tf.placeholder(dtype=tf.float32, name='p_keep')

# evaluation placeholders
ev_x = tf.placeholder(dtype=tf.float32, shape=[None, NUM_CHUNKS, CHUNK_SIZE], name='x_input')
ev_y = tf.placeholder(dtype=tf.float32, shape=[None, NUM_CLASSES], name='y_input')
ev_value = tf.placeholder(dtype=tf.float32, name="ev_value")

In [6]:
def variable_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

In [7]:
def recurrent_neural_network(x):
    with tf.name_scope('weights_and_biases'):
        with tf.name_scope('weights'):
            xav_init = tf.contrib.layers.xavier_initializer
            weight_2 = tf.get_variable('weights_2', shape=[CELL_SIZE, NUM_CLASSES], initializer=xav_init())
            variable_summaries(weight_2)
        with tf.name_scope('biases'):
            bias_2 = tf.get_variable('biases_2', initializer=tf.constant(0.1, shape=[NUM_CLASSES]))
            variable_summaries(bias_2)

    cell = tf.contrib.rnn.GRUCell(CELL_SIZE)
    drop_cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=p_keep, output_keep_prob=p_keep)

    outputs, states = tf.nn.dynamic_rnn(drop_cell, x, initial_state=h, dtype=tf.float32)

    states = tf.identity(states, name='H')
    hf = tf.transpose(outputs, [1, 0, 2])
    last = tf.gather(hf, int(hf.get_shape()[0]) - 1)

    with tf.name_scope('Wx_plus_b'):
        output = tf.matmul(last, weight_2) + bias_2
        tf.summary.histogram('pre-activations', output)
    return output, weight_2, states

In [8]:
def train_neural_network(x):
    # Decay the learning rate exponentially based on the number of steps.
    # gstep is just a placeholder for epoch number, that will trigger decay of lr
    gstep = tf.placeholder(dtype=tf.float32, shape=[], name="global_step")
    learning_rate = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                                gstep,
                                                NUM_EPOCHS_PER_DECAY,
                                                LEARNING_RATE_DECAY_FACTOR,
                                                staircase=True, name='learning_rate')    
   
    prediction, weight, states = recurrent_neural_network(x)

    # Add a summary to track the learning rate.
    tf.summary.scalar('learning_rate', learning_rate)

    with tf.name_scope('loss'):
        regularization_loss = 0.5 * tf.reduce_sum(tf.square(weight)) #+ 0.5 * tf.reduce_sum(tf.square(maxout_weight))
        hinge_loss = tf.reduce_sum(tf.square(tf.maximum(tf.zeros([BATCH_SIZE, NUM_CLASSES]),
                                                        1 - tf.cast(y, tf.float32) * prediction)))
        with tf.name_scope('loss'):
            cost = regularization_loss + SVM_C * hinge_loss
    tf.summary.scalar('loss', cost)
    
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    with tf.name_scope('accuracy'):
        predicted_class = tf.sign(prediction)
        predicted_class = tf.identity(predicted_class, name='prediction')
        with tf.name_scope('correct_prediction'):
            correct = tf.equal(tf.argmax(predicted_class, 1), tf.argmax(y, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    tf.summary.scalar('accuracy', accuracy)

    # merge is before ev_summary_op, because we want evaluation to be in a separate op
    # otherwise we will double log the predictions from train set
    merged = tf.summary.merge_all()
    
    with tf.name_scope('evaluation'):
        ev_predicted_class = tf.sign(prediction)
        ev_predicted_class = tf.identity(ev_predicted_class, name='prediction')
        with tf.name_scope('correct_prediction'):
            ev_correct = tf.equal(tf.argmax(ev_predicted_class, 1), tf.argmax(y, 1))
        with tf.name_scope('evaluation'):
            evaluation = tf.reduce_mean(tf.cast(ev_correct, 'float'))
    ev_summary_op = tf.summary.scalar("evaluation", ev_value)
    

    timestamp = str(time.asctime())
    train_writer = tf.summary.FileWriter(LOGS_PATH + timestamp, graph=tf.get_default_graph())
    saver = tf.train.Saver(max_to_keep=10)

    current_state = np.zeros([BATCH_SIZE, CELL_SIZE])

    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    
    with tf.Session() as sess:
        sess.run(init_op)

        checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_PATH)

        if checkpoint and checkpoint.model_checkpoint_path and RESTORE_CKP:
            saver.restore(sess, tf.train.latest_checkpoint(CHECKPOINT_PATH))
            pass
        try:
            for epoch in range(EPOCHS):
                
                epoch_loss = 0
                for _ in range(int(data.train.num_examples / BATCH_SIZE)):
                    epoch_x, epoch_y = data.train.next_batch(BATCH_SIZE)
                    
                    epoch_y[epoch_y == 0] = -1

                    epoch_x =  epoch_x.reshape((BATCH_SIZE, NUM_CHUNKS, CHUNK_SIZE))
                    

                    feed_dict = {x: epoch_x, y: epoch_y, h: current_state,
                                   gstep: epoch, p_keep:DROPOUT_P_KEEP}

                    summary, lr, _, next_state, c, accuracy_ = sess.run([merged, learning_rate, optimizer, states, cost, accuracy],
                                                                    feed_dict=feed_dict)
                    epoch_loss = c
                    current_state = next_state
                    #
                    # Real-time evaluation on test set
                    #
                    with tf.name_scope('evaluation'):
                        ev_x_, ev_y_ = data.test.next_batch(EV_BATCH_SIZE) 
                        ev_x_ = ev_x_.reshape((-1, NUM_CHUNKS, CHUNK_SIZE))
                        ev_y_[ev_y_ == 0] = -1

                        ev_accuracy_ = sess.run(evaluation, feed_dict={x: ev_x_, y: ev_y_,
                                                      h: np.zeros([EV_BATCH_SIZE, CELL_SIZE]),
                                                      p_keep: 1.0})
                        ev_summary_op_ = sess.run(ev_summary_op, feed_dict={ev_value: ev_accuracy_})
                
                if epoch % 2 == 0:
                    saver.save(sess, CHECKPOINT_PATH + MODEL_NAME, global_step=epoch)
                train_writer.add_summary(summary, epoch)
                train_writer.add_summary(ev_summary_op_, epoch)
                print('Epoch : {} completed out of {}, loss : {}, accuracy : {}, acc2: {}'.format(epoch, EPOCHS,
                                                                                        epoch_loss, accuracy_, ev_accuracy_))
        except KeyboardInterrupt:
            print('Training interrupted at {}'.format(epoch))
        finally:
            train_writer.close()

        saver.save(sess, CHECKPOINT_PATH + MODEL_NAME, global_step=epoch)

        x_ = data.test.images.reshape((-1, NUM_CHUNKS, CHUNK_SIZE))
        y_ = data.test.labels
        y_[y_ == 0] = -1

        accuracy_ = sess.run(accuracy, feed_dict={x: x_, y: y_,
                                                  h: np.zeros([10000, CELL_SIZE]),
                                                  p_keep: 1.0})

        print('Accuracy : {}'.format(accuracy_))

In [9]:
train_neural_network(x)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Restoring parameters from models/model19Wed Aug 30 01:04:33 2017.ckpt-1
Epoch : 0 completed out of 300, loss : 50.83574295043945, accuracy : 0.9912109375, acc2: 0.9882000684738159
Epoch : 1 completed out of 300, loss : 72.637451171875, accuracy : 0.984375, acc2: 0.9884001016616821
Epoch : 2 completed out of 300, loss : 70.63207244873047, accuracy : 0.9921875, acc2: 0.9899001121520996
Epoch : 3 completed out of 300, loss : 51.81064224243164, accuracy : 0.9912109375, acc2: 0.987200140953064
Epoch : 4 completed out of 300, loss : 73.22046661376953, accuracy : 0.986328125, acc2: 0.9879001379013062
Epoch : 5 completed out of 300, loss : 48.34013748168945, accuracy : 0.9931640625, acc2: 0.9873001575469971
Epoch : 6 completed out of 300, loss : 54.482139587402344, accuracy : 0.990234375, acc2: 0.987200140953064
Epoch : 7 completed out of 300, loss : 78.09452819824219, accuracy : 0.98828125, acc2: 0.9869001507759094
Epoch : 8 completed out of 300, loss : 73.40768432617188, accu

Epoch : 75 completed out of 300, loss : 41.961246490478516, accuracy : 0.990234375, acc2: 0.9890000820159912
Epoch : 76 completed out of 300, loss : 24.785938262939453, accuracy : 0.998046875, acc2: 0.9894000887870789
Epoch : 77 completed out of 300, loss : 8.843579292297363, accuracy : 0.9990234375, acc2: 0.9885001182556152
Epoch : 78 completed out of 300, loss : 10.08644962310791, accuracy : 1.0, acc2: 0.9879001379013062
Epoch : 79 completed out of 300, loss : 29.869321823120117, accuracy : 0.9970703125, acc2: 0.9888001680374146
Epoch : 80 completed out of 300, loss : 51.996299743652344, accuracy : 0.990234375, acc2: 0.989500105381012
Epoch : 81 completed out of 300, loss : 17.292808532714844, accuracy : 0.9970703125, acc2: 0.9885000586509705
Epoch : 82 completed out of 300, loss : 46.54772186279297, accuracy : 0.9921875, acc2: 0.9889001846313477
Epoch : 83 completed out of 300, loss : 35.419437408447266, accuracy : 0.99609375, acc2: 0.9887001514434814
Epoch : 84 completed out of 300

Epoch : 151 completed out of 300, loss : 31.975109100341797, accuracy : 0.994140625, acc2: 0.9892001152038574
Epoch : 152 completed out of 300, loss : 26.379606246948242, accuracy : 0.99609375, acc2: 0.9887001514434814
Epoch : 153 completed out of 300, loss : 34.62443542480469, accuracy : 0.9951171875, acc2: 0.9886001944541931
Epoch : 154 completed out of 300, loss : 37.279781341552734, accuracy : 0.9951171875, acc2: 0.9896001219749451
Epoch : 155 completed out of 300, loss : 36.88605880737305, accuracy : 0.994140625, acc2: 0.9866000413894653
Epoch : 156 completed out of 300, loss : 39.77861022949219, accuracy : 0.9931640625, acc2: 0.9882000684738159
Epoch : 157 completed out of 300, loss : 30.79017448425293, accuracy : 0.9970703125, acc2: 0.9879001379013062
Epoch : 158 completed out of 300, loss : 32.08930587768555, accuracy : 0.994140625, acc2: 0.9883002042770386
Epoch : 159 completed out of 300, loss : 34.51637268066406, accuracy : 0.994140625, acc2: 0.9889000654220581
Epoch : 160 c

Epoch : 226 completed out of 300, loss : 16.0700626373291, accuracy : 0.9990234375, acc2: 0.9870001673698425
Epoch : 227 completed out of 300, loss : 30.980058670043945, accuracy : 0.99609375, acc2: 0.9891001582145691
Epoch : 228 completed out of 300, loss : 24.805479049682617, accuracy : 0.9970703125, acc2: 0.9880001544952393
Epoch : 229 completed out of 300, loss : 8.773942947387695, accuracy : 0.998046875, acc2: 0.9887001514434814
Epoch : 230 completed out of 300, loss : 16.741113662719727, accuracy : 0.998046875, acc2: 0.9881001710891724
Epoch : 231 completed out of 300, loss : 4.540127754211426, accuracy : 1.0, acc2: 0.9889001250267029
Epoch : 232 completed out of 300, loss : 18.489686965942383, accuracy : 0.9970703125, acc2: 0.9886000752449036
Epoch : 233 completed out of 300, loss : 14.728338241577148, accuracy : 0.998046875, acc2: 0.9877000451087952
Epoch : 234 completed out of 300, loss : 18.962905883789062, accuracy : 0.9970703125, acc2: 0.9882001876831055
Epoch : 235 complet