In [1]:
import tensorflow as tf
import math
from tensorflow.examples.tutorials.mnist import input_data as mnist_data

In [2]:
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)

Tensorflow version 1.6.0


In [3]:
mnist = mnist_data.read_data_sets("data",
                                  one_hot=True,
                                  reshape=False,
                                  validation_size=0)

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


<img src="img/cnn_arch.png">

In [4]:
# input X: 28x28 grayscale images,
# the first dimension (None) will index
# the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])

# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])

N = 100 # Batch Size

In [5]:
W1 = tf.Variable(tf.truncated_normal([5, 5, 1, 4], stddev=0.1))
B1 = tf.Variable(tf.ones([4])/10)

W2 = tf.Variable(tf.truncated_normal([4, 4, 4, 8], stddev=0.1))
B2 = tf.Variable(tf.ones([8])/10)

W3 = tf.Variable(tf.truncated_normal([4, 4, 8, 12], stddev=0.1))
B3 = tf.Variable(tf.ones([12])/10)

W4 = tf.Variable(tf.truncated_normal([7 * 7 * 12, 200], stddev=0.1))
B4 = tf.Variable(tf.ones([200])/10)

W5 = tf.Variable(tf.truncated_normal([200, 10], stddev=0.1))
B5 = tf.Variable(tf.ones([10])/10)

In [7]:
"""
Must have strides[0] = strides[3] = 1.
For the most common case of the same horizontal and vertices strides,
strides = [1, stride, stride, 1].
"""

HC1 = tf.nn. conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
# (HC1) : (28, 28, 4)

HC2 = tf.nn. conv2d(HC1, W2, strides=[1, 2, 2, 1], padding='SAME')
# (HC2) : (14, 14, 8)

HC3 = tf.nn. conv2d(HC2, W3, strides=[1, 2, 2, 1], padding='SAME')
# (HC3) : (7, 7, 12)

H3 = tf.reshape(HC3, [-1, 7 * 7 * 12])

H4 = tf.nn.relu(tf.matmul(H3, W4) + B4)

H5 = tf.matmul(H4, W5) + B5
Y = tf.nn.softmax(H5)

In [8]:
# *10 since we really want sum of loss and not mean
# cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 10.0

cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=H5, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * N

correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [9]:
# training, learning rate = 0.005
# You always forget to optimize!
# Optimizer begin backprop process

alpha = 0.005

# Vanilla Gradient Descent
# train_step = tf.train.GradientDescentOptimizer(alpha).minimize(cross_entropy)

step = tf.placeholder(tf.int32)

"""
tf.train.exponential_decay(
    learning_rate,
    global_step,
    decay_steps,
    decay_rate,
    staircase=False,
    name=None
)

decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
"""

lr = 0.0001 + tf.train.exponential_decay(alpha, step, 2000, 1/math.e)

# Adam Optimizer
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

In [10]:
# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [11]:
iterations = 7000
train_data_log_freq = 500

In [13]:
for i in range(iterations):
    # training on batches of 100 images with N labels
    batch_X, batch_Y = mnist.train.next_batch(N)
    train_data = {
        X: batch_X,
        Y_: batch_Y,
        step: i
    }
    
    """
    I was stuck coz sess.run([accuracy, cross_entropy] 
    was before sess.run(train_step
    why would the order matter here?
    since train_step includes cross_entropy as a dependecy
    """
    
    sess.run(train_step, feed_dict = train_data)
    
    train_acc, train_loss = sess.run([accuracy, cross_entropy],
                    feed_dict = train_data)

    test_data = {
        X: mnist.test.images,
        Y_: mnist.test.labels
    }
    test_acc, test_loss = sess.run([accuracy, cross_entropy], feed_dict = test_data)
    
    if i % train_data_log_freq == 0:
        print("Train: {:>5d} | Acc: {:>5.4f} | Loss: {:>8.4f}".format(i, train_acc, train_loss))
        print("Test:: {:>5d} | Acc: {:>5.4f} | Loss: {:>8.4f}".format(i, test_acc, test_loss))
        print("")
    

Train:     0 | Acc: 0.6700 | Loss: 188.8750
Test::     0 | Acc: 0.4534 | Loss: 206.3295

Train:   500 | Acc: 0.9600 | Loss:  14.3500
Test::   500 | Acc: 0.9694 | Loss:  10.4860

Train:  1000 | Acc: 1.0000 | Loss:   1.2848
Test::  1000 | Acc: 0.9754 | Loss:   8.2638

Train:  1500 | Acc: 0.9800 | Loss:   7.0434
Test::  1500 | Acc: 0.9749 | Loss:   8.3555

Train:  2000 | Acc: 1.0000 | Loss:   2.8607
Test::  2000 | Acc: 0.9798 | Loss:   6.6627

Train:  2500 | Acc: 1.0000 | Loss:   0.5011
Test::  2500 | Acc: 0.9820 | Loss:   6.3080

Train:  3000 | Acc: 1.0000 | Loss:   0.5392
Test::  3000 | Acc: 0.9820 | Loss:   6.4922

Train:  3500 | Acc: 1.0000 | Loss:   0.0834
Test::  3500 | Acc: 0.9834 | Loss:   7.3943

Train:  4000 | Acc: 1.0000 | Loss:   0.1037
Test::  4000 | Acc: 0.9829 | Loss:   7.9830

Train:  4500 | Acc: 1.0000 | Loss:   0.0708
Test::  4500 | Acc: 0.9840 | Loss:   8.1747

Train:  5000 | Acc: 1.0000 | Loss:   0.1188
Test::  5000 | Acc: 0.9840 | Loss:   8.7232

Train:  5500 | Acc: 1