In [1]:
import tensorflow as tf
import math
from tensorflow.examples.tutorials.mnist import input_data as mnist_data

In [2]:
print("Tensorflow version " + tf.__version__)
tf.set_random_seed(0)

Tensorflow version 1.6.0


In [3]:
mnist = mnist_data.read_data_sets("data",
                                  one_hot=True,
                                  reshape=False,
                                  validation_size=0)

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


<img src="img/cnn_arch2.png">

In [4]:
# input X: 28x28 grayscale images,
# the first dimension (None) will index
# the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])

# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])

N = 100 # Batch Size

In [5]:
W1 = tf.Variable(tf.truncated_normal([6, 6, 1, 6], stddev=0.1))
B1 = tf.Variable(tf.ones([6])/10)

W2 = tf.Variable(tf.truncated_normal([5, 5, 6, 12], stddev=0.1))
B2 = tf.Variable(tf.ones([12])/10)

W3 = tf.Variable(tf.truncated_normal([4, 4, 12, 24], stddev=0.1))
B3 = tf.Variable(tf.ones([24])/10)

W4 = tf.Variable(tf.truncated_normal([7 * 7 * 24, 200], stddev=0.1))
B4 = tf.Variable(tf.ones([200])/10)

pdrop = tf.placeholder(tf.float32)

W5 = tf.Variable(tf.truncated_normal([200, 10], stddev=0.1))
B5 = tf.Variable(tf.ones([10])/10)

In [6]:
"""
Must have strides[0] = strides[3] = 1.
For the most common case of the same horizontal and vertices strides,
strides = [1, stride, stride, 1].
"""

HC1 = tf.nn. conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
# (HC1) : (28, 28, 6)

HC2 = tf.nn. conv2d(HC1, W2, strides=[1, 2, 2, 1], padding='SAME')
# (HC2) : (14, 14, 12)

HC3 = tf.nn. conv2d(HC2, W3, strides=[1, 2, 2, 1], padding='SAME')
# (HC3) : (7, 7, 24)

H3 = tf.reshape(HC3, [-1, 7 * 7 * 24])

H4 = tf.nn.relu(tf.matmul(H3, W4) + B4)

pdrop = tf.placeholder(tf.float32)
H4d = tf.nn.dropout(H4, pdrop)

H5 = tf.matmul(H4d, W5) + B5
Y = tf.nn.softmax(H5)

In [7]:
# *10 since we really want sum of loss and not mean
# cross_entropy = -tf.reduce_mean(Y_ * tf.log(Y)) * 10.0

cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=H5, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy) * N

correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [8]:
# training, learning rate = 0.005
# You always forget to optimize!
# Optimizer begin backprop process

alpha = 0.005

# Vanilla Gradient Descent
# train_step = tf.train.GradientDescentOptimizer(alpha).minimize(cross_entropy)

step = tf.placeholder(tf.int32)

"""
tf.train.exponential_decay(
    learning_rate,
    global_step,
    decay_steps,
    decay_rate,
    staircase=False,
    name=None
)

decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
"""

lr = 0.0001 + tf.train.exponential_decay(alpha, step, 2000, 1/math.e)

# Adam Optimizer
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)

In [9]:
# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [10]:
iterations = 10000 + 1
train_data_log_freq = 500

In [12]:
for i in range(iterations):
    # training on batches of 100 images with N labels
    batch_X, batch_Y = mnist.train.next_batch(N)
    train_data = {
        X: batch_X,
        Y_: batch_Y,
        step: i,
        pdrop: 0.75
    }
    
    """
    I was stuck coz sess.run([accuracy, cross_entropy] 
    was before sess.run(train_step
    why would the order matter here?
    since train_step includes cross_entropy as a dependecy
    """
    
    sess.run(train_step, feed_dict = train_data)
    
    train_acc, train_loss = sess.run([accuracy, cross_entropy],
                    feed_dict = train_data)

    test_data = {
        X: mnist.test.images,
        Y_: mnist.test.labels,
        pdrop: 1.00
    }
    test_acc, test_loss = sess.run([accuracy, cross_entropy], feed_dict = test_data)
    
    if i % train_data_log_freq == 0:
        print("Train: {:>5d} | Acc: {:>5.4f} | Loss: {:>8.4f}".format(i, train_acc, train_loss))
        print("Test:: {:>5d} | Acc: {:>5.4f} | Loss: {:>8.4f}".format(i, test_acc, test_loss))
        print("")
    

Train:     0 | Acc: 0.3600 | Loss: 169.8403
Test::     0 | Acc: 0.2427 | Loss: 212.5465

Train:   500 | Acc: 0.9500 | Loss:  22.8221
Test::   500 | Acc: 0.9619 | Loss:  12.8596

Train:  1000 | Acc: 0.9900 | Loss:   3.5211
Test::  1000 | Acc: 0.9713 | Loss:   9.4003

Train:  1500 | Acc: 0.9900 | Loss:   6.3349
Test::  1500 | Acc: 0.9727 | Loss:   8.5962

Train:  2000 | Acc: 0.9400 | Loss:  18.9446
Test::  2000 | Acc: 0.9783 | Loss:   7.4854

Train:  2500 | Acc: 0.9800 | Loss:   5.9600
Test::  2500 | Acc: 0.9814 | Loss:   6.5217

Train:  3000 | Acc: 0.9900 | Loss:   3.3575
Test::  3000 | Acc: 0.9814 | Loss:   6.4829

Train:  3500 | Acc: 0.9900 | Loss:   2.9569
Test::  3500 | Acc: 0.9828 | Loss:   6.2966

Train:  4000 | Acc: 1.0000 | Loss:   1.6772
Test::  4000 | Acc: 0.9834 | Loss:   6.4943

Train:  4500 | Acc: 0.9900 | Loss:   2.9569
Test::  4500 | Acc: 0.9841 | Loss:   6.1835

Train:  5000 | Acc: 0.9900 | Loss:   3.6415
Test::  5000 | Acc: 0.9833 | Loss:   6.6927

Train:  5500 | Acc: 1