# Convolutional Neural Network on MNIST Dataset
## Krishna Thiyagarajan
## ECE - 411 - Computational Graphs for Machine Learning
## Professor Chris Curro
## Homework Assignment #3a
## February 18, 2017

In [1]:
import warnings
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Suppress every Python warning from this point on in the kernel.
warnings.filterwarnings('ignore')

# Import MNIST data
# Labels are requested one-hot encoded; the archives are read from data/.
mnist = input_data.read_data_sets("data/", one_hot=True)

# Length of one flattened input image (the model reshapes it to 28 x 28 x 1).
num_inputs = 784
# Width of the one-hot label vector / number of logits produced by the model.
num_classes = 10

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


In [2]:
def def_weight(shape, name):
    """Create a Xavier-initialised float32 variable and register it.

    Args:
        shape: Shape of the variable to create.
        name: Name handed to ``tf.get_variable``.

    Returns:
        The new variable. As side effects the variable is appended to the
        ``'model_vars'`` graph collection and the sum of its squared entries
        is appended to the ``'l2'`` graph collection.
    """
    xavier = tf.contrib.layers.xavier_initializer_conv2d()
    weight = tf.get_variable(
        name=name,
        shape=shape,
        dtype=tf.float32,
        initializer=xavier,
    )
    tf.add_to_collection('model_vars', weight)

    # Squared L2 norm of this variable, collected for the regularisation term.
    squared_norm = tf.reduce_sum(tf.square(weight))
    tf.add_to_collection('l2', squared_norm)
    return weight

def def_bias(shape, name):
    """Create a zero-initialised float32 variable and register it.

    Args:
        shape: Shape of the variable to create.
        name: Name handed to ``tf.get_variable``.

    Returns:
        The new variable. Like ``def_weight``, it is also appended to the
        ``'model_vars'`` collection and its sum of squares to ``'l2'``, so
        biases take part in the L2 penalty as well.
    """
    zeros = tf.constant_initializer(0.0)
    bias = tf.get_variable(
        name=name,
        shape=shape,
        dtype=tf.float32,
        initializer=zeros,
    )
    tf.add_to_collection('model_vars', bias)

    # Squared L2 norm of this variable, collected for the regularisation term.
    squared_norm = tf.reduce_sum(tf.square(bias))
    tf.add_to_collection('l2', squared_norm)
    return bias

class ConvANN:
    """Convolutional classifier for MNIST built with the TensorFlow 1.x graph API.

    Network (see ``build_model``): conv 'w1' + ReLU -> 2x2 max-pool ->
    conv 'w2' + ReLU -> 2x2 max-pool -> flatten -> dense 'w3' + ReLU ->
    dropout -> dense 'w4' producing the logits.

    The trainable variables are created by the caller and passed in. The
    model regularises and optimises only those variables, so several models
    can be built in the same default graph without affecting one another.
    """

    # Dictionary keys the network reads from ``weight_dim`` / ``bias_dim``.
    _WEIGHT_KEYS = ('w1', 'w2', 'w3', 'w4')
    _BIAS_KEYS = ('b1', 'b2', 'b3', 'b4')

    # Keep probability fed to the dropout layer on training steps.
    _TRAIN_KEEP_PROB = 0.75

    def __init__(self, sess, weight_dim, bias_dim, iterations, batch_size, learn_rate, gamma,  display_steps=100, num_in=num_inputs, num_class=num_classes):
        """Store the configuration, create the placeholders and build the graph.

        Args:
            sess: Session used for every ``run`` call made by this model.
            weight_dim: Dict with keys 'w1'..'w4'. Despite the name, the
                values are the weight variables themselves, not shapes.
            bias_dim: Dict with keys 'b1'..'b4' holding the bias variables.
            iterations: Number of optimisation steps performed by ``train``.
            batch_size: Number of training examples drawn per step.
            learn_rate: Learning rate passed to the Adam optimiser.
            gamma: Multiplier of the L2 penalty in the loss.
            display_steps: The loss is printed every this many steps; a
                value of 0 or None disables the progress output.
            num_in: Length of one flattened input example.
            num_class: Number of classes (width of the one-hot labels).
        """
        self.sess = sess
        self.num_inputs = num_in
        self.num_classes = num_class
        self.weight_dims = weight_dim
        self.bias_dims = bias_dim
        self.iterations = iterations
        self.batch_size = batch_size
        self.display_steps = display_steps
        self.learn_rate = learn_rate
        self.gamma = gamma
        self.x = tf.placeholder(tf.float32, [None, self.num_inputs])
        self.y = tf.placeholder(tf.float32, [None, self.num_classes])
        # Fed as the second argument of tf.nn.dropout (the keep probability):
        # 1.0 disables dropout.
        self.dropout = tf.placeholder(tf.float32)
        # The training op is created lazily by the first call to train().
        self.optim = None
        self.build_model()

    def _own_variables(self):
        """Return the eight variables this model uses: weights, then biases."""
        weights = [self.weight_dims[key] for key in self._WEIGHT_KEYS]
        biases = [self.bias_dims[key] for key in self._BIAS_KEYS]
        return weights + biases

    def conv2d(self, x, w, b, stride=1):
        """Apply a SAME-padded 2-D convolution, add the bias and apply ReLU."""
        x = tf.nn.conv2d(x, w, strides=[1, stride, stride, 1], padding='SAME')
        x = tf.nn.bias_add(x, b)
        return tf.nn.relu(x)

    def maxpool2d(self, x, k=2):
        """Apply SAME-padded max pooling with a k x k window and stride k."""
        return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

    def build_model(self):
        """Build the forward pass, the loss and the accuracy tensors.

        Sets ``self.yhat`` (logits), ``self.costs`` (mean softmax
        cross-entropy), ``self.l2`` (sum of squares of this model's weights
        and biases), ``self.loss`` (``costs + gamma * l2``),
        ``self.correct_pred`` and ``self.accuracy``.
        """
        # Flat input vectors are viewed as 28 x 28 single-channel images.
        net = tf.reshape(self.x, shape=[-1, 28, 28, 1])

        net = self.conv2d(net, self.weight_dims['w1'], self.bias_dims['b1'])
        net = self.maxpool2d(net)
        net = self.conv2d(net, self.weight_dims['w2'], self.bias_dims['b2'])
        net = self.maxpool2d(net)

        # Flatten to whatever input width the first dense weight matrix expects.
        dense_in = self.weight_dims['w3'].get_shape().as_list()[0]
        net = tf.reshape(net, [-1, dense_in])
        net = tf.add(tf.matmul(net, self.weight_dims['w3']), self.bias_dims['b3'])
        net = tf.nn.relu(net)

        net = tf.nn.dropout(net, self.dropout)

        self.yhat = tf.add(tf.matmul(net, self.weight_dims['w4']), self.bias_dims['b4'])

        self.costs = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.yhat, labels=self.y))
        # Penalise only this model's variables. Reading the graph-wide 'l2'
        # collection would also pull in the variables of every model built
        # earlier in the same default graph.
        self.l2 = tf.reduce_sum([tf.reduce_sum(tf.square(var)) for var in self._own_variables()])
        self.loss = self.costs + self.gamma * self.l2

        self.correct_pred = tf.equal(tf.argmax(self.yhat, 1), tf.argmax(self.y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32))

    def train(self):
        """Initialise the variables, run ``iterations`` Adam steps, then validate.

        Batches come from the module-level ``mnist`` dataset. Every
        ``display_steps`` steps the loss on the batch just trained on is
        re-evaluated with dropout disabled and printed. Calling ``train``
        again re-initialises all variables and restarts training.
        """
        if self.optim is None:
            # Build the training op once, restricted to this model's own
            # variables rather than the graph-wide 'model_vars' collection.
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learn_rate)
            self.optim = optimizer.minimize(self.loss, var_list=self._own_variables())
        self.sess.run(tf.global_variables_initializer())

        for kk in range(self.iterations):
            batch_x, batch_y = mnist.train.next_batch(self.batch_size)
            self.sess.run([self.optim], feed_dict={self.x: batch_x, self.y: batch_y, self.dropout: self._TRAIN_KEEP_PROB})
            # Guard against display_steps == 0, which would raise on the modulo.
            if self.display_steps and kk % self.display_steps == 0:
                loss = self.sess.run(self.loss, feed_dict={self.x: batch_x, self.y: batch_y, self.dropout: 1.0})
                print("Step: %d, Loss: %f" % (kk, loss))
        print("Optimization complete!")
        self.valid_accuracy()

    def valid_accuracy(self, num_examples=1000):
        """Print the accuracy on the first ``num_examples`` validation examples.

        Args:
            num_examples: Size of the leading slice of the validation split
                to score; pass None to score the whole split.
        """
        images = mnist.validation.images[:num_examples]
        labels = mnist.validation.labels[:num_examples]
        acc = self.sess.run(self.accuracy, feed_dict={self.x: images, self.y: labels, self.dropout: 1.0})
        print("Validation Accuracy: ", acc)

    def test_accuracy(self, num_examples=500):
        """Print the accuracy on the first ``num_examples`` test examples.

        Args:
            num_examples: Size of the leading slice of the test split to
                score; pass None to score the whole split.
        """
        images = mnist.test.images[:num_examples]
        labels = mnist.test.labels[:num_examples]
        acc = self.sess.run(self.accuracy, feed_dict={self.x: images, self.y: labels, self.dropout: 1.0})
        print("Test Accuracy: ", acc)

In [3]:
# Run 1: 1000 iterations, batch size 64, learning rate 1e-3, gamma 1e-4.

sess_1 = tf.Session()

# Hyperparameters for this run.
runs_1 = 1000
minibatch_1 = 64
learnRate_1 = 1e-3
gamma_1 = 1e-4

# Variables for this run; the variable names carry the run number (w1x / b1x).
weight_dim_1 = dict(
    w1=def_weight([5, 5, 1, 32], 'w11'),
    w2=def_weight([5, 5, 32, 64], 'w12'),
    # Two stride-2 poolings turn the 28 x 28 input into 7 x 7 maps of 64 channels.
    w3=def_weight([7 * 7 * 64, 1024], 'w13'),
    w4=def_weight([1024, num_classes], 'w14'),
)

bias_dim_1 = dict(
    b1=def_bias([32], 'b11'),
    b2=def_bias([64], 'b12'),
    b3=def_bias([1024], 'b13'),
    b4=def_bias([num_classes], 'b14'),
)

model_1 = ConvANN(
    sess=sess_1,
    weight_dim=weight_dim_1,
    bias_dim=bias_dim_1,
    iterations=runs_1,
    batch_size=minibatch_1,
    learn_rate=learnRate_1,
    gamma=gamma_1,
)

model_1.train()

Step: 0, Loss: 2.215885
Step: 100, Loss: 0.190638
Step: 200, Loss: 0.196720
Step: 300, Loss: 0.160326
Step: 400, Loss: 0.219079
Step: 500, Loss: 0.278651
Step: 600, Loss: 0.204617
Step: 700, Loss: 0.118456
Step: 800, Loss: 0.095850
Step: 900, Loss: 0.120182
Optimization complete!
Validation Accuracy:  0.985


In [4]:
# Run 2: same settings as Run 1 but trained for 2000 iterations.

sess_2 = tf.Session()

# Hyperparameters for this run.
runs_2 = 2000
minibatch_2 = 64
learnRate_2 = 1e-3
gamma_2 = 1e-4

# Variables for this run; the variable names carry the run number (w2x / b2x).
weight_dim_2 = dict(
    w1=def_weight([5, 5, 1, 32], 'w21'),
    w2=def_weight([5, 5, 32, 64], 'w22'),
    # Two stride-2 poolings turn the 28 x 28 input into 7 x 7 maps of 64 channels.
    w3=def_weight([7 * 7 * 64, 1024], 'w23'),
    w4=def_weight([1024, num_classes], 'w24'),
)

bias_dim_2 = dict(
    b1=def_bias([32], 'b21'),
    b2=def_bias([64], 'b22'),
    b3=def_bias([1024], 'b23'),
    b4=def_bias([num_classes], 'b24'),
)

model_2 = ConvANN(
    sess=sess_2,
    weight_dim=weight_dim_2,
    bias_dim=bias_dim_2,
    iterations=runs_2,
    batch_size=minibatch_2,
    learn_rate=learnRate_2,
    gamma=gamma_2,
)

model_2.train()

Step: 0, Loss: 2.393782
Step: 100, Loss: 0.384075
Step: 200, Loss: 0.132534
Step: 300, Loss: 0.140111
Step: 400, Loss: 0.181190
Step: 500, Loss: 0.134737
Step: 600, Loss: 0.128377
Step: 700, Loss: 0.280562
Step: 800, Loss: 0.069391
Step: 900, Loss: 0.108024
Step: 1000, Loss: 0.105062
Step: 1100, Loss: 0.083044
Step: 1200, Loss: 0.093895
Step: 1300, Loss: 0.084782
Step: 1400, Loss: 0.068128
Step: 1500, Loss: 0.067210
Step: 1600, Loss: 0.069318
Step: 1700, Loss: 0.061077
Step: 1800, Loss: 0.188755
Step: 1900, Loss: 0.076858
Optimization complete!
Validation Accuracy:  0.981


In [5]:
# Run 3: 2000 iterations with the batch size halved to 32.

sess_3 = tf.Session()

# Hyperparameters for this run.
runs_3 = 2000
minibatch_3 = 32
learnRate_3 = 1e-3
gamma_3 = 1e-4

# Variables for this run; the variable names carry the run number (w3x / b3x).
weight_dim_3 = dict(
    w1=def_weight([5, 5, 1, 32], 'w31'),
    w2=def_weight([5, 5, 32, 64], 'w32'),
    # Two stride-2 poolings turn the 28 x 28 input into 7 x 7 maps of 64 channels.
    w3=def_weight([7 * 7 * 64, 1024], 'w33'),
    w4=def_weight([1024, num_classes], 'w34'),
)

bias_dim_3 = dict(
    b1=def_bias([32], 'b31'),
    b2=def_bias([64], 'b32'),
    b3=def_bias([1024], 'b33'),
    b4=def_bias([num_classes], 'b34'),
)

model_3 = ConvANN(
    sess=sess_3,
    weight_dim=weight_dim_3,
    bias_dim=bias_dim_3,
    iterations=runs_3,
    batch_size=minibatch_3,
    learn_rate=learnRate_3,
    gamma=gamma_3,
)

model_3.train()

Step: 0, Loss: 2.447249
Step: 100, Loss: 0.333082
Step: 200, Loss: 0.302939
Step: 300, Loss: 0.151420
Step: 400, Loss: 0.200318
Step: 500, Loss: 0.223454
Step: 600, Loss: 0.103670
Step: 700, Loss: 0.103654
Step: 800, Loss: 0.117292
Step: 900, Loss: 0.081809
Step: 1000, Loss: 0.102687
Step: 1100, Loss: 0.076788
Step: 1200, Loss: 0.091380
Step: 1300, Loss: 0.076963
Step: 1400, Loss: 0.148603
Step: 1500, Loss: 0.086423
Step: 1600, Loss: 0.097505
Step: 1700, Loss: 0.091366
Step: 1800, Loss: 0.093944
Step: 1900, Loss: 0.074915
Optimization complete!
Validation Accuracy:  0.985


In [6]:
# Run 4: as Run 3 but with the learning rate raised to 1e-2.
# In the recorded output this run does not learn: the loss stays near 2.3
# and the validation accuracy is 0.116.

sess_4 = tf.Session()

# Hyperparameters for this run.
runs_4 = 2000
minibatch_4 = 32
learnRate_4 = 1e-2
gamma_4 = 1e-4

# Variables for this run; the variable names carry the run number (w4x / b4x).
weight_dim_4 = dict(
    w1=def_weight([5, 5, 1, 32], 'w41'),
    w2=def_weight([5, 5, 32, 64], 'w42'),
    # Two stride-2 poolings turn the 28 x 28 input into 7 x 7 maps of 64 channels.
    w3=def_weight([7 * 7 * 64, 1024], 'w43'),
    w4=def_weight([1024, num_classes], 'w44'),
)

bias_dim_4 = dict(
    b1=def_bias([32], 'b41'),
    b2=def_bias([64], 'b42'),
    b3=def_bias([1024], 'b43'),
    b4=def_bias([num_classes], 'b44'),
)

model_4 = ConvANN(
    sess=sess_4,
    weight_dim=weight_dim_4,
    bias_dim=bias_dim_4,
    iterations=runs_4,
    batch_size=minibatch_4,
    learn_rate=learnRate_4,
    gamma=gamma_4,
)

model_4.train()

Step: 0, Loss: 13.727271
Step: 100, Loss: 2.476243
Step: 200, Loss: 2.439242
Step: 300, Loss: 2.346537
Step: 400, Loss: 2.344593
Step: 500, Loss: 2.330914
Step: 600, Loss: 2.309442
Step: 700, Loss: 2.333476
Step: 800, Loss: 2.326510
Step: 900, Loss: 2.280150
Step: 1000, Loss: 2.313780
Step: 1100, Loss: 2.300328
Step: 1200, Loss: 2.294148
Step: 1300, Loss: 2.298687
Step: 1400, Loss: 2.293363
Step: 1500, Loss: 2.323333
Step: 1600, Loss: 2.313638
Step: 1700, Loss: 2.307415
Step: 1800, Loss: 2.304341
Step: 1900, Loss: 2.333972
Optimization complete!
Validation Accuracy:  0.116


In [7]:
# Run 5: learning rate 1e-2 again, with 5000 iterations and batch size 16.
# The recorded (truncated) output shows the loss staying near 2.3, as in Run 4.

sess_5 = tf.Session()

# Hyperparameters for this run.
runs_5 = 5000
minibatch_5 = 16
learnRate_5 = 1e-2
gamma_5 = 1e-4

# Variables for this run; the variable names carry the run number (w5x / b5x).
weight_dim_5 = dict(
    w1=def_weight([5, 5, 1, 32], 'w51'),
    w2=def_weight([5, 5, 32, 64], 'w52'),
    # Two stride-2 poolings turn the 28 x 28 input into 7 x 7 maps of 64 channels.
    w3=def_weight([7 * 7 * 64, 1024], 'w53'),
    w4=def_weight([1024, num_classes], 'w54'),
)

bias_dim_5 = dict(
    b1=def_bias([32], 'b51'),
    b2=def_bias([64], 'b52'),
    b3=def_bias([1024], 'b53'),
    b4=def_bias([num_classes], 'b54'),
)

model_5 = ConvANN(
    sess=sess_5,
    weight_dim=weight_dim_5,
    bias_dim=bias_dim_5,
    iterations=runs_5,
    batch_size=minibatch_5,
    learn_rate=learnRate_5,
    gamma=gamma_5,
)

model_5.train()

Step: 0, Loss: 5.038886
Step: 100, Loss: 2.417109
Step: 200, Loss: 2.399310
Step: 300, Loss: 2.324419
Step: 400, Loss: 2.320112
Step: 500, Loss: 2.325422
Step: 600, Loss: 2.309214
Step: 700, Loss: 2.355353
Step: 800, Loss: 2.311794
Step: 900, Loss: 2.284373
Step: 1000, Loss: 2.277915
Step: 1100, Loss: 2.282344
Step: 1200, Loss: 2.266680
Step: 1300, Loss: 2.326157
Step: 1400, Loss: 2.275277
Step: 1500, Loss: 2.317041
Step: 1600, Loss: 2.298300
Step: 1700, Loss: 2.336502
Step: 1800, Loss: 2.312979
Step: 1900, Loss: 2.305370
Step: 2000, Loss: 2.305101
Step: 2100, Loss: 2.320704
Step: 2200, Loss: 2.346688
Step: 2300, Loss: 2.250865
Step: 2400, Loss: 2.339192
Step: 2500, Loss: 2.337123
Step: 2600, Loss: 2.277326
Step: 2700, Loss: 2.256661
Step: 2800, Loss: 2.325802
Step: 2900, Loss: 2.287292
Step: 3000, Loss: 2.298874
Step: 3100, Loss: 2.287511
Step: 3200, Loss: 2.290134
Step: 3300, Loss: 2.319763
Step: 3400, Loss: 2.320627
Step: 3500, Loss: 2.295411
Step: 3600, Loss: 2.293262
Step: 3700, L

In [8]:
# Run 6: long run - 20000 iterations, batch size 50, learning rate 1e-3,
# gamma lowered to 1e-5, progress printed every 1000 steps.

# Redundant re-import: tensorflow is already imported in the first cell.
import tensorflow as tf

sess_6 = tf.Session()

# Hyperparameters for this run.
runs_6 = 20000
minibatch_6 = 50
learnRate_6 = 1e-3
gamma_6 = 1e-5

# Variables for this run; unlike the earlier runs the names use an underscore (w_6x / b_6x).
weight_dim_6 = dict(
    w1=def_weight([5, 5, 1, 32], 'w_61'),
    w2=def_weight([5, 5, 32, 64], 'w_62'),
    # Two stride-2 poolings turn the 28 x 28 input into 7 x 7 maps of 64 channels.
    w3=def_weight([7 * 7 * 64, 1024], 'w_63'),
    w4=def_weight([1024, num_classes], 'w_64'),
)

bias_dim_6 = dict(
    b1=def_bias([32], 'b_61'),
    b2=def_bias([64], 'b_62'),
    b3=def_bias([1024], 'b_63'),
    b4=def_bias([num_classes], 'b_64'),
)

model_6 = ConvANN(
    sess=sess_6,
    weight_dim=weight_dim_6,
    bias_dim=bias_dim_6,
    iterations=runs_6,
    batch_size=minibatch_6,
    learn_rate=learnRate_6,
    gamma=gamma_6,
    display_steps=1000,
)

model_6.train()

Step: 0, Loss: 2.178702
Step: 1000, Loss: 0.067084
Step: 2000, Loss: 0.026480
Step: 3000, Loss: 0.021406
Step: 4000, Loss: 0.030152
Step: 5000, Loss: 0.026771
Step: 6000, Loss: 0.050622
Step: 7000, Loss: 0.027832
Step: 8000, Loss: 0.032912
Step: 9000, Loss: 0.034477
Step: 10000, Loss: 0.022863
Step: 11000, Loss: 0.112639
Step: 12000, Loss: 0.023649
Step: 13000, Loss: 0.022876
Step: 14000, Loss: 0.023728
Step: 15000, Loss: 0.023915
Step: 16000, Loss: 0.022817
Step: 17000, Loss: 0.021602
Step: 18000, Loss: 0.020760
Step: 19000, Loss: 0.021752
Optimization complete!
Validation Accuracy:  0.995


In [9]:
# Report test accuracy for Run 6, the run with the highest recorded
# validation accuracy (0.995). test_accuracy() scores only the first 500
# test images, so this is not the accuracy on the full test split.
model_6.test_accuracy()

Test Accuracy:  0.988
