In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
# Load MNIST with one-hot labels; '' as the directory puts the archives in the working directory.
mnist = input_data.read_data_sets(train_dir='', one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting t10k-labels-idx1-ubyte.gz


In [3]:
# Data fetch: pull the image and label arrays out of the train and test splits.
x_train, y_train = mnist.train.images, mnist.train.labels
x_test, y_test = mnist.test.images, mnist.test.labels

In [4]:
# Helper definitions
def weight_variable(shape):
    """Return a ``tf.Variable`` of the given shape for use as layer weights.

    The initial values are sampled with ``tf.truncated_normal`` using a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [5]:
def weight_bias(shape):
    """Return a ``tf.Variable`` of the given shape for use as a layer bias.

    Every element starts at the constant value 0.1.
    """
    initial_bias = tf.constant(0.1, shape=shape)
    bias = tf.Variable(initial_bias)
    return bias

In [6]:
def conv(X, W):
    """Apply a 2-D convolution of input ``X`` with filter ``W``.

    Uses a stride of 1 in every dimension and 'SAME' padding.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(X, W, strides=unit_strides, padding='SAME')

In [7]:
def pool(X):
    """Max-pool ``X`` with a 2x2 window and a stride of 2, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(X, ksize=window, strides=step, padding='SAME')

In [8]:
# Graph inputs: X takes rows of 784 pixel values, Y takes rows of 10 label entries.
# The leading None leaves the number of rows open.
X = tf.placeholder(dtype=tf.float32, shape=[None, 784])
Y = tf.placeholder(dtype=tf.float32, shape=[None, 10])

In [9]:
# Turn each 784-value row back into a 28x28 single-channel image for the conv layers.
x_image = tf.reshape(X, shape=[-1, 28, 28, 1])

In [10]:
# Sanity check of the reshaped input's static shape.
print(x_image.get_shape())

(?, 28, 28, 1)


In [11]:
# First convolution layer: input (28, 28, 1) => output (28, 28, 32), 5x5 filters
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = weight_bias([32])
conv1_preact = conv(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_preact)
print(W_conv1.shape, b_conv1.shape, h_conv1.shape)

(5, 5, 1, 32) (32,) (?, 28, 28, 32)


In [12]:
# First pooling layer: input (28, 28, 32) => output (14, 14, 32)
h_pool1 = pool(h_conv1)
print(h_pool1.get_shape())

(?, 14, 14, 32)


In [13]:
# Second convolution layer: input (14, 14, 32) => output (14, 14, 64), 5x5 filters
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = weight_bias([64])
conv2_preact = conv(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_preact)
print(W_conv2.shape, b_conv2.shape, h_conv2.shape)

(5, 5, 32, 64) (64,) (?, 14, 14, 64)


In [14]:
# Second pooling layer: input (14, 14, 64) => output (7, 7, 64)
h_pool2 = pool(h_conv2)
print(h_pool2.get_shape())

(?, 7, 7, 64)


In [15]:
# First dense layer parameters: 7*7*64 flattened features in, 1024 units out
fc1_inputs = 7 * 7 * 64
W_fc1 = weight_variable([fc1_inputs, 1024])
b_fc1 = weight_bias([1024])

In [16]:
# Flatten each (7, 7, 64) feature map into one row so it can feed the dense layer.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
print(h_pool2_flat.get_shape())

(?, 3136)


In [17]:
# First dense layer: affine transform of the flattened features followed by ReLU.
fc1_preact = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_preact)

In [18]:
# Dropout on the first dense layer; keep_prob is fed at run time
# (0.5 in the training loop, 1.0 when measuring accuracy).
keep_prob = tf.placeholder(dtype=tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

In [19]:
# Second dense layer parameters: 1024 units in, 10 outputs (one per label entry)
fc2_shape = [1024, 10]
W_fc2 = weight_variable(fc2_shape)
b_fc2 = weight_bias([10])

In [20]:
# Output layer: unnormalised scores (logits); softmax is applied inside the loss.
fc2_product = tf.matmul(h_fc1_drop, W_fc2)
y_conv = fc2_product + b_fc2

In [21]:
#Loss calculation
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=y_conv))

In [22]:
#regularization
regularizer = (tf.nn.l2_loss(W_fc1)+tf.nn.l2_loss(b_fc1)+
              tf.nn.l2_loss(W_fc2)+tf.nn.l2_loss(b_fc2))

In [23]:
loss += 5e-4*regularizer

In [24]:
# Learning-rate schedule
# `batch` is the step counter that the optimizer increments (it is passed as
# global_step to minimize()). The second positional parameter of tf.Variable is
# `trainable`, not a dtype, so the counter is declared non-trainable by keyword
# rather than passing tf.float32 positionally.
batch = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(
    0.01,              # base learning rate
    batch*50,          # examples processed so far; 50 is the training batch size
    x_train.shape[0],  # decay period, measured in examples: the size of x_train
    0.95,              # multiplicative decay rate per period
    staircase=True     # decay in discrete steps rather than continuously
)

In [25]:
# Trainer/Optimizer: momentum SGD (momentum 0.9) on the decayed learning rate.
# Passing global_step=batch makes each training step advance the counter.
momentum_sgd = tf.train.MomentumOptimizer(learning_rate, 0.9)
opt = momentum_sgd.minimize(loss, global_step=batch)

In [26]:
#opt = tf.train.AdamOptimizer(0.0001).minimize(loss)

In [27]:
# Accuracy: fraction of rows where the arg-max of the logits equals the
# arg-max of the one-hot label.
predicted_class = tf.argmax(y_conv, 1)
true_class = tf.argmax(Y, 1)
is_correct = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

In [28]:
# Session used by the cells below to initialise variables, train, and evaluate.
s = tf.Session()

In [29]:
# Give every tf.Variable defined above its initial value before training starts.
init_op = tf.global_variables_initializer()
s.run(init_op)

In [30]:
#Training
# 55000 optimizer steps of 50 examples each, with dropout keep_prob 0.5.
# Test-set accuracy is logged every 550 steps with dropout disabled.
# NOTE(review): if x_train holds 55,000 examples (not shown here; confirm),
# this is 50 passes over the data, and each 550-step log interval is half a
# pass. The "Epoch" number in the log therefore counts log intervals. The
# message is left unchanged so it still matches the recorded output.
for i in range(55000):
    # Unpack into batch_x/batch_y instead of rebinding `batch`: that name is
    # the global-step tf.Variable given to the optimizer, and overwriting it
    # with a data tuple breaks any re-run of the optimizer cell.
    batch_x, batch_y = mnist.train.next_batch(50)
    s.run(opt, {X: batch_x, Y: batch_y, keep_prob: 0.5})
    if i % 550 == 0:
        acc = s.run(accuracy, {X: x_test, Y: y_test, keep_prob: 1.0}) * 100
        print("Accuracy at %d-th Epoch : %f" % (i // 550, acc))

Accuracy at 0-th Epoch : 9.830000
Accuracy at 1-th Epoch : 97.310001
Accuracy at 2-th Epoch : 98.060000
Accuracy at 3-th Epoch : 98.530000
Accuracy at 4-th Epoch : 98.439997
Accuracy at 5-th Epoch : 98.780000
Accuracy at 6-th Epoch : 98.869997
Accuracy at 7-th Epoch : 98.979998
Accuracy at 8-th Epoch : 98.869997
Accuracy at 9-th Epoch : 99.080002
Accuracy at 10-th Epoch : 98.970002
Accuracy at 11-th Epoch : 99.140000
Accuracy at 12-th Epoch : 99.100000
Accuracy at 13-th Epoch : 99.100000
Accuracy at 14-th Epoch : 99.129999
Accuracy at 15-th Epoch : 99.210000
Accuracy at 16-th Epoch : 99.040002
Accuracy at 17-th Epoch : 99.190003
Accuracy at 18-th Epoch : 99.089998
Accuracy at 19-th Epoch : 99.210000
Accuracy at 20-th Epoch : 99.150002
Accuracy at 21-th Epoch : 99.199998
Accuracy at 22-th Epoch : 99.110001
Accuracy at 23-th Epoch : 99.269998
Accuracy at 24-th Epoch : 99.260002
Accuracy at 25-th Epoch : 99.309999
Accuracy at 26-th Epoch : 99.269998
Accuracy at 27-th Epoch : 99.229997
Acc

In [31]:
# Final accuracy on the full test split, with dropout disabled (keep_prob 1.0).
final_feed = {X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1.0}
print(s.run(accuracy, feed_dict=final_feed))

0.9934
