## Initializing Weights

In [2]:
import tensorflow as tf

from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST splits from the MNIST_data/ directory; one_hot=True asks for one-hot encoded labels.
mnist = input_data.read_data_sets(
    'MNIST_data/',
    one_hot=True,
)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [None]:
#The images are flattened: each image arrives as a 1-D array of 784 pixel values instead of 28 rows by 28 columns.
# We need to reshape the 784 values to 28*28 and pass the result through CNN layer 1, followed
# by maxpooling layer 1, followed by CNN layer 2, followed by maxpooling layer 2. The output obtained is flattened
#and passed through the dense layer and then the output layer. We implement this below.

In [3]:
#Initializing all the constants

# Input geometry: each image is input_height rows by input_width columns with input_channels channels,
# and is fed to the network flattened into input_pixels values.
input_width=28
input_height=28
input_channels=1
input_pixels=input_width*input_height*input_channels  # 784 for the values above

# Convolution layers: number of filters, stride and (square) kernel size of each layer
n_conv1=32
n_conv2=64
stride_conv1=1
stride_conv2=1
conv1_k=5
conv2_k=5

# Max-pooling layers: window size (the pooling stride equals the window size)
max_pool1_k=2
max_pool2_k=2

# Dense part: hidden layer width and number of output classes
n_hidden=1024
n_out=10


def same_padding_output_size(size, *steps):
    """Return the spatial extent left after a chain of 'SAME'-padded layers.

    Args:
        size: extent (number of rows or columns) entering the first layer.
        *steps: stride of each successive layer, in the order the layers are applied.

    Returns:
        The extent after the last layer. A 'SAME'-padded layer with stride s maps an
        extent n to ceil(n / s), so odd extents round up instead of down.
    """
    for step in steps:
        size=-(-size//step)  # integer ceiling division
    return size


# Every layer that can shrink the feature map, in forward order: conv1, pool1, conv2, pool2.
spatial_steps=(stride_conv1, max_pool1_k, stride_conv2, max_pool2_k)

# Number of values per image after the second pooling layer has been flattened
# (7 * 7 * 64 = 3136 for the values above).
input_size_to_hidden=(same_padding_output_size(input_width, *spatial_steps)
                      *same_padding_output_size(input_height, *spatial_steps)
                      *n_conv2)

In [4]:
# Trainable parameters of the network.
# Pooling layers have no weights, so only the two convolution layers, the hidden (dense) layer and the
# output layer appear here.
# NOTE(review): tf.random_normal draws with its default standard deviation of 1.0, which looks large for
# layers with this many inputs -- consider a scaled initialiser (to be confirmed together with the learning rate).

weight_shapes=[
    # CNN layer 1 filters: k * k * input channels * number of filters (5*5*1*32 with the constants above)
    ("wc1", (conv1_k, conv1_k, input_channels, n_conv1)),
    # CNN layer 2 filters: k * k * filters of layer 1 * filters of layer 2 (5*5*32*64)
    ("wc2", (conv2_k, conv2_k, n_conv1, n_conv2)),
    # Hidden layer: flattened output of the second pooling layer * hidden units
    ("wh1", (input_size_to_hidden, n_hidden)),
    # Output layer: hidden units * classes (1024*10)
    ("wo", (n_hidden, n_out)),
]

# One bias value per filter / unit of the corresponding layer
bias_shapes=[
    ("bc1", (n_conv1, )),
    ("bc2", (n_conv2, )),
    ("bh1", (n_hidden, )),
    ("bo", (n_out, )),
]

weights={key: tf.Variable(tf.random_normal(dims)) for key, dims in weight_shapes}

biases={key: tf.Variable(tf.random_normal(dims)) for key, dims in bias_shapes}

## Forward Propagation in TensorFlow

In [5]:
#Convolution and maxpool functions

#Convolution function.
#In the convolution function we perform the convolution, i.e. slide our filter over the image and
# compute the new values. After that we add the biases: TensorFlow's convolution op (tf.nn.conv2d) does not
# add the biases itself, so we have to add them ourselves. We can also apply an activation function to
# the result of the convolution layer.

def conv(x, weights, bias, strides=1):
    """Apply one convolution layer: 2-D convolution, then bias, then ReLU.

    Args:
        x: input batch handed to tf.nn.conv2d (the callers in this notebook pass a 4-D
            n_images * rows * cols * channels tensor).
        weights: filter tensor handed to tf.nn.conv2d.
        bias: bias vector added to the convolution result.
        strides: step of the filter along both spatial axes (default 1).

    Returns:
        The ReLU-activated output of the layer.
    """
    # One stride entry per axis of x. The first (images) and last (channels) entries stay 1: the filter
    # never skips images and always spans the full depth; only the two spatial axes use `strides`.
    step=[1, strides, strides, 1]
    convolved=tf.nn.conv2d(x, weights, strides=step, padding='SAME')

    # tf.nn.conv2d does not add a bias, so add it explicitly before the activation.
    return tf.nn.relu(tf.nn.bias_add(convolved, bias))

#Maxpool function
def maxpooling(x, k=2):
    """Max-pool `x` with a k*k window that moves k steps at a time (2*2 by default).

    Args:
        x: input batch handed to tf.nn.max_pool.
        k: side of the pooling window, also used as the stride along both spatial axes.

    Returns:
        The pooled tensor.
    """
    # Pooling is never applied across images or across channels, so the first and last entries are 1.
    # The same list serves as window size and as stride, i.e. consecutive windows do not overlap.
    window=[1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [6]:
def cnn(x, weights, biases): #x is the input
    """Forward pass: conv1 -> maxpool1 -> conv2 -> maxpool2 -> dense hidden layer (ReLU) -> output logits.

    Args:
        x: batch of flattened images, one row of pixel values per image (e.g. 100*784 for 100 images).
        weights: dict with the keys 'wc1', 'wc2', 'wh1' and 'wo'.
        biases: dict with the keys 'bc1', 'bc2', 'bh1' and 'bo'.

    Returns:
        The output-layer values with no activation applied (one row per image).
    """
    # Un-flatten the input: 100*784 becomes 100*28*28*1. The -1 lets TensorFlow infer the number of images.
    # tf.nn.conv2d uses the NHWC layout by default, so the row count (height) comes before the column
    # count (width); this only matters for non-square images.
    images=tf.reshape(x, shape=(-1, input_height, input_width, input_channels))

    # CNN layer 1 followed by maxpool layer 1
    conv1=conv(images, weights['wc1'], biases['bc1'], stride_conv1)
    conv1_pool=maxpooling(conv1, max_pool1_k)

    # CNN layer 2 followed by maxpool layer 2
    conv2=conv(conv1_pool, weights['wc2'], biases['bc2'], stride_conv2)
    conv2_pool=maxpooling(conv2, max_pool2_k)

    # Flatten each pooled feature map into one row for the dense layer:
    # 100 images of 7*7*64 become 100*3136. The number of rows is inferred.
    hidden_input=tf.reshape(conv2_pool, shape=(-1, input_size_to_hidden))

    # Dense layer: input times weights, plus biases, then ReLU
    hidden_output_before_activation=tf.add(tf.matmul(hidden_input, weights['wh1']), biases['bh1'])
    hidden_output=tf.nn.relu(hidden_output_before_activation)

    # Output layer: no activation here, the raw values are returned
    output=tf.add(tf.matmul(hidden_output, weights['wo']), biases['bo'])

    return output

In [7]:
# Graph inputs: a batch of flattened images and the matching one-hot label rows.
# The leading None leaves the batch size open.
x = tf.placeholder('float', shape=(None, input_pixels))
y = tf.placeholder(tf.int32, shape=(None, n_out))

# Output of the forward propagation for the batch fed into x
pred = cnn(x, weights, biases)

In [8]:
# Softmax cross-entropy of every example in the batch, averaged into a single scalar cost.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=pred)
cost = tf.reduce_mean(per_example_loss)

In [9]:
# Adam optimizer; running `optimize` performs one update step that reduces `cost`.
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
optimize = optimizer.minimize(cost)

In [10]:
# Open a session and give every variable its initial value before training.
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

In [11]:
# Mini-batch training: 25 passes over the training set, printing the summed batch cost of each pass.
batch_size = 100
num_batches = int(mnist.train.num_examples / batch_size)  # same for every pass, so computed once

for epoch in range(25):
    total_cost = 0
    for step in range(num_batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Run one optimisation step and fetch the cost of this batch
        c, _ = sess.run([cost, optimize], feed_dict={x: batch_x, y: batch_y})
        total_cost += c
    print(total_cost)

1052515.5018329471
29211.698738008738
19028.619024150008
13545.84865722229
14357.581539481878
11991.5606413946
12426.599194691515
10309.33572484029
9909.193868816452
8321.08828842612
7601.565785453555
5277.471898220523
5830.5974011926255
4886.970135342969
5420.74832860721
4301.238702304959
3696.441102922312
3930.347788801424
3471.715747241911
2992.4176112215873
2592.2889920090715
3019.399624204747
3229.108052779778
2284.1158180920975
2490.208405163884


In [12]:
# Predicted class = position of the largest output value; true class = position of the 1 in the one-hot row.
predictions = tf.argmax(pred, axis=1)
correct_labels = tf.argmax(y, axis=1)
correct_predictions = tf.equal(predictions, correct_labels)

# Evaluate on the test set. `predictions` is rebound here from the graph tensor to its evaluated values.
test_feed = {x: mnist.test.images, y: mnist.test.labels}
predictions, correct_preds = sess.run([predictions, correct_predictions], feed_dict=test_feed)
correct_preds.sum() #98.4% accuracy

9840

In [13]:
# Adding dropout layer to our network
#The dropout layer will be added after the hidden layer
def cnn(x, weights, biases, keep_prob=1.0):
    """Forward pass with dropout: conv1 -> maxpool1 -> conv2 -> maxpool2 -> dense (ReLU) -> dropout -> logits.

    Args:
        x: batch of flattened images, one row of pixel values per image (e.g. 100*784 for 100 images).
        weights: dict with the keys 'wc1', 'wc2', 'wh1' and 'wo'.
        biases: dict with the keys 'bc1', 'bc2', 'bh1' and 'bo'.
        keep_prob: probability with which each hidden unit is kept by the dropout layer; a number or a
            scalar tensor/placeholder. Defaults to 1.0, i.e. every unit is kept.

    Returns:
        The output-layer values with no activation applied (one row per image).
    """
    # Un-flatten the input: 100*784 becomes 100*28*28*1. The -1 lets TensorFlow infer the number of images.
    # tf.nn.conv2d uses the NHWC layout by default, so the row count (height) comes before the column
    # count (width); this only matters for non-square images.
    images=tf.reshape(x, shape=(-1, input_height, input_width, input_channels))

    # CNN layer 1 followed by maxpool layer 1
    conv1=conv(images, weights['wc1'], biases['bc1'], stride_conv1)
    conv1_pool=maxpooling(conv1, max_pool1_k)

    # CNN layer 2 followed by maxpool layer 2
    conv2=conv(conv1_pool, weights['wc2'], biases['bc2'], stride_conv2)
    conv2_pool=maxpooling(conv2, max_pool2_k)

    # Flatten each pooled feature map into one row for the dense layer:
    # 100 images of 7*7*64 become 100*3136. The number of rows is inferred.
    hidden_input=tf.reshape(conv2_pool, shape=(-1, input_size_to_hidden))

    # Dense layer: input times weights, plus biases, then ReLU
    hidden_output_before_activation=tf.add(tf.matmul(hidden_input, weights['wh1']), biases['bh1'])
    hidden_output_before_dropout=tf.nn.relu(hidden_output_before_activation)

    # Dropout after the hidden layer. tf.nn.dropout deprecates `keep_prob` in favour of `rate`,
    # the probability of dropping a unit, so pass rate = 1 - keep_prob.
    hidden_output=tf.nn.dropout(hidden_output_before_dropout, rate=1-keep_prob)

    # Output layer: no activation here, the raw values are returned
    output=tf.add(tf.matmul(hidden_output, weights['wo']), biases['bo'])

    return output

In [14]:
# Graph inputs: a batch of flattened images, the matching one-hot label rows and the dropout keep probability.
# The leading None leaves the batch size open.
x = tf.placeholder('float', shape=(None, input_pixels))
y = tf.placeholder(tf.int32, shape=(None, n_out))
keep_prob = tf.placeholder('float')

# Output of the forward propagation; keep_prob is fed at run time
pred = cnn(x, weights, biases, keep_prob)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [15]:
# Softmax cross-entropy of every example in the batch, averaged into a single scalar cost.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=pred)
cost = tf.reduce_mean(per_example_loss)

In [16]:
# Adam optimizer; running `optimize` performs one update step that reduces `cost`.
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
optimize = optimizer.minimize(cost)

In [17]:
# Open a new session and (re-)initialise every variable, so training of the dropout network starts afresh.
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

In [18]:
# Mini-batch training with dropout: 25 passes over the training set, printing the summed batch cost of each pass.
batch_size = 100
num_batches = int(mnist.train.num_examples / batch_size)  # same for every pass, so computed once

for epoch in range(25):
    total_cost = 0
    for step in range(num_batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Training: feed a keep probability below 1 to enable the dropout layer.
        # keep_prob=1 would mean no dropout, i.e. all units are kept.
        train_feed = {x: batch_x, y: batch_y, keep_prob: 0.8}
        c, _ = sess.run([cost, optimize], feed_dict=train_feed)
        total_cost += c
    print(total_cost)

872747.0000053644
34249.23302859068
20253.851375933737
13011.363422049477
10665.460250850952
8568.314923640064
7578.996290233345
6405.353697190732
6667.005616922814
4443.934203997217
5317.131280366819
4572.758816115221
4384.672963370725
4431.369115413308
3354.9754682826683
3906.374495007949
2560.3539869821607
3085.9683850681154
3019.7926850739955
2487.814646050474
2449.850273968462
2395.42541110889
1976.1401373692206
1934.5229387753607
1876.2988920589673


In [19]:
# Predicted class = position of the largest output value; true class = position of the 1 in the one-hot row.
predictions = tf.argmax(pred, axis=1)
correct_labels = tf.argmax(y, axis=1)
correct_predictions = tf.equal(predictions, correct_labels)

# Testing: keep_prob is 1.0 so that no unit is dropped during evaluation.
# `predictions` is rebound here from the graph tensor to its evaluated values.
test_feed = {x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0}
predictions, correct_preds = sess.run([predictions, correct_predictions], feed_dict=test_feed)
correct_preds.sum() # 98.13% accuracy

9813