 # Build a CNN from scratch using TensorFlow and apply it to MNIST

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
# Load MNIST through TensorFlow's tutorial helper, with one-hot encoded labels.
# The archives are already downloaded and stored in the MNIST_data folder.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [3]:
# Number of examples in the training split.
mnist.train.num_examples

55000

In [4]:
# Shape of a single training image: the pixels are stored as one flat vector.
mnist.train.images[1].shape

(784,)

In [5]:
# Clear the default graph so the graph-building cells below start from a clean slate when re-run.
tf.reset_default_graph()

# Build CNN graph

In [6]:
# Input placeholder for ONE image at a time, fed as a 2-D array.
# The shape is declared explicitly: every feed in this notebook is reshaped to 28x28, and a
# declared shape makes TensorFlow reject a wrongly shaped feed instead of silently slicing it.
image_2d = tf.placeholder(tf.float32, shape=(28, 28), name="images_one_by_one")

In [7]:
# Parameters
# s is used by the layer cells both as the stride and as the width of each sliced patch.
s = 3

# Positions visited along one axis: floor((input_size - 3) / s) + 1.
# Layer 1 scans the 28-pixel-wide image; layer 2 scans the layer-1 activation map.
L1_max_horizontal_iter = (28 - 3) // s + 1
L2_max_horizontal_iter = (L1_max_horizontal_iter - 3) // s + 1

L1_max_horizontal_iter, L2_max_horizontal_iter

(9, 3)

In [8]:
# Convolution kernels, stored as float32 variables with uniform random initial values.
# Layer 1: 5 filters of size 3x3.
kernals_L1 = tf.Variable(
    initial_value=np.random.rand(3, 3, 5),
    dtype=tf.float32,
    name="layer1_kernals",
)
# Layer 2: 2 filters of size 3x3x5 (the depth matches the 5 layer-1 filters).
kernals_L2 = tf.Variable(
    initial_value=np.random.rand(3, 3, 5, 2),
    dtype=tf.float32,
    name="layer2_kernals",
)

In [9]:
# Per-filter biases. No dtype is passed, so these variables take the float64 dtype of the
# NumPy initial values, unlike the float32 kernels.
# 5 biases, one for each filter in layer 1.
B1 = tf.Variable(initial_value=np.random.rand(5), name="L1_biases")
# 2 biases, one for each filter in layer 2.
B2 = tf.Variable(initial_value=np.random.rand(2), name="L2_biases")

## Build DAG

### CNN Layer1

In [11]:
# Activation map, layer 1: a hand-rolled strided convolution of the single input image with
# each of the 5 layer-1 kernels. The activation map is NOT a variable; it is just a bundle of
# TensorFlow ops, one scalar op per output cell.
#
# Each cell is sum(kernel * patch) + bias. The per-filter bias B1 is applied here; it is cast
# to float32 so it can be added to the float32 products (B1 is created without an explicit dtype).
# NOTE(review): no non-linearity (e.g. ReLU) follows this layer - confirm that is intended.

one_kernal_op_list = []
for kernal_idx in range(5):
    kernal = kernals_L1[:,:,kernal_idx]
    kernal_bias = tf.cast(B1[kernal_idx], tf.float32)   # one bias shared by every cell of this filter's map

    h_op_list = []
    for h_stride in range(L1_max_horizontal_iter):
        v_op_list = []
        for v_stride in range(L1_max_horizontal_iter):

            # s x s window of the image at grid position (v_stride, h_stride)
            image_patch = image_2d[v_stride*s:(v_stride+1)*s, h_stride*s:(h_stride+1)*s]

            # Single element of the activation map (reduce_sum collapses the product to a scalar)
            v_op_list.append(tf.reduce_sum(kernal * image_patch) + kernal_bias)

        # One column of the map: all vertical positions for this horizontal position
        v_strip_activation = tf.stack(v_op_list, axis=0, name="v_strip_activation")
        h_op_list.append(v_strip_activation)

    # Columns placed side by side give the full 2-D map of this filter
    h_strip_activation = tf.stack(h_op_list, axis=1, name="h_strip_activation")
    one_kernal_op_list.append(h_strip_activation)

# Per-filter maps stacked along the last axis
L1_act_map = tf.stack(one_kernal_op_list, axis=2, name = "L1_act_map")
            

In [12]:
# Inspect the stacked layer-1 activation map tensor (name, static shape and dtype).
L1_act_map

<tf.Tensor 'L1_act_map:0' shape=(9, 9, 5) dtype=float32>

In [13]:
# List every global variable in the graph. The activation maps should not appear in this list,
# because they are ops built from the variables rather than variables themselves.
tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

[<tf.Variable 'layer1_kernals:0' shape=(3, 3, 5) dtype=float32_ref>,
 <tf.Variable 'layer2_kernals:0' shape=(3, 3, 5, 2) dtype=float32_ref>,
 <tf.Variable 'L1_biases:0' shape=(5,) dtype=float64_ref>,
 <tf.Variable 'L2_biases:0' shape=(2,) dtype=float64_ref>]

In [14]:
# Activation map, layer 2: the same hand-rolled strided convolution, now over the layer-1
# activation volume with each of the 2 layer-2 kernels (each kernel is 3-D and spans all channels).
#
# Each cell is sum(kernel * patch) + bias. The per-filter bias B2 is applied here; it is cast
# to float32 so it can be added to the float32 products (B2 is created without an explicit dtype).
# NOTE(review): no non-linearity (e.g. ReLU) follows this layer - confirm that is intended.

one_kernal_op_list = []
for kernal_idx in range(2):
    kernal_2 = kernals_L2[:,:, :, kernal_idx]      # filter here is 3D
    kernal_2_bias = tf.cast(B2[kernal_idx], tf.float32)   # one bias shared by every cell of this filter's map

    h_op_list = []
    for h_stride in range(L2_max_horizontal_iter):
        v_op_list = []
        for v_stride in range(L2_max_horizontal_iter):

            # s x s window across all channels of the layer-1 map at grid position (v_stride, h_stride)
            L1_act_vol_patch = L1_act_map[v_stride*s:(v_stride+1)*s, h_stride*s:(h_stride+1)*s, : ]

            # Single element of the activation map (reduce_sum collapses the product to a scalar)
            v_op_list.append(tf.reduce_sum(kernal_2 * L1_act_vol_patch) + kernal_2_bias)

        # One column of the map: all vertical positions for this horizontal position
        v_strip_activation_2 = tf.stack(v_op_list, axis=0, name="v_strip_activation_L2")
        h_op_list.append(v_strip_activation_2)

    # Columns placed side by side give the full 2-D map of this filter
    h_strip_activation_2 = tf.stack(h_op_list, axis=1, name="h_strip_activation_L2")
    one_kernal_op_list.append(h_strip_activation_2)

# Per-filter maps stacked along the last axis
L2_act_map = tf.stack(one_kernal_op_list, axis=2, name = "L2_act_map")
            

In [15]:
# Flatten the final activation map.
# flatten() needs a leading batch axis, so a throwaway batch of two identical copies is built first.
duplicated_maps = [L2_act_map, L2_act_map]
L2_act_map_ = tf.stack(duplicated_maps, axis=0)
# Output shape is [batch_size, k]
L2_flattend = tf.contrib.layers.flatten(L2_act_map_)
L2_flattend.get_shape()

TensorShape([Dimension(2), Dimension(18)])

In [16]:
# Keep only the flattened activation map of one image; indexing drops the rank by 1 ...
single_image_row = L2_flattend[0, :]
# ... so reshape back to a [1, k] row.
L2_flattend = tf.reshape(single_image_row, shape=(1, -1))
L2_flattend.get_shape()

TensorShape([Dimension(1), Dimension(18)])

# FC or DNN

# Graph for DNN (just one layer with 10 neurons)
**The 18 flattened layer-2 outputs (`L2_flattend`) feed into it**

In [17]:
# One-hot encoded true label(s). The row count is left open, though only one row is fed at a time here.
Y_OHE_true = tf.placeholder(dtype=tf.float32, shape=[None, 10], name="Y_OHE_true")

In [18]:
# Dense-layer parameters: 18 flattened inputs -> 10 outputs, with uniform random initial values.
W = tf.Variable(
    initial_value=np.random.rand(18, 10),
    dtype=tf.float32,
    name="W",
)
B = tf.Variable(
    initial_value=np.random.rand(10),
    dtype=tf.float32,
    name="B",
)

In [19]:
# DAG: dense layer, i.e. flattened activations times W, plus B.
weighted_inputs = tf.matmul(L2_flattend, W)
Z = tf.add(weighted_inputs, B, name="Z")   # 10 outputs, one for each digit

In [20]:
# Loss: softmax cross-entropy between the logits Z and the one-hot labels (one value per image).
loss = tf.nn.softmax_cross_entropy_with_logits_v2(
    labels=Y_OHE_true,
    logits=Z,
    name="loss",
)

In [21]:
# Optimizer: Adam, plus the training op that minimizes the loss.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
set_to_optimize = optimizer.minimize(loss, name="To_Be_Optimized")

In [24]:
# Scalar summary of the loss. `loss` holds one value per image, so element 0 is the loss
# of the single image fed per run.
summ = tf.summary.scalar("loss", loss[0])

In [25]:
# Summary writer that logs to ./graph; it is handed the current default graph here and
# receives the per-step loss summaries during training.
writer = tf.summary.FileWriter(logdir="./graph", graph=tf.get_default_graph())

In [48]:
# Open a session and initialize every variable in the graph.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [49]:
# Train on 1000 images, one image per step, logging the loss of every image.
for i in range(1000):
    batch_x , batch_y = mnist.train.next_batch(1)   # Leveraging the inbuilt mini-batch generator, one image at a time
    batch_x = batch_x.reshape(28,28)                 # flat 784-vector -> 2-D image expected by image_2d
    step_feed = {image_2d:batch_x, Y_OHE_true:batch_y}

    # Fetch the summary and the train op in ONE run: the graph is large (one op per output cell),
    # so a second sess.run just to read the loss would double the work of every step.
    # Indexing is used rather than unpacking into `_`, which IPython reserves for the last output.
    s_buffer = sess.run([summ, set_to_optimize], feed_dict=step_feed)[0]
    writer.add_summary(s_buffer, i)
    

In [50]:
# Loss on the last training image (i.e. the 1000th one), evaluated after training.
# Relies on batch_x / batch_y left over from the final iteration of the training loop.
sess.run(loss, feed_dict={image_2d:batch_x, Y_OHE_true:batch_y})

array([0.60315555], dtype=float32)

In [None]:
# Predict a digit for every test image, one image at a time.

# Build the argmax op ONCE. Creating it inside the loop would add a new op to the graph on
# every iteration, so the graph would keep growing for the whole run.
predicted_digit_op = tf.argmax(Z, axis=1)

num_test_images = mnist.test.num_examples
predictions = np.zeros(num_test_images)
for i in range(num_test_images):
    test_image = mnist.test.images[i].reshape([28,28])
    # The op returns a length-1 array (one row of logits); take its single element explicitly.
    predictions[i] = sess.run(predicted_digit_op, feed_dict={image_2d:test_image})[0]
    print(i, end="\r")

In [46]:
# Quick sanity check: sum of all predicted digit labels.
# NOTE(review): the execution counts suggest this cell was run out of order - re-run it after the prediction loop.
predictions.sum()

3296.0

In [41]:
# Per-image hit/miss: compare each predicted digit with the true digit decoded from the one-hot labels.
true_digits = np.argmax(mnist.test.labels, axis=1)
acc = np.equal(predictions, true_digits)
acc

array([False, False, False, ..., False, False, False])

In [None]:
# Test accuracy: the fraction of True entries in the per-image hit/miss array.
acc.mean()

In [52]:
# Release resources: close the summary writer and the session.
writer.close()
sess.close()