Here, we will work with the MNIST dataset and implement a convolutional neural network (CNN) in TensorFlow.

In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
# import data from tensorflow
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
import numpy as np
# Load MNIST with one-hot encoded labels. TensorFlow's log verbosity is lowered
# to ERROR for the duration of the load so that messages below that level do not
# clutter the cell output.
old_v = tf.logging.get_verbosity()
tf.logging.set_verbosity(tf.logging.ERROR)
try:
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train_data = mnist.train.images  # Returns np.array
    train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
    eval_data = mnist.test.images  # Returns np.array
    eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)
finally:
    # restore the previous verbosity even when loading fails, so a failed cell
    # does not leave the kernel with logging silenced
    tf.logging.set_verbosity(old_v)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
# helper that creates the trainable weights of a layer
def init_weights(shape):
    """Return a new ``tf.Variable`` of the given shape holding random weights.

    The initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.

    Args:
        shape: list of ints describing the shape of the weight tensor.

    Returns:
        A ``tf.Variable`` with the requested shape.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [5]:
# helper that creates the trainable bias of a layer
def init_bias(shape):
    """Return a new ``tf.Variable`` of the given shape with every entry set to 0.1.

    Args:
        shape: list of ints describing the shape of the bias tensor.

    Returns:
        A ``tf.Variable`` with the requested shape, filled with the constant 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

# thin wrapper around tf.nn.conv2d with fixed stride and padding
def conv2d(x, W):
    """Apply a 2-D convolution with stride 1 and 'SAME' padding.

    Args:
        x: input data in the format [batch, H, W, Ch].
        W: filter in the format [filter H, filter W, Ch IN, Ch OUT].

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    unit_strides = [1, 1, 1, 1]  # move the filter one step in every direction
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

# pooling layer: thin wrapper around tf.nn.max_pool
def max_pool_2by2(x):
    """Max-pool the input over 2x2 windows with a stride of 2 and 'SAME' padding.

    Args:
        x: input in the format [batch, H, W, Ch].

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    # the same [1, 2, 2, 1] spec is used for both the window size and the
    # stride: pool over 2x2 spatial patches, never across batch or channels
    two_by_two = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=two_by_two, strides=two_by_two, padding='SAME')

In [6]:
# convolution + bias + ReLU building block
def convolutional_layer(input_x, shape):
    """Build a convolutional layer followed by a ReLU activation.

    Args:
        input_x: input tensor passed to ``conv2d``.
        shape: filter shape [filter H, filter W, Ch IN, Ch OUT]; also used to
            size the bias.

    Returns:
        ``relu(conv2d(input_x, W) + b)`` where ``W`` and ``b`` are newly created
        variables.
    """
    kernel = init_weights(shape)
    # shape[3] is the number of output channels, so one bias per output feature
    bias = init_bias([shape[3]])
    pre_activation = conv2d(input_x, kernel) + bias
    return tf.nn.relu(pre_activation)

# fully connected (dense) layer
def normal_full_layer(input_layer, size):
    """Build a fully connected layer computing ``input_layer @ W + b``.

    No activation is applied here; callers wrap the result themselves when
    they need one.

    Args:
        input_layer: 2-D tensor whose second dimension is statically known.
        size: number of output units.

    Returns:
        The tensor ``tf.matmul(input_layer, W) + b``.
    """
    # second dimension = number of input features per example
    input_size = int(input_layer.get_shape()[1])
    W = init_weights([input_size, size])
    # use the dedicated bias initializer (constant 0.1), consistent with
    # convolutional_layer, instead of drawing the bias from the weight initializer
    b = init_bias([size])
    return tf.matmul(input_layer, W) + b

In [7]:
# Graph inputs: placeholders that are fed with real data when the session runs.
# The leading None leaves the batch size unspecified.

# every image arrives flattened: 28 * 28 = 784 pixel values per example
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
# one-hot labels: one column for each of the 10 digit classes
y_true = tf.placeholder(dtype=tf.float32, shape=[None, 10])

In [8]:
# Layers

# turn each flat 784-vector back into a 28x28 image for the convolutions;
# -1 keeps the batch dimension flexible and the trailing 1 is the single
# grayscale channel
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

In [9]:
# first convolutional block:
# 5x5 filters over the 1 grayscale input channel, producing 32 feature maps,
# followed by 2x2 max pooling
convo_1 = convolutional_layer(x_image, shape=[5, 5, 1, 32])
convo_1_pooling = max_pool_2by2(convo_1)

In [11]:
# second convolutional block:
# 5x5 filters over the 32 feature maps from the first block, producing 64,
# followed by 2x2 max pooling
convo_2 = convolutional_layer(convo_1_pooling, shape=[5, 5, 32, 64])
convo_2_pooling = max_pool_2by2(convo_2)

In [12]:
# flatten the pooled feature maps and feed them into a fully connected layer;
# after two 2x2 poolings the 28x28 image is 7x7, with 64 channels
convo_2_flat = tf.reshape(convo_2_pooling, shape=[-1, 7 * 7 * 64])
# dense layer with 1024 units and a ReLU activation
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat, size=1024))

In [14]:
# dropout on the dense layer
# the keep probability is a placeholder so that it can be fed a different
# value for training and for evaluation

hold_prob = tf.placeholder(dtype=tf.float32)
full_one_dropout = tf.nn.dropout(full_layer_one, keep_prob=hold_prob)

In [15]:
# output layer: one unit per digit class (10 outputs)
# no activation here; y_pred is passed to the loss as logits
y_pred = normal_full_layer(full_one_dropout, size=10)

In [18]:
# loss function: softmax cross entropy between the one-hot labels and the
# logits, averaged over the batch
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
)
# optimizer: Adam with a learning rate of 0.001
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)

# created after minimize() so that it also covers any variables the
# optimizer added to the graph
init = tf.global_variables_initializer()

In [21]:
steps = 1000

# Build the evaluation ops once, before the session starts. Creating them
# inside the training loop would add a fresh copy of these nodes to the graph
# on every reporting step.
# fraction of examples whose predicted class matches the true class
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
acc = tf.reduce_mean(tf.cast(matches, tf.float32))

with tf.Session() as sess:
    sess.run(init)

    for i in range(steps):
        # mini-batch of 50 training examples
        batch_x, batch_y = mnist.train.next_batch(50)

        # one optimization step, with a dropout keep probability of 0.5
        sess.run(train, feed_dict={x: batch_x, y_true: batch_y, hold_prob: 0.5})

        # every 100 steps, report accuracy on the test set
        if i % 100 == 0:
            print("ON STEP: {}".format(i))
            print("ACCURACY: ")
            # keep probability 1.0 disables dropout for evaluation
            print(sess.run(acc, feed_dict={x: mnist.test.images,
                                           y_true: mnist.test.labels,
                                           hold_prob: 1.0}))
            print('\n')
            

ON STEP: 0
ACCURACY: 
0.0964


ON STEP: 100
ACCURACY: 
0.9311


ON STEP: 200
ACCURACY: 
0.9561


ON STEP: 300
ACCURACY: 
0.9643


ON STEP: 400
ACCURACY: 
0.9669


ON STEP: 500
ACCURACY: 
0.9766


ON STEP: 600
ACCURACY: 
0.9759


ON STEP: 700
ACCURACY: 
0.9801


ON STEP: 800
ACCURACY: 
0.9735


ON STEP: 900
ACCURACY: 
0.9818


