In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import time
from datetime import timedelta
import math
from tensorflow.examples.tutorials.mnist import input_data


def new_weights(shape):
    """
    function to create a weight variable
    input: shape of the weight tensor
    output: tf.Variable initialised from a truncated normal (stddev 0.05)
    """
    initial_values = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial_values)


def new_biases(length):
    '''
    function to create a bias variable
    input: number of bias values
    output: tf.Variable of shape [length] with every entry set to 0.05
    '''
    initial_values = tf.constant(0.05, shape=[length])
    return tf.Variable(initial_values)


def create_new_conv_layer(inp, num_input_channels, filter_size,  num_filters):
    '''
    function to build one convolution block: convolution + bias,
    2x2 max pooling, then ReLU
    input: input image (or previous layer), number of input channels,
           filter size (filters are square), number of filters
    output: resulting layer, filter weights
    '''
    # filter bank laid out as [height, width, in_channels, out_channels]
    weights = new_weights([filter_size, filter_size,
                           num_input_channels, num_filters])

    # one bias value per filter
    biases = new_biases(num_filters)

    # convolve with a stride of 1 in every dimension
    convolved = tf.nn.conv2d(input=inp,
                             filter=weights,
                             strides=[1, 1, 1, 1],
                             padding='SAME')

    # add the biases, then down-sample with a 2x2 window moved in steps of 2
    pooled = tf.nn.max_pool(value=convolved + biases,
                            ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1],
                            padding='SAME')

    # ReLU is the non-linearity applied to the pooled output
    activated = tf.nn.relu(pooled)

    return activated, weights


def flatten_layer(layer):
    '''
    Function to collapse a convolution layer into a 2-D tensor
    inputs: convolution layer, expected as
            [num_images, img_height, img_width, num_channels]
    output: flattened layer of shape [num_images, num_features],
            number of features
    '''
    # static shape of the incoming layer
    static_shape = layer.get_shape()

    # features per image = product of dimensions 1..3
    # (img_height * img_width * num_channels)
    num_features = static_shape[1:4].num_elements()

    # -1 lets the first dimension (number of images) be inferred
    flattened = tf.reshape(layer, [-1, num_features])

    return flattened, num_features


def create_new_fully_conn_layer(inp,  num_inputs, num_outputs):
    '''
    Function to create a fully connected layer followed by ReLU
    input: previous layer, number of inputs, number of outputs
    output: fully connected layer
    '''
    # fresh parameters for this layer
    weights = new_weights([num_inputs, num_outputs])
    biases = new_biases(num_outputs)

    # affine transform: inp * weights + biases
    pre_activation = tf.matmul(inp, weights) + biases

    # apply the ReLU non-linearity
    return tf.nn.relu(pre_activation)


def create_new_softmax_layer(inp,  num_inputs, num_outputs):
    '''
    Function to create the softmax output layer
    input: fully connected layer, number of inputs, number of outputs
    output: un-normalised layer output (logits), softmax predictions
    '''
    # fresh parameters for this layer
    weights = new_weights([num_inputs, num_outputs])
    biases = new_biases(num_outputs)

    # affine transform: inp * weights + biases (no ReLU here)
    logits = tf.matmul(inp, weights) + biases

    # normalise the logits into class probabilities
    probabilities = tf.nn.softmax(logits)

    return logits, probabilities


def cnn_mnist(learning_rate, iterations):
    '''
    Function of CNN model: builds the network, trains it on MNIST with
    gradient descent and prints the resulting metrics.

    The network is two convolution layers (6 and 16 filters of 5x5 pixels),
    two fully connected layers (120 and 84 units) and a 10-class softmax
    layer. Training uses mini-batches of 100 images.

    input: learning rate of the gradient descent optimizer,
           number of training iterations (mini-batches)
    output: None; prints the cost and accuracy on the last training batch
            (skipped when no iteration was run) and the accuracy on the
            validation and test sets
    '''

    #configuration of convolution layers

    # Convolutional Layer 1.
    filter_size1 = 5          # each filter 5x5 pixel
    num_filters1 = 6          # 6 filters

    # Convolutional Layer 2.
    filter_size2 = 5          # each filter 5X5 pixel
    num_filters2 = 16         # 16 filters

    # Number of units in the fully connected hidden layers.
    fully_conn_layer_size1 = 120
    fully_conn_layer_size2 = 84

    #data dimensions
    img_size = 28

    # flattened image
    img_size_flat = img_size * img_size

    # Number of colour channels for the images: 1 channel for gray-scale.
    num_channels = 1

    # Number of classes, one class for each of 10 digits.
    num_classes = 10

    # number of images per training step
    train_batch_size = 100

    #load mnist image
    data = input_data.read_data_sets('data/MNIST/', one_hot=True)

    # Build the model in its own graph so that calling this function again
    # (e.g. re-running the notebook cell) does not keep adding ops and
    # variables to the global default graph.
    with tf.Graph().as_default():
        #Placeholders
        x = tf.placeholder(tf.float32, shape=[None, img_size_flat])
        #reshape the image
        x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])

        #true y in one hot vector form
        y_true_one_hot = tf.placeholder(tf.float32, shape=[None, num_classes])

        #classes in y
        y_true_class = tf.argmax(y_true_one_hot, axis=1)

        #create first convolution layer with input as image
        conv_layer1, weights_conv1 = create_new_conv_layer(
            x_image, num_channels, filter_size1, num_filters1)

        #create second convolution layer with input as the output from first
        #convolution layer; number of input channels = number of filters in
        #first layer
        conv_layer2, weights_conv2 = create_new_conv_layer(
            conv_layer1, num_filters1, filter_size2, num_filters2)

        #flatten the layers
        layer_flat, num_features = flatten_layer(conv_layer2)
        #create fully connected layer1 with the flattened layer as input
        fully_conn_layer1 = create_new_fully_conn_layer(
            layer_flat, num_features, fully_conn_layer_size1)
        #create fully connected layer2 with the output of layer1 as input
        fully_conn_layer2 = create_new_fully_conn_layer(
            fully_conn_layer1, fully_conn_layer_size1, fully_conn_layer_size2)
        #create softmax layer
        soft_layer, y_pred = create_new_softmax_layer(
            fully_conn_layer2, fully_conn_layer_size2, num_classes)

        #find the predicted classes
        y_pred_class = tf.argmax(y_pred, axis=1)

        #optimize cost function (the loss op takes the un-normalised logits)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=soft_layer, labels=y_true_one_hot)
        cost = tf.reduce_mean(cross_entropy)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

        #find accuracy
        correct_prediction = tf.equal(y_pred_class, y_true_class)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        #tensorflow run; the context manager closes the session on exit
        with tf.Session() as session:
            session.run(tf.global_variables_initializer())

            #train the cnn
            last_feed = None
            last_cost = None
            for _step in range(iterations):
                #get a batch of training data
                x_batch, y_true_batch = data.train.next_batch(train_batch_size)
                last_feed = {x: x_batch, y_true_one_hot: y_true_batch}
                last_cost, _ = session.run([cost, optimizer], feed_dict=last_feed)

            # Only the final batch's metrics are reported, so the training
            # accuracy is evaluated once after the loop (on the last batch,
            # after its update) instead of on every iteration. Nothing is
            # reported when no training step was run.
            if last_feed is not None:
                acc = session.run(accuracy, feed_dict=last_feed)
                print("Training cost", last_cost)
                print("Train accuracy: ", acc*100)

            #validate the cnn
            acc = session.run(accuracy,
                              feed_dict={x: data.validation.images,
                                         y_true_one_hot: data.validation.labels})
            print("Validation accuracy: ", acc*100)

            #test the cnn
            acc = session.run(accuracy,
                              feed_dict={x: data.test.images,
                                         y_true_one_hot: data.test.labels})
            print("Test accuracy: ", acc*100)
    
    

# Train the CNN for 2000 mini-batch iterations with a learning rate of 0.2.
cnn_mnist(learning_rate=0.2, iterations=2000)


Extracting data/MNIST/train-images-idx3-ubyte.gz
Extracting data/MNIST/train-labels-idx1-ubyte.gz
Extracting data/MNIST/t10k-images-idx3-ubyte.gz
Extracting data/MNIST/t10k-labels-idx1-ubyte.gz
Training cost 0.05577536
Train accuracy:  100.0
Validation accuracy:  98.60000014305115
Test accuracy:  98.7500011920929
