# Basic CNN using MNIST data

This page is implemented by numpy and tensorflow.

Demostrate the simplest VGG-like CNN to classify the MNIST hand-written data.

* Use $3\times3$ convolution kernels
* Stack $2$ or more convolutions before pooling layer

In [5]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
print(tf.__version__)

1.7.0


Define some MNIST constant

In [2]:
height = 28
width = 28
n_groups = 10
raw_image_channel = 1

Define weight initializer

In [3]:
def initial_weight(shape):
    return tf.get_variable('W', shape=shape, initializer=tf.truncated_normal_initializer())
def initial_bias(shape):
    return tf.get_variable('b', shape=shape, initializer=tf.constant_initializer(value=0.1))

Define convolution & relu operation

In [48]:
def conv_block(x, channels, name):
    input_channels = x.shape.as_list()[-1]
    with tf.variable_scope(name):
        w = initial_weight([3,3,input_channels,channels])
        b = initial_bias([channels])
        s = tf.nn.conv2d(x, w, [1,1,1,1], 'SAME')+b
        h = tf.nn.relu(s)
    return h

Build computation graph from x to logits

In [51]:
def build_graph(x, n_groups):
    with tf.variable_scope('conv1'):
        h11 = conv_block(x, 32, 'conv11')
        h12 = conv_block(h11, 32, 'conv12')
    p1 = tf.nn.pool(h12, [2,2], 'MAX', 'SAME', strides=[2,2])
    with tf.variable_scope('conv2'):
        h21 = conv_block(p1, 64, 'conv21')
        h22 = conv_block(h21, 64, 'conv22')
    p2 = tf.nn.pool(h21, [2,2], 'MAX', 'SAME', strides=[2,2])
    feature_size = np.cumprod(p2.shape.as_list()[1:])[-1]
    flatten = tf.reshape(p2, [-1, feature_size], name='flatten')
    with tf.variable_scope('fc1'):
        w_fc1 = initial_weight([feature_size, 128])
        b_fc1 = initial_bias([128])
        s_fc1 = tf.matmul(flatten, w_fc1)+b_fc1
        h_fc1 = tf.nn.relu(s_fc1)
    with tf.variable_scope('fc2'):
        w_fc2 = initial_weight([128, n_groups])
        b_fc2 = initial_bias([n_groups])
        logits = tf.matmul(h_fc1, w_fc2)+b_fc2
    return logits

Evalution model

In [64]:
def evaluate(y, logits):
    loss = tf.losses.softmax_cross_entropy(y, logits)
    pred_label = tf.argmax(logits, axis=1)
    correct_prediction = tf.equal(tf.argmax(y,axis=1), pred_label)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return loss, accuracy

Define input data & training algorithm

In [72]:
tf.reset_default_graph()
x = tf.placeholder(tf.float32, [None, height, width, raw_image_channel], name='x')
y = tf.placeholder(tf.float32, [None, n_groups], name='y')
logits = build_graph(x, n_groups)
loss, accuracy = evaluate(y, logits)
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

Download MNIST dataset

In [6]:
mnist = input_data.read_data_sets('../data/MNIST/', one_hot=True)

Extracting ../data/MNIST/train-images-idx3-ubyte.gz
Extracting ../data/MNIST/train-labels-idx1-ubyte.gz
Extracting ../data/MNIST/t10k-images-idx3-ubyte.gz
Extracting ../data/MNIST/t10k-labels-idx1-ubyte.gz


Start training

In [84]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        batch = mnist.train.next_batch(50)
        images = np.reshape(batch[0], [-1, 28, 28, 1])
        if i % 100 == 0:
            train_accuracy = sess.run(accuracy, feed_dict={x: images, y: batch[1]})
            print('step {}, training accuracy {:.4f}'.format(i, train_accuracy))
        sess.run(train_step, feed_dict={x: images, y: batch[1],})
    
    test_images = np.reshape(mnist.test.images, [-1, 28, 28, 1])
    test_accuracy = sess.run(accuracy, feed_dict={x: test_images, y: mnist.test.labels})
    print('test accuracy {:.4f}'.format(test_accuracy))

step 0, training accuracy 0.1200
step 100, training accuracy 0.1400
step 200, training accuracy 0.2400
step 300, training accuracy 0.4800
step 400, training accuracy 0.5400
step 500, training accuracy 0.4600
step 600, training accuracy 0.6200
step 700, training accuracy 0.6800
step 800, training accuracy 0.7000
step 900, training accuracy 0.7600
step 1000, training accuracy 0.7600
step 1100, training accuracy 0.7200
step 1200, training accuracy 0.6800
step 1300, training accuracy 0.6800
step 1400, training accuracy 0.7000
step 1500, training accuracy 0.8600
step 1600, training accuracy 0.7600
step 1700, training accuracy 0.6800
step 1800, training accuracy 0.7600
step 1900, training accuracy 0.8000
step 2000, training accuracy 0.9400
step 2100, training accuracy 0.8200
step 2200, training accuracy 0.8400
step 2300, training accuracy 0.8000
step 2400, training accuracy 0.7800
step 2500, training accuracy 0.8000
step 2600, training accuracy 0.7600
step 2700, training accuracy 0.8600
step