In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
np.random.seed(100)

In [3]:
print(tf.__version__)
print(np.__version__)

1.4.0
1.13.3


In [12]:
class CNN(object):
    # Using tf.layers and using chainer-esque style for
    # Architecture taken from:
    # https://www.tensorflow.org/tutorials/layers
    def __init__(self, sess, n_classes, epochs, batch_size):
        self._sess = sess
        self._n_classes = n_classes
        self._epochs = epochs
        self._batch_size = batch_size
        self._build_graph()
        
    def _model(self, x, p_drop):
        # 2.2.1 in https://arxiv.org/abs/1603.07285 to understand CNN dimensions.
        # "same" means solve padding to make output dimensions same as input.
        # With a stride of (1,1), o = (i-k) + 2p + 1 and k = 2p + 1
        # e.g. k = [5,5], then 5 = 2p + 1 -> p = 2 and o = (28-5)+2*2+1 = 28
        # inputs=x, filters=32, kernel_size=[5, 5], stride=(1,1) padding="same", activation=tf.nn.relu
        x = tf.reshape(x, [-1, 28, 28, 1])
        out = tf.layers.conv2d(x,  32, [5, 5], (1,1), "same", activation=tf.nn.relu)  # 28x28x32
        out = tf.layers.max_pooling2d(out, [2, 2], (2,2))                             # 14x14x32
        out = tf.layers.conv2d(out,  64, [5, 5], (1,1), "same", activation=tf.nn.relu)# 14x14x64
        out = tf.layers.max_pooling2d(out, [2, 2], (2,2))                             # 7x7x64
        out = tf.reshape(out, [-1, 7 * 7 * 64])
        out = tf.layers.dense(out, 1024, tf.nn.relu) 
        out = tf.layers.dropout(out, p_drop)
        out = tf.layers.dense(out, self._n_classes)
        return out
        
    def _build_graph(self):
        # Set up placeholders
        self._inputs = tf.placeholder("float", [None, 784])
        self._targets = tf.placeholder("float", [None, self._n_classes])
        self._p_drop = tf.placeholder("float")
        
        self._predict = self._model(self._inputs, self._p_drop)
        self._loss = tf.losses.softmax_cross_entropy(onehot_labels=self._targets, logits=self._predict)
        self._trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(self._loss)        
    
    def train(self, tr_x, tr_y, te_x, te_y):
        tf.global_variables_initializer().run()
        batches = zip(range(0, len(tr_x), self._batch_size), 
                      range(self._batch_size, len(tr_x)+1, self._batch_size)
        )
        for i in range(self._epochs):
            for start, end in batches:
                self._sess.run(self._trainer, feed_dict={
                        self._inputs: tr_x[start:end],
                        self._targets: tr_y[start:end],
                        self._p_drop: 0.5}
                )
            print(i, np.mean(np.argmax(te_y, axis=1) == np.argmax(
                        self._sess.run(self._predict, feed_dict={
                            self._inputs: te_x,
                            self._p_drop: 0.0}), # Set probability of dropout to 0 for testing.
                        axis=1))
            )


In [13]:
# Set up training data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
train_x, train_y, test_x, test_y = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
n_classes=10
epochs=10
batch_size=128

tf.reset_default_graph()
with tf.Session() as sess:
    model = CNN(sess, 10, 10, 128)
    model.train(train_x, train_y, test_x, test_y)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
(0, 0.95779999999999998)
(1, 0.97819999999999996)
(2, 0.98019999999999996)
(3, 0.98640000000000005)
(4, 0.9889)
(5, 0.98939999999999995)
(6, 0.99029999999999996)
(7, 0.99029999999999996)
(8, 0.99019999999999997)
(9, 0.98980000000000001)
