In [74]:
import tensorflow as tf
import numpy as np
import math
import time

In [68]:
# Load MNIST through Keras, then scale pixel values by 1/255 and cast to the
# dtypes used by the rest of the notebook (float32 images, int32 labels).
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = (x_train / 255.0).astype(np.float32)
x_test = (x_test / 255.0).astype(np.float32)

y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

In [54]:
def conv_weight_variable(shape):
    """Create a randomly initialised float32 convolution kernel variable.

    Each filter's weights are drawn from a standard normal distribution and
    scaled by sqrt(2 / fan_in), where fan_in = shape[0] * shape[1] * shape[2].

    Input(s):
        shape: 4-element sequence. The result is passed as the filter of
            tf.nn.conv2d by the callers in this notebook, whose documented
            layout is [filter_height, filter_width, #in_channels, #filters].
    Output(s):
        tf.Variable (float32) with the same shape as "shape".
    """
    k0, k1, n_channels, n_filters = shape[0], shape[1], shape[2], shape[3]
    fan_in = k0 * k1 * n_channels

    # One row of fan_in samples per filter; a single call draws the samples in
    # the same order as generating the rows one after another.
    per_filter = np.random.randn(n_filters, fan_in) * math.sqrt(2.0 / fan_in)

    # Unflatten each row to (k0, k1, n_channels), then move the filter axis last.
    kernel = per_filter.reshape(n_filters, k0, k1, n_channels).transpose(1, 2, 3, 0)
    return tf.Variable(kernel, dtype=tf.float32)

In [55]:
def bias_variable(shape):
    """Return a float32 tf.Variable of the given shape, initialised to zeros."""
    return tf.Variable(tf.constant(0., shape=shape), dtype=tf.float32)

In [47]:
def conv2d(X, W):
    """Convolve X with kernel W using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(X, W, strides=unit_strides, padding='SAME')

def max_pool(X):
    """Apply max pooling with a 2x2 window, stride 2 and 'SAME' padding."""
    # The same [1, 2, 2, 1] spec is used for both the window and the stride.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(X, ksize=window, strides=list(window), padding='SAME')

In [56]:
def fc_weight_variable(shape):
    """Create a randomly initialised float32 weight variable for a dense layer.

    Weights are drawn from a standard normal distribution and scaled by
    sqrt(2 / #in_features).

    Input(s):
        shape: [#in_features, #out_features]
    Output(s):
        tf.Variable (float32) of shape [#in_features, #out_features].
    """
    n_in, n_out = shape[0], shape[1]

    # Draw one row of n_in samples per output unit, then transpose so that the
    # input-feature axis comes first.
    per_output = np.random.randn(n_out, n_in) * math.sqrt(2.0 / n_in)
    return tf.Variable(per_output.T, dtype=tf.float32)

In [57]:
def cnn(X):
    """Build the classifier graph (two conv/pool stages, two dense layers).

    X is reshaped to [-1, 28, 28, 1] before the first convolution.
    Returns the unnormalised class scores (logits) with 10 columns.
    """
    images = tf.reshape(X, [-1, 28, 28, 1])

    # Stage 1: 5x5 kernels, 1 -> 32 channels, ReLU, then 2x2 max-pool.
    conv1_w = conv_weight_variable([5, 5, 1, 32])
    conv1_b = bias_variable([32])
    stage1 = max_pool(tf.nn.relu(conv2d(images, conv1_w) + conv1_b))

    # Stage 2: more filters than the previous layer (32 -> 64) to capture
    # higher-level features.
    conv2_w = conv_weight_variable([5, 5, 32, 64])
    conv2_b = bias_variable([64])
    stage2 = max_pool(tf.nn.relu(conv2d(stage1, conv2_w) + conv2_b))

    # Two stride-2 poolings shrink 28x28 to 7x7, so the flattened feature
    # vector has 7 * 7 * 64 entries.
    flat_size = 7 * 7 * 64
    dense1_w = fc_weight_variable([flat_size, 1024])
    dense1_b = bias_variable([1024])
    flat = tf.reshape(stage2, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(flat, dense1_w) + dense1_b)

    # Output layer: 10 classes (digits 0-9).
    dense2_w = fc_weight_variable([1024, 10])
    dense2_b = bias_variable([10])
    return tf.matmul(hidden, dense2_w) + dense2_b

In [97]:
# Training hyper-parameters used by the cells below.
learning_rate = 1e-3  # step size passed to the optimizer
batch_size = 256      # examples per batch
n_epochs = 10         # full passes over the training set

In [69]:
# Training pipeline: shuffle with a 10000-element buffer, then batch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(batch_size)
)
# Test pipeline: batched only, no shuffling.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batch_size)
)

# A single iterator defined by the datasets' structure; running train_init or
# test_init points it at the corresponding dataset.
iterator = tf.data.Iterator.from_structure(
    train_dataset.output_types,
    train_dataset.output_shapes,
)

X, Y = iterator.get_next()

train_init = iterator.make_initializer(train_dataset)
test_init = iterator.make_initializer(test_dataset)

In [70]:
# Forward pass on whatever batch the iterator currently yields.
logits = cnn(X)

# Per-example cross-entropy against the integer labels, averaged over the batch.
entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=Y,
    logits=logits,
)
loss = tf.reduce_mean(entropy)

# Note: `optimizer` holds the op returned by minimize(), i.e. the training step.
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

In [98]:
# Train for n_epochs passes over the training set, then report test accuracy.
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
#     writer = tf.summary.FileWriter('./graphs/mnist', sess.graph)
    start_time = time.time()

    sess.run(tf.global_variables_initializer())
    for i in range(n_epochs):
        # Point the shared iterator at the training data for this epoch.
        sess.run(train_init)
        total_loss = 0
        # Count the batches actually executed. The dataset is batched without
        # dropping the remainder, so the final partial batch is also run and
        # int(x_train.shape[0] / batch_size) would undercount whenever the
        # training-set size is not a multiple of batch_size.
        n_batches = 0

        try:
            while True:
                _, loss_batch = sess.run([optimizer, loss])
                total_loss += loss_batch
                n_batches += 1
        except tf.errors.OutOfRangeError:
            # Raised by the iterator once the epoch's data is exhausted.
            pass

        # max(..., 1) avoids a ZeroDivisionError if the dataset yields no batches.
        print('Average loss epoch {0}: {1}'.format(i, total_loss / max(n_batches, 1)))

    print('Total time: {0} seconds'.format(time.time() - start_time))
    print('Optimization Finished!')

    # Test the model: count correct predictions per batch and sum them up.
    preds = tf.nn.softmax(logits)
    correct_preds = tf.equal(tf.cast(tf.argmax(preds, 1), tf.int32), Y)
    # Summing the 0/1 casts gives the number of correct predictions in a batch.
    accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))

    total_correct_preds = 0

    # Point the shared iterator at the test data.
    sess.run(test_init)
    try:
        while True:
            accuracy_batch = sess.run(accuracy)
            total_correct_preds += accuracy_batch
    except tf.errors.OutOfRangeError:
        pass

    # Every test example is visited exactly once, so dividing the number of
    # correct predictions by the test-set size gives the overall accuracy.
    print('Accuracy {0}'.format(total_correct_preds / x_test.shape[0]))

#     writer.close()

Average loss epoch 0: 0.22743989607223716
Average loss epoch 1: 0.041843438457347386
Average loss epoch 2: 0.029003902237949908
Average loss epoch 3: 0.0194148165676305
Average loss epoch 4: 0.012681601892432405
Average loss epoch 5: 0.010173060589606285
Average loss epoch 6: 0.008806036743827952
Average loss epoch 7: 0.0072059630713342
Average loss epoch 8: 0.0055047362426489405
Average loss epoch 9: 0.004686543654315756
Total time: 38.560025691986084 seconds
Optimization Finished!
Accuracy 0.9906
