In [1]:
import tensorflow as tf

import numpy as np

  return f(*args, **kwds)


In [2]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [3]:
X_train = X_train.astype(np.float32).reshape((-1, 28 * 28)) / 255.
X_test = X_test.astype(np.float32).reshape((-1, 28 * 28)) / 255.

In [4]:
y_test_temp = y_test.copy()
y_train_temp = y_train.copy()

In [5]:
y_test = np.zeros((y_test_temp.size, 10))
y_test[np.arange(y_test_temp.size), y_test_temp] = 1

y_train = np.zeros((y_train_temp.size, 10))
y_train[np.arange(y_train_temp.size), y_train_temp] = 1

In [6]:
learning_rate = 0.001
num_steps = 500
batch_size = 128
display_step = 10

n_inputs = 28 * 28
n_classes = 10
dropout = 0.7

In [7]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.placeholder(tf.float32, shape=(None, n_classes), name='y')
keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob')

In [8]:
def conv_layer(X, W, b, stride=1):
    """
    Convolutional Layer
    
    Parameters
    ----------
    X: Tensor of shape (batch_size, height, width, n_input_channels)
        Input Image
    
    W: Tensor of shape (filter_height, filter_width, n_input_channels, n_output_channels)
        Filter
    
    b: Tensor of shape (n_output_channels,)
        Bias
    
    stride: int
        Stride
    """
    conv_out = tf.nn.conv2d(X, W, strides=[1, stride, stride, 1], padding='SAME')
    conv_out = tf.nn.bias_add(conv_out, b)
    relu = tf.nn.relu(conv_out)
    return relu

def maxpool_layer(X, k=2):
    return tf.nn.max_pool(X, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

def conv_net(X, W, b):
    X = tf.reshape(X, shape=(-1, 28, 28, 1))
    
    conv1 = conv_layer(X, W['W_CONV_1'], b['B_CONV_1'])
    conv1 = maxpool_layer(conv1, k=2)
    
    conv2 = conv_layer(conv1, W['W_CONV_2'], b['B_CONV_2'])
    conv2 = maxpool_layer(conv2, k=2)
    
    fc1 = tf.reshape(conv2, (-1, W['W_FC_1'].get_shape().as_list()[0]))
    fc1 = tf.add(tf.matmul(fc1, W['W_FC_1']), b['B_FC_1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, keep_prob)
    out = tf.add(tf.matmul(fc1, W['OUT']), b['OUT'])
    
    return out

In [9]:
weight = {
    # 5x5 CONV, 1 input, 32 outputs
    'W_CONV_1': tf.Variable(tf.random_normal((5, 5, 1, 32))),
    
    # 5x5 CONV, 32 inputs, 64 outputs
    'W_CONV_2': tf.Variable(tf.random_normal((5, 5, 32, 64))),
    
    # FC, 7*7*64 inputs, 1024 outputs
    'W_FC_1': tf.Variable(tf.random_normal((7*7*64, 1024))),
    
    # 1024 inputs, 10 outputs
    'OUT': tf.Variable(tf.random_normal((1024, n_classes)))
}

bias = {
    'B_CONV_1': tf.Variable(tf.random_normal((32,))),
    'B_CONV_2': tf.Variable(tf.random_normal((64,))),
    'B_FC_1': tf.Variable(tf.random_normal((1024,))),
    'OUT': tf.Variable(tf.random_normal((n_classes,))),
}

In [10]:
logits = conv_net(X, weight, bias)
predictions = tf.nn.softmax(logits)

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=logits, labels=y))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimize_op = optimizer.minimize(loss_op)

In [11]:
correct = tf.equal(tf.argmax(predictions, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [12]:
init = tf.global_variables_initializer()

In [13]:
def get_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch = X[batch_idx]
        y_batch = y[batch_idx]
        
        yield X_batch, y_batch

In [14]:
with tf.Session() as sess:
    sess.run(init)
    for step, (batch_X, batch_y) in enumerate(get_batch(X_train, y_train, batch_size)):
        sess.run(optimize_op, feed_dict={X: batch_X,
                                         y: batch_y,
                                         keep_prob: dropout})
        if step % display_step == 0:
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_X,
                                                                 y: batch_y,
                                                                 keep_prob: dropout})
            print("step {step}: loss = {loss}, accuracy = {acc}".format(step=step,
                                                                        loss=loss,
                                                                        acc=acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test,
                                             y: y_test,
                                             keep_prob: 1.0})
    print("Testing Accuracy: {}".format(test_acc))

step 0: loss = 90435.6953125, accuracy = 0.13178294897079468
step 10: loss = 55376.44921875, accuracy = 0.1860465109348297
step 20: loss = 37931.41015625, accuracy = 0.2945736348628998
step 30: loss = 24829.33203125, accuracy = 0.40310078859329224
step 40: loss = 21526.392578125, accuracy = 0.5116279125213623
step 50: loss = 12973.8681640625, accuracy = 0.5813953280448914
step 60: loss = 9727.794921875, accuracy = 0.643410861492157
step 70: loss = 9018.072265625, accuracy = 0.6589147448539734
step 80: loss = 10553.1005859375, accuracy = 0.643410861492157
step 90: loss = 5876.2490234375, accuracy = 0.7286821603775024
step 100: loss = 9237.666015625, accuracy = 0.78125
step 110: loss = 5631.43359375, accuracy = 0.7890625
step 120: loss = 3646.650390625, accuracy = 0.8046875
step 130: loss = 4209.49169921875, accuracy = 0.8046875
step 140: loss = 2292.027587890625, accuracy = 0.8671875
step 150: loss = 3959.86328125, accuracy = 0.8359375
step 160: loss = 4414.4541015625, accuracy = 0.7812