In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Read the MNIST archives found in the current directory. Labels come back
# one-hot encoded; reshape=False keeps each image in the 28x28x1 layout that
# the input placeholder of the network expects.
mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)

Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz


In [2]:
# --- Optimisation settings ---
learning_rate = 1e-5   # step size handed to the gradient-descent optimizer
epochs = 10            # number of full passes over the training batches
batch_size = 128       # examples requested per training step

# Number of validation / test examples scored at each evaluation.
test_valid_size = 256

# --- Network settings ---
n_classes = 10         # width of the output layer and of the one-hot labels
dropout = 0.75         # value fed to the keep_prob placeholder while training

In [3]:
# Every trainable parameter starts as a draw from tf.random_normal with the
# shape listed next to its name. Convolution filters are laid out as
# [height, width, in_channels, out_channels]; dense matrices as
# [inputs, outputs].
weights = {
    name: tf.Variable(tf.random_normal(shape))
    for name, shape in (
        ('wc1', [5, 5, 1, 32]),        # first conv layer: 5x5 filters, 1 -> 32 channels
        ('wc2', [5, 5, 32, 64]),       # second conv layer: 5x5 filters, 32 -> 64 channels
        ('wd1', [7*7*64, 1024]),       # dense layer over the flattened 7x7x64 volume
        ('out', [1024, n_classes]),    # projection onto the class scores
    )
}

# One bias vector per layer, sized to that layer's number of outputs.
biases = {
    name: tf.Variable(tf.random_normal(shape))
    for name, shape in (
        ('bc1', [32]),
        ('bc2', [64]),
        ('bd1', [1024]),
        ('out', [n_classes]),
    )
}

In [4]:
def conv2d(x, w, b, strides=1):
    """Apply a SAME-padded 2-D convolution, add the bias, then ReLU.

    Args:
        x: input tensor passed to tf.nn.conv2d.
        w: 4-D filter tensor.
        b: 1-D bias tensor, one entry per output feature map.
        strides: step used for both the height and the width axes.

    Returns:
        The rectified, biased convolution output.
    """
    # Stride layout is [batch, height, width, feature]. The batch and feature
    # steps stay at 1; height and width share one value for a square stride.
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, w, strides=stride_spec, padding='SAME')

    # bias_add is used because the operands differ in rank: the bias is 1-D
    # while the convolution output is 4-D.
    biased = tf.nn.bias_add(convolved, b)
    return tf.nn.relu(biased)

In [5]:
def maxpool2d(x, k=2):
    """Max-pool x with a square k-by-k window and SAME padding.

    The stride equals the window size, so the window moves k steps at a time
    and consecutive windows do not overlap.
    """
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [6]:
def conv_net(x, weights, biases, dropout):
    """Build the network: two conv/pool stages, one dense layer, class scores.

    Args:
        x: batch of 28x28x1 input images.
        weights: dict holding the 'wc1', 'wc2', 'wd1' and 'out' tensors.
        biases: dict holding the 'bc1', 'bc2', 'bd1' and 'out' tensors.
        dropout: value forwarded as the second argument of tf.nn.dropout.

    Returns:
        The unnormalised class scores (logits), one row per input image.
    """
    # Stage 1: 28x28x1 -> conv -> 28x28x32 -> pool -> 14x14x32
    stage1 = conv2d(x, weights['wc1'], biases['bc1'])
    stage1 = maxpool2d(stage1, k=2)

    # Stage 2: 14x14x32 -> conv -> 14x14x64 -> pool -> 7x7x64
    stage2 = conv2d(stage1, weights['wc2'], biases['bc2'])
    stage2 = maxpool2d(stage2, k=2)

    # Flatten each 7x7x64 volume into a vector of length 7*7*64 = 3136; the
    # length is read from the dense weight matrix so the two always agree.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    flattened = tf.reshape(stage2, [-1, flat_width])

    # Dense layer: affine map, ReLU, then dropout.
    hidden = tf.matmul(flattened, weights['wd1'])
    hidden = tf.add(hidden, biases['bd1'])
    hidden = tf.nn.relu(hidden)
    hidden = tf.nn.dropout(hidden, dropout)

    # Output layer: affine map only; softmax is left to the loss.
    scores = tf.matmul(hidden, weights['out'])
    return tf.add(scores, biases['out'])

In [7]:
# Graph inputs: a batch of images, their one-hot labels, and the value that
# conv_net forwards to its dropout layer.
x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
y = tf.placeholder(tf.float32, shape=[None, n_classes])
keep_prob = tf.placeholder(tf.float32)

logits = conv_net(x, weights, biases, keep_prob)

# Training objective: softmax cross-entropy averaged over the batch.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
)

# Plain gradient descent on that objective.
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=learning_rate
).minimize(cost)

# A prediction is correct when the highest-scoring class matches the label;
# accuracy is the fraction of correct predictions in the batch.
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

In [22]:
%timeit
with tf.Session() as sess:
    sess.run(init)
    num_batches = int(mnist.train.num_examples / batch_size)
    for epoch in range(epochs):
        for batch in range(int(mnist.train.num_examples / batch_size)):
            images, labels = mnist.train.next_batch(batch_size)
            
            sess.run(optimizer, 
                     feed_dict = {
                         x : images,
                         y : labels,
                         keep_prob : dropout
                     })
            if batch % 10 == 0:
                train_loss, train_acc = sess.run([cost, accuracy], 
                                       feed_dict={
                                             x : images,
                                             y : labels,
                                             keep_prob : 1.0                                           
                                       })
                valid_acc = sess.run(accuracy,
                                feed_dict={
                                    x: mnist.validation.images[:test_valid_size],
                                    y: mnist.validation.labels[:test_valid_size],
                                    keep_prob: 1.0
                                })
                print('Epoch {:>2}, Batch {:>3}/{:>2} - Loss {:>10.4f} Train Accuracy {:.6f} - Val Accuracy {:.6f}'\
                     .format(epoch + 1, batch + 1, num_batches, train_loss, train_acc, valid_acc)
                     )
    test_acc = sess.run(accuracy, feed_dict={
        x: mnist.test.images[:test_valid_size],
        y: mnist.test.labels[:test_valid_size],
        keep_prob: 1.0
    })
    print('Testing Accuracy: {}'.format(test_acc))

Epoch  1, Batch   1/429 - Loss 63851.8164 Train Accuracy 0.171875 - Val Accuracy 0.175781
Epoch  1, Batch  11/429 - Loss 28516.5625 Train Accuracy 0.273438 - Val Accuracy 0.222656
Epoch  1, Batch  21/429 - Loss 17210.3320 Train Accuracy 0.257812 - Val Accuracy 0.335938
Epoch  1, Batch  31/429 - Loss 13066.3652 Train Accuracy 0.382812 - Val Accuracy 0.410156
Epoch  1, Batch  41/429 - Loss 10234.9160 Train Accuracy 0.406250 - Val Accuracy 0.468750
Epoch  1, Batch  51/429 - Loss  8564.0957 Train Accuracy 0.507812 - Val Accuracy 0.500000
Epoch  1, Batch  61/429 - Loss  7759.7900 Train Accuracy 0.515625 - Val Accuracy 0.558594
Epoch  1, Batch  71/429 - Loss  4711.3623 Train Accuracy 0.593750 - Val Accuracy 0.609375
Epoch  1, Batch  81/429 - Loss  4924.7158 Train Accuracy 0.640625 - Val Accuracy 0.640625
Epoch  1, Batch  91/429 - Loss  6558.4468 Train Accuracy 0.570312 - Val Accuracy 0.660156
Epoch  1, Batch 101/429 - Loss  4780.0781 Train Accuracy 0.687500 - Val Accuracy 0.683594
Epoch  1, 

In [19]:
# Whole batches that fit in the training set at the configured batch size.
print('num batches', mnist.train.num_examples // batch_size)

num batches 429
