## Load MNIST Dataset & Import TensorFlow

In [2]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST splits from the 'MNIST_data' directory with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## Input Layer

In [3]:
# Flattened input images: any batch size, 784 values per image.
x = tf.placeholder('float', shape=[None, 784])
# View each row as a 28x28 single-channel image for the convolution layers.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

In [4]:
# Display the static shape of the reshaped input tensor.
x_image.get_shape()

TensorShape([Dimension(None), Dimension(28), Dimension(28), Dimension(1)])

## Convolution Matrix Example
![image](https://docs.gimp.org/en/images/filters/examples/convolution-calculate.png)

## Variable Constructors

In [5]:
def weight_variable(shape):
    """Return a tf.Variable of `shape` with small random initial values.

    Values are drawn from a truncated normal distribution (mean 0.0,
    stddev 0.1), i.e. draws whose magnitude is more than 2 standard
    deviations are truncated.
    """
    return tf.Variable(tf.truncated_normal(shape, mean=0.0, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of `shape` with every entry initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

## Layer Constructors

In [6]:
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 on every axis and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with a [1, 2, 2, 1] window, identical strides, and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

## First Hidden Layer

### Weight & Bias

In [7]:
# Parameters of the first convolution: a [5, 5, 1, 32] filter bank (5x5
# patches, 1 input channel, 32 output channels) and one bias per output channel.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

### Convolution Layer

In [10]:
# Pre-activation of the first layer: convolve the input image with W_conv1
# and add b_conv1, which broadcasts over the last (channel) axis.
z_conv1 = conv2d(x_image, W_conv1) + b_conv1

### Activation Function: ReLU

In [14]:
# Apply the ReLU non-linearity to the first layer's pre-activation.
h_conv1 = tf.nn.relu(z_conv1)

### Max Pooling Layer

In [15]:
# Downsample the activated feature maps with the 2x2 max-pooling helper.
h_pool1 = max_pool_2x2(h_conv1)

In [12]:
# Show the static shapes of the first layer's input, filter and bias.
for tag, tensor in (('x', x_image), ('W', W_conv1), ('b', b_conv1)):
    print(tag, tensor.get_shape())

x (?, 28, 28, 1)
W (5, 5, 1, 32)
b (32,)


In [13]:
# Show the static shapes of the first layer's outputs before and after pooling.
for tag, tensor in (('h_conv1', h_conv1), ('h_pool1', h_pool1)):
    print(tag, tensor.get_shape())

h_conv1 (?, 28, 28, 32)
h_pool1 (?, 14, 14, 32)


## Second Hidden Layer

In [10]:
# Parameters of the second convolution: a [5, 5, 32, 64] filter bank taking
# the 32 channels of h_pool1 to 64 output channels, plus one bias per output channel.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

In [11]:
# Convolve the pooled first-layer output, add the bias, then apply ReLU.
z_conv2 = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(z_conv2)
# Downsample with the 2x2 max-pooling helper.
h_pool2 = max_pool_2x2(h_conv2)

In [12]:
# Show the static shapes of the second layer's outputs before and after pooling.
for tag, tensor in (('h_conv2', h_conv2), ('h_pool2', h_pool2)):
    print(tag, tensor.get_shape())

h_conv2 (?, 14, 14, 64)
h_pool2 (?, 7, 7, 64)


## Output Layer

### First Fully Connected Output Layer

In [13]:
# Number of features per example once the 7x7x64 pooled maps are flattened.
flat_size = 7 * 7 * 64
h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])

# Dense layer parameters: flat_size inputs -> 1024 hidden units.
W_fc1 = weight_variable([flat_size, 1024])
b_fc1 = bias_variable([1024])

# Affine transform followed by ReLU.
fc1_pre_activation = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)

### Dropout

In [14]:
# keep_prob is a placeholder so the dropout setting is chosen at run time:
# the training loop feeds 0.5, the accuracy evaluations feed 1.0.
keep_prob = tf.placeholder('float')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

### Second Fully Connected Output Layer

In [15]:
# Final fully connected layer: maps the 1024 features (after dropout) to 10
# un-normalised class scores, one per one-hot label position.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

h_fc2 = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

### Softmax

In [16]:
# Normalise the 10 class scores with softmax to obtain the model's predictions.
y = tf.nn.softmax(h_fc2)

## Training

In [17]:
# One-hot ground-truth labels: one row of 10 entries per example.
y_label = tf.placeholder('float', [None, 10])
# Compute the cross-entropy directly from the logits (h_fc2) instead of
# -sum(y_label * log(softmax(h_fc2))): once a softmax output underflows to 0,
# tf.log returns -inf and the loss/gradients turn into NaN. The fused op is
# numerically stable. Per-example losses are summed (not averaged) so the
# loss scale, and hence the effective learning rate, matches the original.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_label, logits=h_fc2))
train = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# A prediction counts as correct when the arg-max of the model output matches
# the arg-max of the one-hot label; accuracy is the mean of those matches.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_label, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

In [18]:
sess = tf.Session()
# tf.initialize_all_variables() is deprecated in favour of
# tf.global_variables_initializer(). Use the newer name when this TensorFlow
# build provides it, and fall back to the legacy one otherwise.
if hasattr(tf, 'global_variables_initializer'):
    init_op = tf.global_variables_initializer()
else:
    init_op = tf.initialize_all_variables()
sess.run(init_op)

In [19]:
for i in range(20000):
    batch_x, batch_y = mnist.train.next_batch(50)
    # Inputs shared by the evaluation and training runs of this step.
    batch_feed = {x: batch_x, y_label: batch_y}

    # Every 1000th step, report accuracy on the current batch with
    # keep_prob = 1.0, before the batch is used for a training step.
    if not i % 1000:
        eval_feed = dict(batch_feed)
        eval_feed[keep_prob] = 1.0
        train_accuracy = sess.run(accuracy, feed_dict=eval_feed)
        print('step %d, training accuracy %g' % (i, train_accuracy))

    # One optimizer step on this batch with keep_prob = 0.5.
    train_feed = dict(batch_feed)
    train_feed[keep_prob] = 0.5
    sess.run(train, feed_dict=train_feed)

step 0, training accuracy 0.02
step 1000, training accuracy 0.94
step 2000, training accuracy 0.92
step 3000, training accuracy 0.98
step 4000, training accuracy 1
step 5000, training accuracy 1
step 6000, training accuracy 0.98
step 7000, training accuracy 1
step 8000, training accuracy 1
step 9000, training accuracy 1
step 10000, training accuracy 1
step 11000, training accuracy 1
step 12000, training accuracy 1
step 13000, training accuracy 1
step 14000, training accuracy 1
step 15000, training accuracy 1
step 16000, training accuracy 1
step 17000, training accuracy 1
step 18000, training accuracy 1
step 19000, training accuracy 1


In [20]:
# Evaluate accuracy on the full test split with keep_prob = 1.0.
# (The original line read `feed_dict=d_dict={`, which is a syntax error.)
test_accuracy = sess.run(accuracy, feed_dict={
        x: mnist.test.images,
        y_label: mnist.test.labels,
        keep_prob: 1.0
    })

print('test accuracy %g' % test_accuracy)

test accuracy 0.9931
