Based on [Deep MNIST for Experts](https://www.tensorflow.org/versions/r1.2/get_started/mnist/pros)

# Import libraries

In [1]:
"""A deep MNIST classifier using convolutional layers.

See extensive documentation at
https://www.tensorflow.org/get_started/mnist/pros
"""
# Disable linter warnings to maintain consistency with tutorial.
# pylint: disable=invalid-name
# pylint: disable=g-bad-import-order

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import math

# Define basic blocks

In [2]:
def conv_layer(input, channels_in, channels_out, name="conv"):
  with tf.name_scope(name):
    """conv2d returns a 2d convolution layer with full stride."""
    W = tf.Variable(tf.truncated_normal([5, 5, channels_in, channels_out], stddev=0.1), name="W")
    b = tf.Variable(tf.zeros([channels_out]), name="b")
    conv = tf.nn.conv2d(input, W, strides=[1, 1, 1, 1], padding='SAME', name="conv2d")
    act  = tf.nn.relu(conv + b, name='act')
    max_pool = tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name="max_pool")
    tf.summary.histogram("weights", W)
    tf.summary.histogram("biases", b)
    tf.summary.histogram("activations", act)
    return max_pool

def fc_layer(input, channels_in, channels_out, name='fc'):
  with tf.name_scope(name):
    W = tf.Variable(tf.truncated_normal([channels_in, channels_out], stddev=0.1), name="W")
    b = tf.Variable(tf.zeros([channels_out]), name="b")
    logits = tf.matmul(input, W) + b
    tf.summary.histogram("weights", W)
    tf.summary.histogram("biases", b)
    tf.summary.histogram("logits", logits)
    return logits

# Network Structure

In [3]:
def deepnn(x):
  """deepnn builds the graph for a deep net for classifying digits.

  Args:
    x: an input tensor with the dimensions (N_examples, 784), where 784 is the
    number of pixels in a standard MNIST image.

  Returns:
    A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with values
    equal to the logits of classifying the digit into one of 10 classes (the
    digits 0-9). keep_prob is a scalar placeholder for the probability of
    dropout.
  """
  # Reshape to use within a convolutional neural net.
  # Last dimension is for "features" - there is only one here, since images are
  # grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
  x_image = tf.reshape(x, [-1, 28, 28, 1], name="x_image")
  tf.summary.image('input', x_image, 10)

  conv1      = conv_layer(x_image, 1, 32, "conv1")
  conv2      = conv_layer(conv1,  32, 64, "conv2")
  flattened  = tf.reshape(conv2, [-1, 7*7*64])
  fc1_logits = fc_layer(flattened, 7*7*64, 1024, "fc1_logits")
  fc1_act    = tf.nn.relu(fc1_logits, name="fc1_act")

  keep_prob = tf.placeholder(tf.float32, name="keep_prob")
  drop = tf.nn.dropout(fc1_act, keep_prob, name="drop")

  fc2_logits = fc_layer(drop, 1024, 10, "fc2_logits")

  return fc2_logits, keep_prob

# Import Data

In [5]:
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


# Create the model

In [6]:
x = tf.placeholder(tf.float32, [None, 784], name="x")
y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
y_conv, keep_prob = deepnn(x)

# Train

In [7]:
with tf.name_scope("cross_entropy"):
  cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv), name="cross_entropy")
  tf.summary.scalar("cross_entropy", tf.squeeze(cross_entropy))

with tf.name_scope("train"):
  train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy, name="train_step")

with tf.name_scope("accuracy"):
  correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1), name="correct_prediction")
  #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")
  total_correct = tf.reduce_sum(tf.cast(correct_prediction, tf.float32), name="total_correct")
  total_in_batch = tf.cast(tf.shape(correct_prediction), tf.float32,name="total_in_batch")
  calc_accuracy = total_correct / total_in_batch
  tf.summary.scalar("calc_accuracy", tf.squeeze(calc_accuracy))

# Set up forusing GPU
config=tf.ConfigProto(log_device_placement=True)
config.gpu_options.allow_growth = True

sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter("output1")
writer.add_graph(sess.graph)
for i in range(1001):
  x_train, y_train = mnist.train.next_batch(64)
  if i % 100 == 0:
    calc_acc = sess.run(
        calc_accuracy, 
        feed_dict={x: x_train, y_: y_train, keep_prob: 1.0})
    print('step %d, training accuracy %g' % (i, calc_acc))
  if i % 5 == 0:
    s=sess.run(merged_summary, feed_dict={x: x_train, y_: y_train, keep_prob: 1.0})
    writer.add_summary(s, i)
  sess.run(train_step, feed_dict={x: x_train, y_: y_train, keep_prob: 0.5})

step 0, training accuracy 0.15625
step 100, training accuracy 0.84375
step 200, training accuracy 0.9375
step 300, training accuracy 0.984375
step 400, training accuracy 0.96875
step 500, training accuracy 0.96875
step 600, training accuracy 0.96875
step 700, training accuracy 0.9375
step 800, training accuracy 0.96875
step 900, training accuracy 0.984375
step 1000, training accuracy 0.984375


In [8]:
  print("x_train shape:" + str(x_train.shape))
  print("train images shape: " + str(mnist.train.images.shape))
  print("test images shape: " + str(mnist.test.images.shape))
  x_test, y_test = mnist.test.next_batch(64)
  print(x_test.shape)
  print(y_test.shape)

x_train shape:(64, 784)
train images shape: (55000, 784)
test images shape: (10000, 784)
(64, 784)
(64, 10)


# Test

In [9]:
test_minibatch_size = 64
num_of_test_vectors = mnist.test.images.shape[0]
print("num_of_test_vectors: " + str(num_of_test_vectors))
num_of_test_minibatches = math.ceil(num_of_test_vectors / test_minibatch_size)
print("num of minibatches = " + str(num_of_test_minibatches))

total, correct = 0, 0
for _ in range(num_of_test_minibatches):
    x_test, y_test = mnist.test.next_batch(test_minibatch_size)
    batch_total, batch_correct = sess.run([total_in_batch, total_correct], feed_dict={
          x: x_test, y_: y_test, keep_prob: 1.0})
    total += batch_total
    correct += batch_correct

test_accuracy = correct / total
print('test accuracy %g' % (test_accuracy))

num_of_test_vectors: 10000
num of minibatches = 157
test accuracy 0.971537


In [10]:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

SystemError: GPU device not found