In [7]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets

# Load MNIST from the sibling "data" directory.
# A forward slash is used on purpose: the previous "..\data" literal contained
# "\d" (an invalid escape sequence that newer Python versions warn about) and
# only meant "parent directory" on Windows. "../data" works on every platform.
#   one_hot=True       -> labels are 10-element indicator vectors
#   reshape=False      -> images keep their [28, 28, 1] shape
#   validation_size=0  -> no training examples are held out for validation
mnist = read_data_sets("../data", one_hot=True, reshape=False, validation_size=0)

# Seed the random number generators so that reruns are comparable.
# NOTE(review): mini-batch shuffling in next_batch appears to rely on NumPy's
# global RNG - confirm for the installed TensorFlow version. Seeding is done
# after loading so nothing in the loader can re-seed it afterwards.
np.random.seed(0)
tf.set_random_seed(0) # graph-level seed used by the weight initializers

# Deep model with [input]->[200 neurons]->[100 neurons]->[output] network architecture
#
# Input placeholder for a batch of 28x28 single-channel images. The batch
# dimension is None, so any number of images can be fed: 100 per training
# step, and the whole test set at evaluation time.
X = tf.placeholder(tf.float32, [None, 28, 28, 1]) # [batch, 28, 28, 1]

# Flatten every image into one 784-element row -> [batch, 784]
X_ = tf.reshape(X, [-1, 784]) # "-1" = infer this dimension so the total number of elements is preserved

# First hidden layer: 200 sigmoid neurons fed by the 784 flattened pixels.
# Weights start as truncated-normal noise (stddev 0.1); biases start at zero.
W1 = tf.Variable(tf.truncated_normal([784, 200], stddev=0.1)) # weights, [784, 200]
b1 = tf.Variable(tf.zeros([200])) # biases, [200]
Y1 = tf.nn.sigmoid(tf.matmul(X_, W1) + b1) # [batch x 784]*[784 x 200]+b1 -> [batch x 200]

# Second hidden layer: 100 sigmoid neurons fed by the 200 activations in Y1.
# Same initialization scheme as the first layer (truncated normal, zero biases).
W2 = tf.Variable(tf.truncated_normal([200, 100], stddev=0.1)) # weights, [200, 100]
b2 = tf.Variable(tf.zeros([100])) # biases, [100]
Y2 = tf.nn.sigmoid(tf.matmul(Y1, W2) + b2) # [batch x 200]*[200 x 100]+b2 -> [batch x 100]

# Final output layer: 10 units, one per digit class.
W3 = tf.Variable(tf.truncated_normal([100, 10], stddev=0.1)) # weights, [100, 10]
b3 = tf.Variable(tf.zeros([10])) # biases, [10]
Ylogits = tf.matmul(Y2, W3) + b3 # raw class scores, [batch x 100]*[100 x 10]+b3 -> [batch x 10]
Y = tf.nn.softmax(Ylogits) # predicted class probabilities, [batch x 10]

# Loss and optimization method
Y_ = tf.placeholder(tf.float32, [None, 10]) # actual correct results (one-hot labels)

# Cross-entropy summed over every example in the fed batch.
# It is computed from the logits rather than as -sum(Y_ * log(Y)): once a
# softmax output underflows to 0, log(0) = -inf and 0 * -inf = NaN, which would
# poison the loss and every gradient. The fused op evaluates the same quantity
# in a numerically stable way.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_))

eta = 0.003 # learning rate
optimizer = tf.train.GradientDescentOptimizer(eta)
train_step = optimizer.minimize(cross_entropy) # one gradient-descent update of all variables

# Training / evaluation settings
NUM_STEPS = 1000 # number of gradient-descent updates
BATCH_SIZE = 100 # images fed per update
SHOW_TRAINING_PROGRESS = False # set to True to show accuracy at each training step

# Accuracy metric: fraction of examples whose highest-scoring predicted class
# equals the labelled class. The ops are built once, before the loop; building
# them inside the loop would add new nodes to the graph on every iteration.
is_correct = tf.equal(tf.argmax(Y,1), tf.argmax(Y_,1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# train the model
init = tf.global_variables_initializer()
sess = tf.Session() # create a session; it is not closed in this cell, call sess.close() when done with the model
sess.run(init) # initialize all variables

for i in range(NUM_STEPS):
    batch_X, batch_Y = mnist.train.next_batch(BATCH_SIZE) # get the next mini-batch of images and labels
    train_data = {X: batch_X, Y_: batch_Y} # create a feed_dict

    sess.run(train_step, train_data)

    if SHOW_TRAINING_PROGRESS:
        # metrics are evaluated on the batch that was just used for the update
        a,c = sess.run([accuracy, cross_entropy], train_data)
        print("\nTraining accuracy: " + str(a) + " Training loss: " + str(c))

# check accuracy on test data
# cross_entropy is a sum over the fed examples, so the reported test loss scales
# with the size of the test set and is not directly comparable to a batch loss.
test_data = {X: mnist.test.images, Y_: mnist.test.labels}
a,c = sess.run([accuracy, cross_entropy], test_data)

print("\nTest accuracy: " + str(a) + "\nTest loss: " + str(c))

Extracting ..\data\train-images-idx3-ubyte.gz
Extracting ..\data\train-labels-idx1-ubyte.gz
Extracting ..\data\t10k-images-idx3-ubyte.gz
Extracting ..\data\t10k-labels-idx1-ubyte.gz

Test accuracy: 0.9103
Test loss: 3076.79
