In [1]:
# import usual suspects
import tensorflow as tf
import numpy as np

In [2]:
# Get data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data/', one_hot = True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
print (mnist.train.num_examples)
print (mnist.train.images.shape)

55000
(55000, 784)


In [16]:
# setup hyper parameters (learning rate, training epochs, batch size, display step)
learning_rate = 0.001
training_epochs = 10
batch_size = 256
display_step = 1

In [7]:
# setup network parameters (size of different hidden layers, size of input features, size of output classes)
n_hidden_1 = 256
n_hidden_2 = 256
n_input = 784
n_classes = 10

In [8]:
# setup input placeholders for X with shape of (None initial, n_input ) and Y with shape (None initial , n_classes )
X = tf.placeholder(tf.float32, shape=[None, n_input])
Y = tf.placeholder(tf.float32, shape=[None, n_classes])


In [9]:
# setup weight and bias variables
W = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1]), name='h1'),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]), name='h2'),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]), name='w_out')
}

b = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1]), name='b1'),
    'b2': tf.Variable(tf.random_normal([n_hidden_2]), name='b2'),
    'out': tf.Variable(tf.random_normal([n_classes]), name='b_out')
}

In [10]:
# create a function name multilayer_perceptron(x,weights, biases) and return out_layer

def multilayer_perceptron(x, weights, bias):
    # 1 hidden layer with Linear and then RELU Activation
    layer1 = tf.add(tf.matmul(x, weights['h1']), bias['b1'])
    layer1 = tf.nn.relu(layer1)
    
    # 2 hidden layer with Linear and then RELU Activation
    layer2 = tf.add(tf.matmul(layer1, weights['h2']), bias['b2'])
    layer2 = tf.nn.relu(layer2)
    
    # Outut layer with Linear Activation
    out_layer = tf.add(tf.matmul(layer2, weights['out']), bias['out'])
    
    return out_layer



In [11]:
# setup pred model
pred = multilayer_perceptron(X, W, b)

In [12]:
# setup cost which is average of cross entropy function aka -ylog(pred)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=Y))

In [13]:
# setup optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

In [14]:
# initialzie all variables
init = tf.global_variables_initializer()

In [19]:
# run the graph within session

with tf.Session() as sess:
    sess.run(init)
    
    # loop through all the epochs
    for epoch in range(training_epochs):
        avg_cost = 0
        
        #get total batches and loop through them
        total_batches = int(mnist.train.num_examples / batch_size)
        for batch in range(total_batches):
            
            # get trainX and trainY for each batch from training data
            trainX, trainY = mnist.train.next_batch(batch)
            
            # Train & run optimzer, cost in same session and feed values to Variables
            o, c = sess.run([optimizer, cost], feed_dict={X: trainX, Y:trainY})
            
            #compute average cost
            avg_cost += c / total_batches
            
            if (epoch % display_step) == 0:
                print ('batch=', batch+1, 'Cost=', c, 'Average Cost=', avg_cost, ' for Total batches', total_batches)
        
        # Test Model for every epoch
        correct_predection = tf.equal(tf.argmax(pred, axis=1), tf.argmax(Y, axis=1))
        
        # Calculate Accuracy aftering testing
        accuracy = tf.reduce_mean(tf.cast(correct_predection, tf.float32))
        accuracy_eval = accuracy.eval({X: mnist.test.images, Y: mnist.test.labels})
        
        # print display step with epoch, and cost
        if (epoch % display_step) == 0:
            print ('Epoch=', epoch+1, 'Average Cost=', avg_cost, 'Accuracy=', accuracy_eval)
            
    # print optimization finished for all epochs
    print ("Optimization Finished for all Epochs!")


batch= 1 Cost= nan Average Cost= nan  for Total batches 214
batch= 2 Cost= 0.0 Average Cost= nan  for Total batches 214
batch= 3 Cost= 1966.81 Average Cost= nan  for Total batches 214
batch= 4 Cost= 914.831 Average Cost= nan  for Total batches 214
batch= 5 Cost= 1358.1 Average Cost= nan  for Total batches 214
batch= 6 Cost= 1333.05 Average Cost= nan  for Total batches 214
batch= 7 Cost= 1386.54 Average Cost= nan  for Total batches 214
batch= 8 Cost= 1783.8 Average Cost= nan  for Total batches 214
batch= 9 Cost= 1589.03 Average Cost= nan  for Total batches 214
batch= 10 Cost= 1240.67 Average Cost= nan  for Total batches 214
batch= 11 Cost= 1266.13 Average Cost= nan  for Total batches 214
batch= 12 Cost= 1300.28 Average Cost= nan  for Total batches 214
batch= 13 Cost= 880.426 Average Cost= nan  for Total batches 214
batch= 14 Cost= 806.867 Average Cost= nan  for Total batches 214
batch= 15 Cost= 1087.8 Average Cost= nan  for Total batches 214
batch= 16 Cost= 981.3 Average Cost= nan  for 