In [1]:
import time
import numpy as np
import tensorflow as tf

In [2]:
def one_hot_matrix(labels, depth):
    """Return the one-hot encoding of integer class labels as a float32 array.

    A new trailing axis of length `depth` is appended to the shape of
    `labels`; along that axis the position equal to the label holds 1.0 and
    every other position holds 0.0. A label outside `[0, depth)` produces an
    all-zero row, which is the same convention `tf.one_hot` uses.

    The encoding is computed with NumPy, so a call neither adds ops to the
    default TensorFlow graph nor opens a throwaway session.

    Args:
        labels: integer scalar or array-like of class indices.
        depth: number of classes, i.e. the length of the one-hot axis.

    Returns:
        np.ndarray of dtype float32 with shape `np.shape(labels) + (depth,)`.
    """
    label_ids = np.asarray(labels)
    class_ids = np.arange(depth)
    # Broadcasting compares every label against every class index at once.
    return (label_ids[..., np.newaxis] == class_ids).astype(np.float32)

In [None]:
#Old way: one example per column, i.e. arrays are (features, examples)
mnist = tf.keras.datasets.mnist
(x_train_orig, y_train_orig), (x_test_orig, y_test_orig) = mnist.load_data()

#Flatten the training and test images, then transpose so each image is a column
x_train_flat = x_train_orig.reshape(x_train_orig.shape[0], -1).T
x_test_flat = x_test_orig.reshape(x_test_orig.shape[0], -1).T

#Normalize image vectors by dividing the raw pixel values by 255
x_train = x_train_flat / 255.0
x_test = x_test_flat / 255.0

#Convert training and test labels to one-hot matrices.
#one_hot_matrix puts the class axis last, giving (examples, classes), so the
#result is transposed to match the column-per-example layout of x_train/x_test.
num_classes = 10
y_train = one_hot_matrix(y_train_orig, num_classes).T
y_test = one_hot_matrix(y_test_orig, num_classes).T

n_x = x_train.shape[0]  #input features per example
n_y = y_train.shape[0]  #number of classes, taken from the labels rather than from x_test

print ("number of training examples = " + str(x_train.shape[1]))
print ("number of test examples = " + str(x_test.shape[1]))
print ("X_train shape: " + str(x_train.shape))
print ("Y_train shape: " + str(y_train.shape))
print ("X_test shape: " + str(x_test.shape))
print ("Y_test shape: " + str(y_test.shape))

#print(y_train_orig[0:5])
#print(y_train[:, 0:5])

In [3]:
#New way: one example per row, i.e. arrays are (examples, features)
mnist = tf.keras.datasets.mnist
(x_train_orig, y_train_orig), (x_test_orig, y_test_orig) = mnist.load_data()

#Flatten every training and test image into a single row vector
x_train_flat = x_train_orig.reshape(x_train_orig.shape[0], -1)
x_test_flat = x_test_orig.reshape(x_test_orig.shape[0], -1)

#Normalize image vectors by dividing the raw pixel values by 255
x_train = x_train_flat / 255.0
x_test = x_test_flat / 255.0

#Convert training and test labels to one-hot matrices of shape (examples, num_classes)
num_classes = 10
y_train = one_hot_matrix(y_train_orig, num_classes)
y_test = one_hot_matrix(y_test_orig, num_classes)

n_x = x_train.shape[1]  #input features per example
n_y = y_train.shape[1]  #number of classes, taken from the labels rather than from x_test

print ("number of training examples = " + str(x_train.shape[0]))
print ("number of test examples = " + str(x_test.shape[0]))
print ("X_train shape: " + str(x_train.shape))
print ("Y_train shape: " + str(y_train.shape))
print ("X_test shape: " + str(x_test.shape))
print ("Y_test shape: " + str(y_test.shape))

#Peek at the first two examples and their one-hot labels
print(x_train[0:2, :])
print(y_train[0:2, :])

print(x_train_flat.shape)

number of training examples = 60000
number of test examples = 10000
X_train shape: (60000, 784)
Y_train shape: (60000, 10)
X_test shape: (10000, 784)
Y_test shape: (10000, 10)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
(60000, 784)


In [None]:
#Old way
def create_placeholders(n_x, n_y):
    """Build the float32 feed placeholders for the column-per-example layout.

    Args:
        n_x: size of the first axis of the input placeholder "X".
        n_y: size of the first axis of the label placeholder "Y".

    Returns:
        Tuple (X, Y) of placeholders shaped (n_x, None) and (n_y, None); the
        second axis is left unspecified so any number of examples can be fed.
    """
    inputs = tf.placeholder(dtype=tf.float32, shape=(n_x, None), name="X")
    targets = tf.placeholder(dtype=tf.float32, shape=(n_y, None), name="Y")
    return inputs, targets

In [4]:
#New way
def create_placeholders(n_x, n_y):
    """Build the float32 feed placeholders for the row-per-example layout.

    Args:
        n_x: size of the second axis of the input placeholder "X".
        n_y: size of the second axis of the label placeholder "Y".

    Returns:
        Tuple (X, Y) of placeholders shaped (None, n_x) and (None, n_y); the
        first axis is left unspecified so any number of examples can be fed.
    """
    inputs = tf.placeholder(dtype=tf.float32, shape=(None, n_x), name="X")
    targets = tf.placeholder(dtype=tf.float32, shape=(None, n_y), name="Y")
    return inputs, targets

In [None]:
#Old way
def initialize_parameters(n_x, n_h1=100, n_h2=100, n_y=10):
    """Create the weights and biases of a three-layer network (column layout).

    Weights are stored as (units_out, units_in) and biases as (units_out, 1),
    matching a forward pass of the form `matmul(W, X) + b`. Weights use Xavier
    initialization with a fixed seed; biases start at zero.

    Args:
        n_x: number of input features.
        n_h1: units in the first hidden layer (default 100).
        n_h2: units in the second hidden layer (default 100).
        n_y: units in the output layer (default 10).

    Returns:
        Dict mapping "W1", "b1", "W2", "b2", "W3", "b3" to the created
        TensorFlow variables.
    """
    W1 = tf.get_variable("W1", [n_h1, n_x], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b1 = tf.get_variable("b1", [n_h1, 1], initializer = tf.zeros_initializer())
    W2 = tf.get_variable("W2", [n_h2, n_h1], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b2 = tf.get_variable("b2", [n_h2, 1], initializer = tf.zeros_initializer())
    W3 = tf.get_variable("W3", [n_y, n_h2], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b3 = tf.get_variable("b3", [n_y, 1], initializer = tf.zeros_initializer())

    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2,
        "W3": W3,
        "b3": b3
    }

    return parameters

In [5]:
#New way
def initialize_parameters(n_x, n_h1=100, n_h2=100, n_y=10):
    """Create the weights and biases of a three-layer network (row layout).

    Weights are stored as (units_in, units_out) and biases as (1, units_out),
    matching a forward pass of the form `matmul(X, W) + b`. Weights use Xavier
    initialization with a fixed seed; biases start at zero.

    Args:
        n_x: number of input features.
        n_h1: units in the first hidden layer (default 100).
        n_h2: units in the second hidden layer (default 100).
        n_y: units in the output layer (default 10).

    Returns:
        Dict mapping "W1", "b1", "W2", "b2", "W3", "b3" to the created
        TensorFlow variables.
    """
    W1 = tf.get_variable("W1", [n_x, n_h1], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b1 = tf.get_variable("b1", [1, n_h1], initializer = tf.zeros_initializer())
    W2 = tf.get_variable("W2", [n_h1, n_h2], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b2 = tf.get_variable("b2", [1, n_h2], initializer = tf.zeros_initializer())
    W3 = tf.get_variable("W3", [n_h2, n_y], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b3 = tf.get_variable("b3", [1, n_y], initializer = tf.zeros_initializer())

    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2,
        "W3": W3,
        "b3": b3
    }

    return parameters

In [None]:
#Old way
def forward_propagation(X, parameters):
    """Run the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    Column-per-example layout: each layer computes `matmul(W, activations) + b`.

    Args:
        X: input tensor fed to the first layer.
        parameters: dict holding "W1", "b1", "W2", "b2", "W3", "b3".

    Returns:
        The output of the last linear layer (logits; no softmax is applied).
    """
    hidden1 = tf.nn.relu(tf.matmul(parameters['W1'], X) + parameters['b1'])
    hidden2 = tf.nn.relu(tf.matmul(parameters['W2'], hidden1) + parameters['b2'])
    return tf.matmul(parameters['W3'], hidden2) + parameters['b3']

In [6]:
#New way
def forward_propagation(X, parameters):
    """Run the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    Row-per-example layout: each layer computes `matmul(activations, W) + b`.

    Args:
        X: input tensor fed to the first layer.
        parameters: dict holding "W1", "b1", "W2", "b2", "W3", "b3".

    Returns:
        The output of the last linear layer (logits; no softmax is applied).
    """
    hidden1 = tf.nn.relu(tf.matmul(X, parameters['W1']) + parameters['b1'])
    hidden2 = tf.nn.relu(tf.matmul(hidden1, parameters['W2']) + parameters['b2'])
    return tf.matmul(hidden2, parameters['W3']) + parameters['b3']

In [None]:
#Old way
def compute_cost(Z, Y):
    """Mean softmax cross-entropy between logits Z and one-hot labels Y.

    Both arguments arrive in the column-per-example layout and are transposed
    first, because the cross-entropy op treats the last axis as the class axis.

    Args:
        Z: logits tensor, classes along the first axis.
        Y: label tensor with the same layout as Z.

    Returns:
        Scalar tensor holding the cost averaged over all examples.
    """
    logits = tf.transpose(Z)
    labels = tf.transpose(Y)

    # The non-_v2 op is deprecated. The _v2 variant computes the same loss; it
    # additionally lets gradients flow into `labels`, which has no effect when
    # the labels are fed through a placeholder.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = logits, labels = labels)
    cost = tf.reduce_mean(cross_entropy)
    return cost

In [7]:
#New way
def compute_cost(Z, Y):
    """Mean softmax cross-entropy between logits Z and labels Y.

    No transpose is needed in the row-per-example layout: the class axis is
    already the last one, which is what the cross-entropy op expects.

    Args:
        Z: logits tensor, one row per example.
        Y: label tensor with the same layout as Z.

    Returns:
        Scalar tensor holding the cost averaged over all examples.
    """
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels = Y, logits = Z)
    return tf.reduce_mean(per_example_loss)

In [None]:
#Old way
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.01, num_epochs = 50):
    """Build, train and evaluate the three-layer network (column-per-example layout).

    The default graph is reset and re-seeded, the network is built, and Adam
    is run for `num_epochs` steps. Each "epoch" is a single optimizer step on
    the whole training set (no mini-batching). The cost is printed every 10
    epochs, followed by the training time and the train/test accuracy.

    Args:
        X_train: training inputs, shape (n_x, m).
        Y_train: training labels, shape (n_y, m).
        X_test: test inputs, same layout as X_train.
        Y_test: test labels, same layout as Y_train.
        learning_rate: step size passed to the Adam optimizer.
        num_epochs: number of full-batch optimization steps.

    Returns:
        None; results are reported via print.
    """
    tf.reset_default_graph()
    tf.set_random_seed(1)

    n_x = X_train.shape[0]
    n_y = Y_train.shape[0]

    X, Y = create_placeholders(n_x, n_y)

    parameters = initialize_parameters(n_x)

    Z = forward_propagation(X, parameters)

    cost = compute_cost(Z, Y)

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # Accuracy ops belong to the graph, so they are built alongside the rest of it.
    # tf.argmax defaults to axis 0, which is the class axis in this layout.
    correct_prediction = tf.equal(tf.argmax(Z), tf.argmax(Y))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        begin_time = time.time()
        for epoch in range(num_epochs):

            _, epoch_cost = sess.run([optimizer, cost], feed_dict={X: X_train, Y: Y_train})
            if (epoch % 10 == 0):
                print("Cost after epoch {}: {}".format(epoch, epoch_cost))

        end_time = time.time()

        # time.time() differences are plain seconds, so label them as such.
        print('Time elapsed {:.3f} seconds'.format(end_time - begin_time))

        # Evaluate the trained network on the training and test sets
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print("Train Accuracy: {:.4f}".format(train_accuracy))
        print("Test Accuracy: {:.4f}".format(test_accuracy))

In [10]:
#New way
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.01, num_epochs = 50):
    """Build, train and evaluate the three-layer network (row-per-example layout).

    The default graph is reset and re-seeded, the network is built, and Adam
    is run for `num_epochs` steps. Each "epoch" is a single optimizer step on
    the whole training set (no mini-batching). The cost is printed every 10
    epochs, followed by the training time and the train/test accuracy.

    Args:
        X_train: training inputs, shape (m, n_x).
        Y_train: training labels, shape (m, n_y).
        X_test: test inputs, same layout as X_train.
        Y_test: test labels, same layout as Y_train.
        learning_rate: step size passed to the Adam optimizer.
        num_epochs: number of full-batch optimization steps.

    Returns:
        None; results are reported via print.
    """
    tf.reset_default_graph()
    tf.set_random_seed(1)

    n_x = X_train.shape[1]
    n_y = Y_train.shape[1]

    X, Y = create_placeholders(n_x, n_y)

    parameters = initialize_parameters(n_x)

    Z = forward_propagation(X, parameters)

    cost = compute_cost(Z, Y)

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # Accuracy ops belong to the graph, so they are built alongside the rest of it.
    # axis=1 is the class axis in this layout.
    correct_prediction = tf.equal(tf.argmax(Z, axis=1), tf.argmax(Y, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)

        begin_time = time.time()
        for epoch in range(num_epochs):

            _, epoch_cost = sess.run([optimizer, cost], feed_dict={X: X_train, Y: Y_train})
            if (epoch % 10 == 0):
                print("Cost after epoch {}: {}".format(epoch, epoch_cost))

        end_time = time.time()

        # time.time() differences are plain seconds, so label them as such.
        print('Time elapsed {:.3f} seconds'.format(end_time - begin_time))

        # Evaluate the trained network on the training and test sets
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print("Train Accuracy: {:.4f}".format(train_accuracy))
        print("Test Accuracy: {:.4f}".format(test_accuracy))

In [11]:
# Train and evaluate on MNIST using the default learning rate and epoch count
model(X_train=x_train, Y_train=y_train, X_test=x_test, Y_test=y_test)

Cost after epoch 0: 2.372673273086548
Cost after epoch 10: 0.4198455214500427
Cost after epoch 20: 0.2645059823989868
Cost after epoch 30: 0.2038988173007965
Cost after epoch 40: 0.15980367362499237
Time elapsed 31.800 (hh:mm:ss.ms)
Train Accuracy: 0.9615
Test Accuracy: 0.9557
