In [870]:
import tensorflow as tf
import numpy as np
from datetime import datetime

In [871]:
logdir = "/tmp/mylogs/tf/"

#Visualize Plots - % tensorboard --logdir=/tmp/mylogs/tf/

classes = 10

# Training parameters.
lr = 0.001
batch_size = 100
display_step = 600
epochs = 50

# Network parameters.
conv1_filters = 32 
conv2_filters = 64 
fc1_units = 1024

In [872]:
# Prepare MNIST data.
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)

# Normalization
x_train, x_test = x_train / 255., x_test / 255.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat(epochs).shuffle(5000).batch(batch_size).prefetch(1)

In [873]:
# layers weight & bias parameters
random_normal = tf.initializers.RandomNormal()
 
weights = {
    'wc1': tf.Variable(random_normal([5, 5, 1, conv1_filters])),
    'wc2': tf.Variable(random_normal([5, 5, conv1_filters, conv2_filters])),
    'wd1': tf.Variable(random_normal([7*7*64, fc1_units])),
    'out': tf.Variable(random_normal([fc1_units, classes]))
}

biases = {
    'bc1': tf.Variable(tf.zeros([conv1_filters])),
    'bc2': tf.Variable(tf.zeros([conv2_filters])),
    'bd1': tf.Variable(tf.zeros([fc1_units])),
    'out': tf.Variable(tf.zeros([classes]))
}

In [874]:
@tf.function
def conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

@tf.function
def maxpool2d(x, k=2):
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

In [875]:
# 2 Layer CNN Model
@tf.function
def model(x,weights,biases):
    x = tf.reshape(x, [-1, 28, 28, 1])
    with tf.name_scope('CONV1'):
        conv1 = conv2d(x, weights['wc1'], biases['bc1'])
        conv1 = maxpool2d(conv1, k=2)

    with tf.name_scope('CONV2'):
        conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
        conv2 = maxpool2d(conv2, k=2)

    with tf.name_scope('FLATTEN'):
        flatten = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    
    with tf.name_scope('FC'):
        fc = tf.add(tf.matmul(flatten, weights['wd1']), biases['bd1'])
        fc = tf.nn.relu(fc)

    with tf.name_scope('Output'):
        output = tf.add(tf.matmul(fc, weights['out']), biases['out'])
        return tf.nn.softmax(output)

In [876]:
# Cross-Entropy loss function
@tf.function
def cross_entropy(y_pred, y_true):
    y_true = tf.one_hot(y_true, depth=classes)
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
    loss = tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred)))
    return loss

# Accuracy metric
@tf.function
def accuracy(y_pred, y_true):
    # Predicted class is the index of highest score in prediction vector (i.e. argmax).
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
    return acc


In [877]:
# Optimization
def optimization(x, y,optimizer,weights,biases):
    with tf.GradientTape() as g:
        pred = model(x,weights,biases)
        loss = cross_entropy(pred, y)
    trainable_variables = list(weights.values()) + list(biases.values())
    gradients = g.gradient(loss, trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainable_variables))
    return pred,loss

In [867]:
now = datetime.now()
logdir_adam = logdir + "adam/"+ now.strftime("%Y%m%d-%H%M%S") + "/"
writer = tf.summary.create_file_writer(logdir_adam)

# ADAM optimizer.
optimizer = tf.optimizers.Adam(lr)

# Run training for the given number of steps.
with writer.as_default():
    for step, (batch_x, batch_y) in enumerate(train_data, 1):
            
        pred,loss = optimization(batch_x, batch_y,optimizer,weights,biases)
        if step % display_step == 0:
            
            acc = accuracy(pred, batch_y)
            val_pred = model(x_test,weights,biases)
            val_acc = accuracy(val_pred, y_test)
            val_loss = cross_entropy(val_pred,y_test)

            tf.summary.scalar('train accuracy', acc, step = step//display_step)
            tf.summary.scalar('training loss', loss, step=step//display_step)
            tf.summary.scalar('test accuracy', val_acc, step = step//display_step)
            tf.summary.scalar('test loss', val_loss, step=step//display_step)
            print("epoch: %i, train loss: %f, train accuracy: %f, val accuracy: %f, val loss: %f" % (step/display_step, loss, acc, val_acc, val_loss))
            writer.flush()

print('Total Training Time ADAM - ', datetime.now()-now)

epoch: 1, train loss: 3.301328, train accuracy: 0.980000, val accuracy: 0.986100, val loss: 437.595276
epoch: 2, train loss: 1.751071, train accuracy: 0.990000, val accuracy: 0.991800, val loss: 249.643219
epoch: 3, train loss: 4.362897, train accuracy: 0.980000, val accuracy: 0.991600, val loss: 261.813507
epoch: 4, train loss: 0.666313, train accuracy: 1.000000, val accuracy: 0.990400, val loss: 301.683350
epoch: 5, train loss: 0.313856, train accuracy: 1.000000, val accuracy: 0.992800, val loss: 257.396851
epoch: 6, train loss: 0.178863, train accuracy: 1.000000, val accuracy: 0.990900, val loss: 303.299042
epoch: 7, train loss: 0.492146, train accuracy: 1.000000, val accuracy: 0.991000, val loss: 306.120605
epoch: 8, train loss: 0.016612, train accuracy: 1.000000, val accuracy: 0.991500, val loss: 327.495117
epoch: 9, train loss: 0.009616, train accuracy: 1.000000, val accuracy: 0.991700, val loss: 334.207703
epoch: 10, train loss: 0.341440, train accuracy: 1.000000, val accuracy: 

In [868]:
# Reinitializing weights for SGD optimizer
weights['wc1'].assign(random_normal([5, 5, 1, conv1_filters]))
weights['wc2'].assign(random_normal([5, 5, conv1_filters, conv2_filters]))
weights['wd1'].assign(random_normal([7*7*64, fc1_units]))
weights['out'].assign(random_normal([fc1_units, classes]))

biases['bc1'].assign(tf.zeros([conv1_filters]))
biases['bc2'].assign(tf.zeros([conv2_filters]))
biases['bd1'].assign(tf.zeros([fc1_units]))
biases['out'].assign(tf.zeros([classes]))

<tf.Variable 'UnreadVariable' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>

In [869]:
# SGD optimizer.
optimizer = tf.optimizers.SGD(lr)

now = datetime.now()
logdir_sgd = logdir + "sgd/" + now.strftime("%Y%m%d-%H%M%S") + "/"
writer = tf.summary.create_file_writer(logdir_sgd)

# Training Batches
with writer.as_default():
    for step, (batch_x, batch_y) in enumerate(train_data, 1):
        
        pred,loss = optimization(batch_x, batch_y,optimizer,weights,biases)
        if step % display_step == 0:
            
            acc = accuracy(pred, batch_y)
            val_pred = model(x_test,weights,biases)
            val_acc = accuracy(val_pred, y_test)
            val_loss = cross_entropy(val_pred,y_test)

            tf.summary.scalar('train accuracy', acc, step = step//display_step)
            tf.summary.scalar('training loss', loss, step=step//display_step)
            tf.summary.scalar('test accuracy', val_acc, step = step//display_step)
            tf.summary.scalar('test loss', val_loss, step=step//display_step)
            print("epoch: %i, train loss: %f, train accuracy: %f, val accuracy: %f, val loss: %f" % (step/display_step, loss, acc, val_acc, val_loss))
            writer.flush()

print('Total Training Time SGD - ', datetime.now()-now)

epoch: 1, train loss: 4.479516, train accuracy: 0.980000, val accuracy: 0.979400, val loss: 644.624634
epoch: 2, train loss: 11.314090, train accuracy: 0.970000, val accuracy: 0.986800, val loss: 413.193970
epoch: 3, train loss: 1.648815, train accuracy: 0.990000, val accuracy: 0.989300, val loss: 322.847076
epoch: 4, train loss: 0.279901, train accuracy: 1.000000, val accuracy: 0.989400, val loss: 286.581268
epoch: 5, train loss: 8.757600, train accuracy: 0.990000, val accuracy: 0.989400, val loss: 289.366638
epoch: 6, train loss: 0.370599, train accuracy: 1.000000, val accuracy: 0.988300, val loss: 373.910370
epoch: 7, train loss: 0.876777, train accuracy: 1.000000, val accuracy: 0.991100, val loss: 303.402588
epoch: 8, train loss: 0.637351, train accuracy: 1.000000, val accuracy: 0.990500, val loss: 271.908752
epoch: 9, train loss: 1.178825, train accuracy: 0.990000, val accuracy: 0.991100, val loss: 259.002533
epoch: 10, train loss: 0.145763, train accuracy: 1.000000, val accuracy: