In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

## **Importing Libraries**

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os

# Print the installed TensorFlow and Keras versions alongside the notebook output.
print(tf.__version__)
print(keras.__version__)

2.4.1
2.4.0


## **Hyperparameters**

In [3]:
# Hyperparameters shared by the optimizer, the dataset batching and the training loop.
learning_rate = 0.001  # step size handed to the Adam optimizer
training_epochs = 15  # number of passes over the training dataset
batch_size = 100  # examples per batch for both the train and test datasets

# Set TensorFlow's global random seed to a fixed value.
tf.random.set_seed(777)

## **Creating a Checkpoint Directory**

In [4]:
# Checkpoints go under <current working directory>/checkpoints/mnist_cnn_subclass.
cur_dir = os.getcwd()
ckpt_dir_name = 'checkpoints'
model_dir_name = 'mnist_cnn_subclass'

checkpoint_dir = os.path.join(cur_dir, ckpt_dir_name, model_dir_name)
# exist_ok=True keeps a re-run of this cell from failing when the directory already exists.
os.makedirs(checkpoint_dir, exist_ok=True)

# File prefix for saved checkpoints: the model name is reused as the file stem inside checkpoint_dir.
checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)

## **MNIST/Fashion MNIST Data**

In [5]:
# Handle to the Keras MNIST dataset module, plus the display label for each class index 0-9.
mnist = keras.datasets.mnist
class_names = [str(digit) for digit in range(10)]

## **Datasets**

In [6]:
# Load the train and test splits from the MNIST dataset module.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Convert to float32, divide by 255, and append a trailing channel axis.
train_images = (train_images.astype(np.float32) / 255.)[..., np.newaxis]
test_images = (test_images.astype(np.float32) / 255.)[..., np.newaxis]

# One-hot encode the labels over 10 classes.
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)

# Only the training pipeline is shuffled; both are served in batches of batch_size.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_images, train_labels))
    .shuffle(buffer_size=100000)
    .batch(batch_size)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_images, test_labels))
    .batch(batch_size)
)

## **Model Class**

In [7]:
class MNISTModel(tf.keras.Model):
  """Convolutional classifier: three Conv2D + MaxPool2D stages and a dense head.

  The last Dense layer has 10 units and no activation, so the model returns
  raw logits rather than probabilities.
  """

  def __init__(self):
    """Create the layers; they are wired together in `call`."""
    super(MNISTModel, self).__init__()
    # Three convolution stages with 32, 64 and 128 filters, each followed by max pooling.
    self.conv1 = keras.layers.Conv2D(filters=32, kernel_size=[3,3], padding='SAME', activation=tf.nn.relu)
    self.pool1 = keras.layers.MaxPool2D(padding='SAME')
    self.conv2 = keras.layers.Conv2D(filters=64, kernel_size=[3,3], padding='SAME', activation=tf.nn.relu)
    self.pool2 = keras.layers.MaxPool2D(padding='SAME')
    self.conv3 = keras.layers.Conv2D(filters=128, kernel_size=[3,3], padding='SAME', activation=tf.nn.relu)
    self.pool3 = keras.layers.MaxPool2D(padding='SAME')
    # Dense head: flatten, 256-unit hidden layer with dropout, 10-unit logits layer.
    self.pool3_flat = keras.layers.Flatten()
    self.dense4 = keras.layers.Dense(units=256, activation=tf.nn.relu)
    self.drop4 = keras.layers.Dropout(rate=0.4)
    self.dense5 = keras.layers.Dense(units=10)

  def call(self, inputs, training=False):
    """Run the forward pass.

    Args:
      inputs: batch of images fed to the first convolution.
      training: whether the dropout layer should be active.

    Returns:
      Logits produced by the final 10-unit Dense layer.
    """
    net = self.conv1(inputs)
    net = self.pool1(net)
    net = self.conv2(net)
    net = self.pool2(net)
    net = self.conv3(net)
    net = self.pool3(net)
    net = self.pool3_flat(net)
    net = self.dense4(net)
    # Forward the flag explicitly so the dropout mode does not depend on
    # Keras' implicit call-context propagation.
    net = self.drop4(net, training=training)
    net = self.dense5(net)
    return net

In [8]:
model = MNISTModel()
# Call the model once on a symbolic 28x28x1 input before asking for the summary.
temp_inputs = keras.Input(shape=(28, 28, 1))
model(temp_inputs)
model.summary()

Model: "mnist_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              multiple                  320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) multiple                  0         
_________________________________________________________________
conv2d_1 (Conv2D)            multiple                  18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 multiple                  0         
_________________________________________________________________
conv2d_2 (Conv2D)            multiple                  73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 multiple                  0         
_________________________________________________________________
flatten (Flatten)            multiple                  

## **Loss Function**

In [9]:
@tf.function
def loss_fn(model, images, labels):
  """Mean softmax cross-entropy of `model` on one batch, run with training=True.

  Args:
    model: callable returning logits for `images`.
    images: batch of inputs.
    labels: targets compared against the logits by categorical cross-entropy.

  Returns:
    The cross-entropy averaged over the batch.
  """
  predictions = model(images, training=True)
  per_example = tf.keras.losses.categorical_crossentropy(
      y_true=labels, y_pred=predictions, from_logits=True)
  return tf.reduce_mean(per_example)

## **Calculating Gradients**

In [10]:
@tf.function
def grad(model, images, labels):
  """Compute the gradients of the batch loss for the model's trainable variables.

  Args:
    model: model whose trainable variables are differentiated.
    images: batch of inputs.
    labels: targets forwarded to `loss_fn`.

  Returns:
    A list of gradients, one per entry of `model.trainable_variables` and in
    the same order.
  """
  with tf.GradientTape() as tape:
    loss = loss_fn(model, images, labels)
  # Differentiate with respect to trainable_variables, not variables: the
  # optimizer step zips these gradients with model.trainable_variables, and
  # the two lists would misalign if the model had a non-trainable variable.
  return tape.gradient(loss, model.trainable_variables)

## **Calculating the Model's Accuracy**

In [11]:
@tf.function
def evaluate(model, images, labels):
  logits = model(images, training=False)
  correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  return accuracy

## **Optimizer**

In [12]:
# Adam optimizer using the learning rate defined with the other hyperparameters.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

## **Creating a Checkpoint**

In [13]:
# Track the model in the checkpoint under the key `cnn`.
# NOTE(review): only the model is passed here, so the optimizer's state is not tracked by this checkpoint.
checkpoint = tf.train.Checkpoint(cnn=model)

## **Training**

In [14]:
@tf.function
def train(model, images,labels):
  grads = grad(model, images, labels)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))

In [15]:
print('Learning started. It takes some time')
for epoch in range(training_epochs):
  # Running sums for this epoch; divided by the batch counts further down.
  avg_loss, avg_train_acc, avg_test_acc = 0., 0., 0.
  train_step, test_step = 0, 0

  # Training pass: update on each batch, then record metrics on that same batch.
  for images, labels in train_dataset:
    train(model, images, labels)
    # Both metrics are measured after the update, with extra forward passes;
    # loss_fn runs the model with training=True, evaluate with training=False.
    loss = loss_fn(model, images, labels)
    acc = evaluate(model, images,labels)
    avg_loss += loss
    avg_train_acc += acc
    train_step += 1
  avg_loss /= train_step
  avg_train_acc /= train_step

  # Test pass: accuracy only, no weight updates.
  for images, labels in test_dataset:
    acc = evaluate(model, images, labels)
    avg_test_acc += acc
    test_step += 1
  avg_test_acc /= test_step

  print('Epoch:','{}'.format(epoch+1), 'loss = ', '{:.8f}'.format(avg_loss),
        'train accuracy = ', '{:.4f}'.format(avg_train_acc),
        'test accuracy = ', '{:.4f}'.format(avg_test_acc))
  # A checkpoint is written at the end of every epoch.
  checkpoint.save(file_prefix=checkpoint_prefix)
print('Learning Finished!')


Learning started. It takes some time
Epoch: 1 loss =  0.16360113 train accuracy =  0.9592 test accuracy =  0.9864
Epoch: 2 loss =  0.03926753 train accuracy =  0.9908 test accuracy =  0.9896
Epoch: 3 loss =  0.02550861 train accuracy =  0.9935 test accuracy =  0.9923
Epoch: 4 loss =  0.01894744 train accuracy =  0.9953 test accuracy =  0.9930
Epoch: 5 loss =  0.01463522 train accuracy =  0.9965 test accuracy =  0.9930
Epoch: 6 loss =  0.01242791 train accuracy =  0.9971 test accuracy =  0.9930
Epoch: 7 loss =  0.00901427 train accuracy =  0.9978 test accuracy =  0.9934
Epoch: 8 loss =  0.00748818 train accuracy =  0.9985 test accuracy =  0.9897
Epoch: 9 loss =  0.00706637 train accuracy =  0.9985 test accuracy =  0.9922
Epoch: 10 loss =  0.00493953 train accuracy =  0.9988 test accuracy =  0.9926
Epoch: 11 loss =  0.00384440 train accuracy =  0.9994 test accuracy =  0.9931
Epoch: 12 loss =  0.00341674 train accuracy =  0.9992 test accuracy =  0.9938
Epoch: 13 loss =  0.00307059 train a

## **Test accuracy = 99.29%**