### Implementing Batch Normalization

In [25]:
import datetime
import matplotlib.pyplot as plt
import numpy as np
import os

import sys
import tensorflow as tf

from tensorflow.python.ops import nn_ops

sys.path.append("..")

from datasets.mnist import MNIST_DATASET

In [26]:
%load_ext tensorboard

In [27]:
BATCH_SIZE = 60
SHUFFLE_BUFFER_SIZE = 100

In [28]:
dataset_path = '/Users/rohit/Desktop/datasets/mnist'
x_train, y_train, x_test, y_test = MNIST_DATASET.load_dataset(dataset_path=dataset_path, reshape=False)

In [29]:
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)

In [30]:
model_one = tf.keras.Sequential([
    tf.keras.layers.Dense(100, activation='sigmoid', input_shape=(784, )),
    tf.keras.layers.Dense(100, activation='sigmoid'),
    tf.keras.layers.Dense(100, activation='sigmoid'),
    tf.keras.layers.Dense(10, )
])

model_one.build()

print(model_one.summary())

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 100)               78500     
                                                                 
 dense_17 (Dense)            (None, 100)               10100     
                                                                 
 dense_18 (Dense)            (None, 100)               10100     
                                                                 
 dense_19 (Dense)            (None, 10)                1010      
                                                                 
Total params: 99,710
Trainable params: 99,710
Non-trainable params: 0
_________________________________________________________________
None


In [31]:
model_one.compile(optimizer=tf.keras.optimizers.RMSprop(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['sparse_categorical_accuracy'])

In [32]:
logdir = os.path.join("logs", datetime.datetime.now().strftime("batch_norm-%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

model_one.fit(train_dataset, epochs=50, validation_data=test_dataset, callbacks=[tensorboard_callback])

Epoch 1/50


2023-01-22 23:05:03.883763: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-01-22 23:05:13.704480: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x315a8bd60>

In [33]:
class BatchNorm1D(tf.keras.layers.Layer):
    def __init__(self, trainable=True, name=None, dtype=None, dynamic=False, **kwargs):
        super().__init__()

    def build(self, input_shape):
        self.gamma = self.add_weight(
                                name='gamma',
                                shape=(input_shape[-1], ), 
                                initializer='ones',
                                trainable=True)

        self.beta = self.add_weight(
                                name='beta',
                                shape=(input_shape[-1], ), 
                                initializer='zeros',
                                trainable=True)

        self.epsilon = 10e-6

    def call(self, inputs):
        mean = tf.math.reduce_mean(inputs, axis=0)
        variance = tf.math.reduce_mean(tf.math.square(tf.math.subtract(inputs, mean)), axis=0)

        normalized_input = tf.math.divide(tf.math.subtract(inputs, mean), tf.math.sqrt(variance + self.epsilon))

        return tf.math.add(tf.math.multiply(self.gamma, normalized_input), self.beta)

In [34]:
# tf.keras.backend.clear_session()

model_two = tf.keras.Sequential([
    tf.keras.layers.Dense(100, input_shape=(784, )),
    BatchNorm1D(),
    tf.keras.layers.Activation(activation='sigmoid'),
    tf.keras.layers.Dense(100),
    BatchNorm1D(),
    tf.keras.layers.Activation(activation='sigmoid'),
    tf.keras.layers.Dense(100),
    BatchNorm1D(),
    tf.keras.layers.Activation(activation='sigmoid'),
    tf.keras.layers.Dense(10, )
])

model_two.build()

print(model_two.summary())

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_20 (Dense)            (None, 100)               78500     
                                                                 
 batch_norm1d_6 (BatchNorm1D  (None, 100)              200       
 )                                                               
                                                                 
 activation_6 (Activation)   (None, 100)               0         
                                                                 
 dense_21 (Dense)            (None, 100)               10100     
                                                                 
 batch_norm1d_7 (BatchNorm1D  (None, 100)              200       
 )                                                               
                                                                 
 activation_7 (Activation)   (None, 100)              

In [35]:
model_two.compile(optimizer=tf.keras.optimizers.RMSprop(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['sparse_categorical_accuracy'])

logdir = os.path.join("logs", datetime.datetime.now().strftime("batch_norm-%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

model_two.fit(train_dataset, epochs=50, validation_data=test_dataset, callbacks=[tensorboard_callback])

Epoch 1/50


2023-01-22 23:19:37.839627: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-01-22 23:19:59.385891: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x315a8ae00>