# Aim : To Study The Affect Of Batch Normalization

In [1]:
# TensorFlow and tf.keras
import tensorflow as tf

# Helper libraries
import numpy as np
from tensorflow.keras import initializers
from tensorflow.python.keras import activations

print(tf.__version__)

# downloading fashion_mnist data
fashion_mnist = tf.keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

train_images = train_images / 255.0

test_images = test_images / 255.0              


import mlflow.tensorflow
mlflow.tensorflow.autolog()

initializer = tf.keras.initializers.glorot_normal
activation = tf.keras.activations.tanh

c:\users\sonu.ramkumar.jha\desktop\experiments\env\lib\site-packages\numpy\.libs\libopenblas.GK7GX5KEQ4F6UYO3P26ULGBQYHGQO7J4.gfortran-win_amd64.dll
c:\users\sonu.ramkumar.jha\desktop\experiments\env\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll


2.5.0




# without batchnorm

In [2]:
model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(128, activation=activation, kernel_initializer=initializer),
tf.keras.layers.Dense(10, kernel_initializer=initializer)
])

model.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy'])

# model summary
model.summary()

with mlflow.start_run():

    model.fit(train_images, train_labels, epochs=10)

    test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

    print('test_loss', test_loss)
    print('test_accuracy', test_acc)

    mlflow.tensorflow.mlflow.log_metric('test_loss', test_loss)
    mlflow.tensorflow.mlflow.log_metric('test_acc', test_acc)  
    mlflow.tensorflow.mlflow.log_param('initializer', initializer)
    mlflow.tensorflow.mlflow.log_param('activation', activation)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: C:\Users\SONURA~1.JHA\AppData\Local\Temp\tmpd1ofocux\model\data\model\assets
313/313 - 0s - loss: 0.3296 - accuracy: 0.8804
test_loss 0.3296279013156891
test_accuracy 0.8804000020027161


# With BatchNormalization

In [3]:
del model
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(128, activation=activation, kernel_initializer=initializer),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10, kernel_initializer=initializer)
])

model.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy'])

# model summary
model.summary()

with mlflow.start_run():

    model.fit(train_images, train_labels, epochs=10)

    test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

    print('test_loss', test_loss)
    print('test_accuracy', test_acc)

    mlflow.tensorflow.mlflow.log_metric('test_loss', test_loss)
    mlflow.tensorflow.mlflow.log_metric('test_acc', test_acc)  
    mlflow.tensorflow.mlflow.log_param('initializer', initializer)
    mlflow.tensorflow.mlflow.log_param('activation', activation)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
batch_normalization (BatchNo (None, 784)               3136      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               100480    
_________________________________________________________________
batch_normalization_1 (Batch (None, 128)               512       
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
Total params: 105,418
Trainable params: 103,594
Non-trainable params: 1,824
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

# Withoud BatchNormalization

In [4]:
# deleting previous model
del model

model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(128, activation=activation, kernel_initializer=initializer),
tf.keras.layers.Dense(10, kernel_initializer=initializer)
])

model.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy'])

# model summary
model.summary()

with mlflow.start_run():

    model.fit(train_images, train_labels, epochs=20)

    test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

    print('test_loss', test_loss)
    print('test_accuracy', test_acc)

    mlflow.tensorflow.mlflow.log_metric('test_loss', test_loss)
    mlflow.tensorflow.mlflow.log_metric('test_acc', test_acc)  
    mlflow.tensorflow.mlflow.log_param('initializer', initializer)
    mlflow.tensorflow.mlflow.log_param('activation', activation)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 128)               100480    
_________________________________________________________________
dense_5 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
INFO:tensorflow:Assets written to: C:\Users\SONURA~1.JHA\AppData\Local\Temp\tmpghwr50vo\model\data\model\assets
313/313 - 0s - loss: 0.3311 - accur

# With BatchNormalization

In [None]:
del model
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(128, activation=activation, kernel_initializer=initializer),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10, kernel_initializer=initializer)
])

model.compile(optimizer='adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy'])

# model summary
model.summary()

with mlflow.start_run():

    model.fit(train_images, train_labels, epochs=20)

    test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

    print('test_loss', test_loss)
    print('test_accuracy', test_acc)

    mlflow.tensorflow.mlflow.log_metric('test_loss', test_loss)
    mlflow.tensorflow.mlflow.log_metric('test_acc', test_acc)  
    mlflow.tensorflow.mlflow.log_param('initializer', initializer)
    mlflow.tensorflow.mlflow.log_param('activation', activation)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_3 (Flatten)          (None, 784)               0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 784)               3136      
_________________________________________________________________
dense_6 (Dense)              (None, 128)               100480    
_________________________________________________________________
batch_normalization_3 (Batch (None, 128)               512       
_________________________________________________________________
dense_7 (Dense)              (None, 10)                1290      
Total params: 105,418
Trainable params: 103,594
Non-trainable params: 1,824
_________________________________________________________________
Epoch 1/20
Epoch 2/20

# Observations : 
- Accuracy decreases with BatchNormalization may be because of the smaller network

|epoch|BatchNormalization|train_loss|train_acc|test_loss|test_acc|
|-----|------------------|-----------|---------|---------|--------|
|10|no|0.2407 |0.9107	|0.3508 |0.8766|
|10|yes|0.2863 |0.8935	|0.3526 |0.8742|
|20|no|0.1871 |0.9292	|0.3351 |0.8863|
|20|yes|0.2457 |0.9078|0.3551 |0.8809|