# Batch Normalization

In [1]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use("fivethirtyeight")
%load_ext tensorboard




In [2]:
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [3]:
tf.random.set_seed(42)
np.random.seed(42)

In [4]:
LAYERS = [ tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),  ## Activation function for the above layer . Like this we can also 
    # seperately define the activation function 
    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(), ## Activation function for the above layer 
    tf.keras.layers.Dense(10, activation="softmax")]


model = tf.keras.models.Sequential(LAYERS)




In [5]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(lr=1e-3),
              metrics=["accuracy"])



In [6]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266610 (1.02 MB)
Trainable params: 266610 

In [7]:
history = model.fit(X_train, y_train, epochs=10,
                    validation_data=(X_valid, y_valid), verbose=2)

Epoch 1/10












1719/1719 - 3s - loss: 0.6811 - accuracy: 0.7692 - val_loss: 0.5000 - val_accuracy: 0.8294 - 3s/epoch - 2ms/step
Epoch 2/10
1719/1719 - 3s - loss: 0.4817 - accuracy: 0.8315 - val_loss: 0.4344 - val_accuracy: 0.8514 - 3s/epoch - 1ms/step
Epoch 3/10
1719/1719 - 3s - loss: 0.4406 - accuracy: 0.8447 - val_loss: 0.5209 - val_accuracy: 0.8074 - 3s/epoch - 1ms/step
Epoch 4/10
1719/1719 - 3s - loss: 0.4169 - accuracy: 0.8544 - val_loss: 0.4011 - val_accuracy: 0.8616 - 3s/epoch - 1ms/step
Epoch 5/10
1719/1719 - 3s - loss: 0.4009 - accuracy: 0.8592 - val_loss: 0.3869 - val_accuracy: 0.8660 - 3s/epoch - 1ms/step
Epoch 6/10
1719/1719 - 3s - loss: 0.3843 - accuracy: 0.8650 - val_loss: 0.3823 - val_accuracy: 0.8710 - 3s/epoch - 1ms/step
Epoch 7/10
1719/1719 - 3s - loss: 0.3738 - accuracy: 0.8669 - val_loss: 0.3729 - val_accuracy: 0.8732 - 3s/epoch - 1ms/step
Epoch 8/10
1719/1719 - 3s - loss: 0.3640 - accuracy: 0.8701 - val_loss: 0.3992 - val_accuracy: 0.8572 - 3s/epoch - 1ms/step
Epoch 9/10
1719/171

# BN approach one

In [8]:
del model

In [9]:
LAYERS_BN = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.BatchNormalization(),   ### Applying the batch normalization for the above layer 
    tf.keras.layers.Dense(300, activation="relu"),  ### Activation fucntion for the above layer 
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10, activation="softmax")
]

model = tf.keras.models.Sequential(LAYERS_BN)

In [10]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization (Batch  (None, 784)               3136      
 Normalization)                                                  
                                                                 
 dense_3 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_1 (Bat  (None, 300)               1200      
 chNormalization)                                                
                                                                 
 dense_4 (Dense)             (None, 100)               30100     
                                                                 
 batch_normalization_2 (Bat  (None, 100)              

In [11]:
bn1 = model.layers[1]

In [12]:
bn1

<keras.src.layers.normalization.batch_normalization.BatchNormalization at 0x181ae0be590>

In [13]:
for variable in bn1.variables:
  print(variable.name, variable.trainable)   # whether those are trainable 

batch_normalization/gamma:0 True
batch_normalization/beta:0 True
batch_normalization/moving_mean:0 False
batch_normalization/moving_variance:0 False


In [14]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(lr=1e-3),
              metrics=["accuracy"])



# BN approach 2

In [15]:
del model

In [16]:
LAYERS_BN_BIAS_FALSE = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(300, use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.Dense(100, use_bias=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.Dense(10, activation="softmax")
]

model = tf.keras.models.Sequential(LAYERS_BN_BIAS_FALSE)

In [17]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_3 (Bat  (None, 784)               3136      
 chNormalization)                                                
                                                                 
 dense_6 (Dense)             (None, 300)               235200    
                                                                 
 batch_normalization_4 (Bat  (None, 300)               1200      
 chNormalization)                                                
                                                                 
 activation (Activation)     (None, 300)               0         
                                                                 
 dense_7 (Dense)             (None, 100)              

# Transfer Learning 

In [19]:
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]


tf.random.set_seed(42)
np.random.seed(42)

LAYERS = [ tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(10, activation="softmax")]


model = tf.keras.models.Sequential(LAYERS)




model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
              metrics=["accuracy"])


model.summary()

history = model.fit(X_train, y_train, epochs=10,
                    validation_data=(X_valid, y_valid), verbose=2)

model.save("pretrained_mnist_model.h5")

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 784)               0         
                                                                 
 dense_9 (Dense)             (None, 300)               235500    
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 300)               0         
                                                                 
 dense_10 (Dense)            (None, 100)               30100     
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_11 (Dense)            (None, 10)                1010      
                                                                 
Total params: 266610 (1.02 MB)
Trainable params: 26661

  saving_api.save_model(


#### Transfer Learning 

#### New problem : Classify the hand written digits into odd and even 

In [20]:
pretrained_mnist_model = tf.keras.models.load_model("pretrained_mnist_model.h5")

In [21]:
pretrained_mnist_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 784)               0         
                                                                 
 dense_9 (Dense)             (None, 300)               235500    
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 300)               0         
                                                                 
 dense_10 (Dense)            (None, 100)               30100     
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_11 (Dense)            (None, 10)                1010      
                                                                 
Total params: 266610 (1.02 MB)
Trainable params: 26661

In [22]:
for layer in pretrained_mnist_model.layers:
  print(f"{layer.name}: {layer.trainable}")

flatten_3: True
dense_9: True
leaky_re_lu_2: True
dense_10: True
leaky_re_lu_3: True
dense_11: True


In [23]:
for layer in pretrained_mnist_model.layers[:-1]: #leaves the last layer unfreezed =>last layer is trainable
  layer.trainable = False # freezing the layers from getting trained
  print(f"{layer.name}: {layer.trainable}")

flatten_3: False
dense_9: False
leaky_re_lu_2: False
dense_10: False
leaky_re_lu_3: False


In [24]:
for layer in pretrained_mnist_model.layers:
  print(f"{layer.name}: {layer.trainable}")

flatten_3: False
dense_9: False
leaky_re_lu_2: False
dense_10: False
leaky_re_lu_3: False
dense_11: True


In [25]:
try:
  del new_model
except:
  pass

In [53]:
lower_pretrained_layers = pretrained_mnist_model.layers[:-1]
new_model = tf.keras.models.Sequential(lower_pretrained_layers)

new_model.add(
    tf.keras.layers.Dense(2, activation="softmax")
)

In [54]:
new_model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 784)               0         
                                                                 
 dense_9 (Dense)             (None, 300)               235500    
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 300)               0         
                                                                 
 dense_10 (Dense)            (None, 100)               30100     
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_15 (Dense)            (None, 2)                 202       
                                                                 
Total params: 265802 (1.01 MB)
Trainable params: 202 (

In [55]:
np.unique(y_train)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [56]:
np.where(y_train%2 == 0, 1, 0)

array([0, 0, 1, ..., 0, 1, 1])

In [57]:
y_train[0], y_train[-1]

(7, 8)

In [58]:
def update_even_odd_labels(labels):
  for idx, label in enumerate(labels):
    labels[idx] = np.where(label % 2 == 0, 1, 0)
  return labels

In [59]:
y_train_bin, y_test_bin, y_valid_bin = update_even_odd_labels([y_train, y_test, y_valid])

In [60]:
np.unique(y_train_bin)

array([0, 1])

In [61]:
new_model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(lr=1e-3),
              metrics=["accuracy"])



In [62]:
history = new_model.fit(X_train, y_train_bin, epochs=10,
                    validation_data=(X_valid, y_valid_bin), verbose=2)

Epoch 1/10
1719/1719 - 2s - loss: 0.3189 - accuracy: 0.8644 - val_loss: 0.2740 - val_accuracy: 0.8912 - 2s/epoch - 1ms/step
Epoch 2/10
1719/1719 - 2s - loss: 0.2863 - accuracy: 0.8815 - val_loss: 0.2580 - val_accuracy: 0.8972 - 2s/epoch - 1ms/step
Epoch 3/10
1719/1719 - 2s - loss: 0.2771 - accuracy: 0.8868 - val_loss: 0.2512 - val_accuracy: 0.9004 - 2s/epoch - 1ms/step
Epoch 4/10
1719/1719 - 2s - loss: 0.2711 - accuracy: 0.8899 - val_loss: 0.2472 - val_accuracy: 0.9042 - 2s/epoch - 1ms/step
Epoch 5/10
1719/1719 - 2s - loss: 0.2665 - accuracy: 0.8921 - val_loss: 0.2414 - val_accuracy: 0.9062 - 2s/epoch - 1ms/step
Epoch 6/10
1719/1719 - 2s - loss: 0.2633 - accuracy: 0.8941 - val_loss: 0.2386 - val_accuracy: 0.9068 - 2s/epoch - 1ms/step
Epoch 7/10
1719/1719 - 2s - loss: 0.2600 - accuracy: 0.8957 - val_loss: 0.2360 - val_accuracy: 0.9066 - 2s/epoch - 1ms/step
Epoch 8/10
1719/1719 - 2s - loss: 0.2579 - accuracy: 0.8965 - val_loss: 0.2394 - val_accuracy: 0.9060 - 2s/epoch - 1ms/step
Epoch 9/

In [63]:
new_model.evaluate(X_test, y_test_bin)



[0.25388863682746887, 0.9014999866485596]

In [64]:
X_new = X_test[:3]

y_test[:3], y_test_bin[:3]

(array([7, 2, 1], dtype=uint8), array([0, 1, 0]))

In [65]:
np.argmax(new_model.predict(X_new), axis=-1)



array([0, 1, 0], dtype=int64)