**Batch Normalization**:
Batch Normalization (BN) is a technique that improves the training of deep neural networks by standardizing the inputs of a layer using the mean and variance of the current mini-batch, then rescaling and shifting the result with learned parameters. It helps mitigate issues like vanishing/exploding gradients, stabilizes and accelerates training, and can act as a regularizer.


In [1]:
# Import necessary libraries
import tensorflow as tf
import numpy as np

# Fetch Fashion MNIST; load_data() yields a (train, test) pair of (images, labels) pairs
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
X_train_full, y_train_full = fashion_mnist[0]
X_test, y_test = fashion_mnist[1]

# Hold out the last 5,000 examples of the full training set for validation
X_train, X_valid = X_train_full[:-5000], X_train_full[-5000:]
y_train, y_valid = y_train_full[:-5000], y_train_full[-5000:]

# Rescale the pixel values by 1/255 so that they lie in the range [0, 1]
X_train = X_train / 255.0
X_valid = X_valid / 255.0
X_test = X_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [2]:
# Class names for the Fashion MNIST dataset, one per line with its position in the list
class_names = [
    "T-shirt/top",  # 0
    "Trouser",      # 1
    "Pullover",     # 2
    "Dress",        # 3
    "Coat",         # 4
    "Sandal",       # 5
    "Shirt",        # 6
    "Sneaker",      # 7
    "Bag",          # 8
    "Ankle boot",   # 9
]

In [3]:
# Per-pixel mean and standard deviation, computed over the training set only.
# axis=0 reduces over the examples; keepdims=True keeps that axis with size 1 so the
# statistics broadcast against each dataset below.
pixel_means = X_train.mean(axis=0, keepdims=True)
pixel_stds = X_train.std(axis=0, keepdims=True)

# A pixel that never varies across the training set has a standard deviation of 0, and
# dividing by it would produce NaN/inf values. Divide by 1 for those pixels instead, so
# they are only centered; every other pixel is scaled exactly as before.
pixel_scales = np.where(pixel_stds == 0, 1.0, pixel_stds)

# Standardize the training, validation, and test sets using the training-set statistics
X_train_scaled = (X_train - pixel_means) / pixel_scales
X_valid_scaled = (X_valid - pixel_means) / pixel_scales
X_test_scaled = (X_test - pixel_means) / pixel_scales

In [4]:
# Reset the Keras session state and fix the TensorFlow random seed for reproducibility
tf.keras.backend.clear_session()
tf.random.set_seed(42)

# Assemble the network one layer at a time. Batch Normalization is applied to the
# flattened input and again after each ReLU-activated hidden layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=[28, 28]))  # 28x28 image -> 1D array
model.add(tf.keras.layers.BatchNormalization())  # normalizes the flattened input

# Hidden layer 1: 300 ReLU units with He-normal initialization, then Batch Normalization
model.add(tf.keras.layers.Dense(300, activation="relu", kernel_initializer="he_normal"))
model.add(tf.keras.layers.BatchNormalization())

# Hidden layer 2: 100 ReLU units with He-normal initialization, then Batch Normalization
model.add(tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"))
model.add(tf.keras.layers.BatchNormalization())

# Output layer: 10 softmax units, one per class
model.add(tf.keras.layers.Dense(10, activation="softmax"))

# Display the resulting architecture
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 batch_normalization (Batch  (None, 784)               3136      
 Normalization)                                                  
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 batch_normalization_1 (Bat  (None, 300)               1200      
 chNormalization)                                                
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 batch_normalization_2 (Bat  (None, 100)               4

In [5]:
# Configure training: SGD optimizer, sparse categorical cross-entropy loss, accuracy metric
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"],
)

# Fit on the standardized training set for 2 epochs, evaluating on the validation set
model.fit(
    X_train_scaled,
    y_train,
    epochs=2,
    validation_data=(X_valid_scaled, y_valid),
)

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x7883c9eeb190>

Batch Normalization can be placed before or after the activation function. Both approaches can work, but the best choice might depend on the specific model and problem.

In [6]:
# Reset the Keras name counters and fix the TensorFlow random seed
tf.keras.backend.clear_session()
tf.random.set_seed(42)

# Assemble the network one layer at a time. Here Batch Normalization sits between each
# hidden Dense layer and its ReLU activation. The hidden Dense layers are created with
# use_bias=False: Batch Normalization adds its own learned offset, so a separate bias
# term would be redundant.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=[28, 28]))  # 28x28 image -> 1D array

# Hidden layer 1: 300 linear units (He-normal init) -> Batch Normalization -> ReLU
model.add(tf.keras.layers.Dense(300, kernel_initializer="he_normal", use_bias=False))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Activation("relu"))

# Hidden layer 2: 100 linear units (He-normal init) -> Batch Normalization -> ReLU
model.add(tf.keras.layers.Dense(100, kernel_initializer="he_normal", use_bias=False))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Activation("relu"))

# Output layer: 10 softmax units, one per class
model.add(tf.keras.layers.Dense(10, activation="softmax"))

# Configure training: SGD optimizer, sparse categorical cross-entropy loss, accuracy metric
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"],
)

# Fit on the standardized training set for 2 epochs, evaluating on the validation set
model.fit(
    X_train_scaled,
    y_train,
    epochs=2,
    validation_data=(X_valid_scaled, y_valid),
)


Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x7883c9ee89a0>