<a href="https://colab.research.google.com/github/josephxlp/PyTorch100Days/blob/main/W2DAY9_Batch_Normalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

W2DAY9_Batch_Normalization

Goal:

- Use **nn.BatchNorm1d** in a multi-layer perceptron (MLP) to normalize intermediate activations.

Description:
- Manually add batch normalization layers to an MLP and observe how they stabilize and speed up training.

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets,transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

In [3]:
# Per-sample preprocessing: turn each loaded image into a tensor.
transform = transforms.ToTensor()

# Training split of FashionMNIST, stored under ./data and downloaded on
# first use if the files are not already present.
train_ds = datasets.FashionMNIST(
    root='data',
    train=True,
    transform=transform,
    download=True,
)

100%|██████████| 26.4M/26.4M [00:02<00:00, 12.5MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 208kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.87MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 8.19MB/s]


In [4]:
# Serve the training set in shuffled mini-batches of 64 samples.
train_dl = DataLoader(
    dataset=train_ds,
    shuffle=True,
    batch_size=64,
)

In [5]:
class MLPwithBatchNorm(nn.Module):
    """MLP classifier with batch normalization after every hidden linear layer.

    Each of the three hidden stages is ``Linear -> BatchNorm1d -> ReLU``
    (widths 256, 128, 64). The final linear layer returns raw logits; no
    softmax is applied.

    Args:
        in_features: Number of values per sample once the input is flattened.
            Defaults to ``28 * 28``.
        num_classes: Number of output logits per sample. Defaults to ``10``.
    """

    def __init__(self, in_features: int = 28 * 28, num_classes: int = 10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, 256)
        self.bn1 = nn.BatchNorm1d(256)

        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)

        self.fc3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)

        # Output layer: no normalization or activation, just logits.
        self.fc4 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, in_features)`` and return the class logits.

        Args:
            x: Tensor whose total element count is a multiple of the
                configured ``in_features``.

        Returns:
            Tensor of shape ``(N, num_classes)`` where ``N`` is the number of
            rows produced by the flatten step.
        """
        # reshape() behaves like view() for contiguous input but also accepts
        # non-contiguous tensors (e.g. transposed batches), copying only then.
        x = x.reshape(-1, self.fc1.in_features)
        x = F.relu(self.bn1(self.fc1(x)))
        x = F.relu(self.bn2(self.fc2(x)))
        x = F.relu(self.bn3(self.fc3(x)))
        return self.fc4(x)

In [6]:
# Seed the global RNG before any weights are created so that parameter
# initialisation is repeatable under Restart Kernel -> Run All.
# NOTE(review): a DataLoader without an explicit generator is expected to draw
# its shuffle seed from this same global RNG - confirm if exact batch order matters.
SEED = 42
torch.manual_seed(SEED)

LEARNING_RATE = 1e-3  # Adam step size

model = MLPwithBatchNorm()
# The model's forward() returns raw logits, which is what CrossEntropyLoss expects.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [8]:
NUM_EPOCHS = 7  # full passes over the training set

for epoch in range(NUM_EPOCHS):
    # Training mode: BatchNorm layers normalize with the current batch's
    # statistics and update their running estimates.
    model.train()
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    n_samples = 0
    pbar = tqdm(train_dl, desc=f'Epoch {epoch+1}', leave=False)

    for images, labels in pbar:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the total and the bar.
        batch_loss = loss.item()
        batch_size = labels.size(0)
        # criterion returns the batch mean, so scale by the batch size; a
        # plain mean of batch means would over-weight a smaller final batch
        # whenever the dataset size is not a multiple of the batch size.
        running_loss += batch_loss * batch_size
        n_samples += batch_size
        pbar.set_postfix(loss=batch_loss)

    # max(..., 1) avoids a ZeroDivisionError if the loader yielded no batches.
    print(f'Epoch {epoch+1} avg loss: {running_loss / max(n_samples, 1)}')





Epoch 1 avg loss: 0.47572906918998464




Epoch 2 avg loss: 0.344055223431605




Epoch 3 avg loss: 0.3062250047985679




Epoch 4 avg loss: 0.2825028890930513




Epoch 5 avg loss: 0.258559383114359




Epoch 6 avg loss: 0.24299240507074257


                                                                      

Epoch 7 avg loss: 0.2291225276148713


