In [None]:
Batch No

In [1]:
import matplotlib .pyplot as plt
import torch
import os
import numpy as np
%matplotlib inline

In [2]:
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as dsets

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper_parameter
num_epochs = 5
num_classes = 10
batch_size =100
learning_rate = 0.001

# MNIST Datasets
train_dataset = dsets.MNIST(root='./data',
                                train=True,
                                transform=transforms.ToTensor(),
                                download=True)

test_dataset = dsets.MNIST(root='./data',
                            train=False,
                            transform=transforms.ToTensor())

#DataLoader provides queue and threads
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                            batch_size=batch_size,
                                            shuffle=False)



### Simple Neural network with two fully connected layers

In [4]:

class CNN(nn.Module):
    def __init__(self,use_batchnorm,input_size =784,hidden_dim= 256,num_classes=10):
        super(CNN,self).__init__()
        self.use_batchnorm = use_batchnorm
        self.input_size = input_size
        self.hidden_dim = hidden_dim
        self.num_classes = num_classes

        if use_batchnorm:
            self.fc1 = nn.Linear(input_size,hidden_dim* 2,bias=False)
            self.batch_norm1 = nn.BatchNorm1d(hidden_dim*2)
        else:
            self.fc1 = nn.Linear(input_size,hidden_dim* 2)

        if use_batchnorm:
            self.fc2 = nn.Linear(hidden_dim* 2,hidden_dim,bias=False)
            self.batch_norm2 = nn.BatchNorm1d(hidden_dim)
        else:
            self.fc2 = nn.Linear(hidden_dim* 2,hidden_dim)
        
        self.fc3 = nn.Linear(hidden_dim,num_classes)  # fully connected Layer,output 10 classes
    
    def forward(self,x):
        # flatten Image
        x = x.view(-1,28*28)  
        # all hidden layers + optional Batch Norm + relu activation
        x = self.fc1(x)
        if self.use_batchnorm:
            x = self.batch_norm1(x)
        x = F.relu(x)

        # Second layer
        x = self.fc2(x)
        if self.use_batchnorm:
            x = self.batch_norm2(x)
        x = F.relu(x)

        # third layer no Relu or Batch Normalization
        out = self.fc3(x)
        return out 


### training with simple neural network using batch norm

In [5]:
learning_rate= 0.01
model = CNN(use_batchnorm=True).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

# Train the model 
total_step = len(train_loader)
print("total_step :",total_step)

for epoch in range(num_epochs):
    for i,(images,labels) in enumerate (train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # forward pass 
        outputs = model(images)
        loss = criterion(outputs,labels)

        # backward and optimize 
        optimizer.zero_grad() # clear gradient for this steps
        loss.backward()     # calculate gradient
        optimizer.step()    #apply gradients

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}],Step [{}/{}],loss:{:.4f}'
            .format(epoch+1,num_epochs,i+1,total_step,loss.item()))  

# test the model 
model.eval()    # eval model (batchnorm uses moving mean./varience instead of mini-batch mean and varience)
with torch.no_grad():
    correct = 0
    total = 0
    for images ,labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # print("outputs data",outputs.data)
        _ ,predicted =torch.max(outputs.data,1)
        total += labels.size(0)
        correct += (predicted==labels).sum().item()
    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'batch_norm.ckpt')

total_step : 600
Epoch [1/5],Step [100/600],loss:0.6437
Epoch [1/5],Step [200/600],loss:0.5020
Epoch [1/5],Step [300/600],loss:0.3546
Epoch [1/5],Step [400/600],loss:0.4366
Epoch [1/5],Step [500/600],loss:0.2312
Epoch [1/5],Step [600/600],loss:0.1859
Epoch [2/5],Step [100/600],loss:0.1920
Epoch [2/5],Step [200/600],loss:0.1270
Epoch [2/5],Step [300/600],loss:0.2105
Epoch [2/5],Step [400/600],loss:0.1788
Epoch [2/5],Step [500/600],loss:0.1720
Epoch [2/5],Step [600/600],loss:0.1520
Epoch [3/5],Step [100/600],loss:0.1556
Epoch [3/5],Step [200/600],loss:0.1103
Epoch [3/5],Step [300/600],loss:0.1402
Epoch [3/5],Step [400/600],loss:0.1155
Epoch [3/5],Step [500/600],loss:0.2240
Epoch [3/5],Step [600/600],loss:0.1020
Epoch [4/5],Step [100/600],loss:0.0835
Epoch [4/5],Step [200/600],loss:0.0896
Epoch [4/5],Step [300/600],loss:0.1314
Epoch [4/5],Step [400/600],loss:0.1274
Epoch [4/5],Step [500/600],loss:0.0667
Epoch [4/5],Step [600/600],loss:0.0516
Epoch [5/5],Step [100/600],loss:0.0702
Epoch [5

#### training with simple neural network using no batch norm

In [7]:
learning_rate= 0.01
model = CNN(use_batchnorm=False).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

# Train the model 
total_step = len(train_loader)
print("total_step :",total_step)

for epoch in range(num_epochs):
    for i,(images,labels) in enumerate (train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # forward pass 
        outputs = model(images)
        loss = criterion(outputs,labels)

        # backward and optimize 
        optimizer.zero_grad() # clear gradient for this steps
        loss.backward()     # calculate gradient
        optimizer.step()    #apply gradients

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}],Step [{}/{}],loss:{:.4f}'
            .format(epoch+1,num_epochs,i+1,total_step,loss.item()))  

# test the model 
model.eval()    # eval model (batchnorm uses moving mean./varience instead of mini-batch mean and varience)
with torch.no_grad():
    correct = 0
    total = 0
    for images ,labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # print("outputs data",outputs.data)
        _ ,predicted =torch.max(outputs.data,1)
        total += labels.size(0)
        correct += (predicted==labels).sum().item()
    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'batch_norm.ckpt')

total_step : 600
Epoch [1/5],Step [100/600],loss:2.2642
Epoch [1/5],Step [200/600],loss:2.1886
Epoch [1/5],Step [300/600],loss:2.0825
Epoch [1/5],Step [400/600],loss:1.8715
Epoch [1/5],Step [500/600],loss:1.5559
Epoch [1/5],Step [600/600],loss:1.2178
Epoch [2/5],Step [100/600],loss:1.1137
Epoch [2/5],Step [200/600],loss:0.8273
Epoch [2/5],Step [300/600],loss:0.7541
Epoch [2/5],Step [400/600],loss:0.5851
Epoch [2/5],Step [500/600],loss:0.5886
Epoch [2/5],Step [600/600],loss:0.5442
Epoch [3/5],Step [100/600],loss:0.5028
Epoch [3/5],Step [200/600],loss:0.3960
Epoch [3/5],Step [300/600],loss:0.5012
Epoch [3/5],Step [400/600],loss:0.4388
Epoch [3/5],Step [500/600],loss:0.3448
Epoch [3/5],Step [600/600],loss:0.3268
Epoch [4/5],Step [100/600],loss:0.3341
Epoch [4/5],Step [200/600],loss:0.4714
Epoch [4/5],Step [300/600],loss:0.4561
Epoch [4/5],Step [400/600],loss:0.3851
Epoch [4/5],Step [500/600],loss:0.4393
Epoch [4/5],Step [600/600],loss:0.4695
Epoch [5/5],Step [100/600],loss:0.2513
Epoch [5

### convolutional Neural Network (two convolutional neural Network)

In [8]:
class CNN(nn.Module):
    def __init__(self,num_classes):
        super(CNN,self).__init__()
        
        self.layer1 = nn.Sequential(                    #input size (1,28,28)
            nn.Conv2d(1, 16, kernel_size=5, padding=2), #in_channel=1,out_channel=16,filter size=5
            nn.BatchNorm2d(16),     # o/p size (16,28,28)
            nn.ReLU(),              #activation  o/p size  (16,28,28)
            nn.MaxPool2d(2))        #output size after pooling (16,14,14)
        
        
        self.layer2 = nn.Sequential(    #input shape (16,14,14)
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))          #output shape (32,7,7)
        
        self.fc = nn.Linear(32*7*7, num_classes)     # fully connected Layer,output 10 classes
    
    def forward(self,x):
        
        out = self.layer1(x)
        out = self.layer2(out)
        
        out = out.view(out.size(0),-1)    #flatten the output of conv2d to feed into fully connected layers
        
        out = self.fc(out)
        return out 


In [9]:

model = CNN(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)

# Train the model 
total_step = len(train_loader)
print("total_step :",total_step)

for epoch in range(num_epochs):
    for i,(images,labels) in enumerate (train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # forward pass 
        outputs = model(images)
        loss = criterion(outputs,labels)

        # backward and optimize 
        optimizer.zero_grad() # clear gradient for this steps
        loss.backward()     # calculate gradient
        optimizer.step()    #apply gradients

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}],Step [{}/{}],loss:{:.4f}'
            .format(epoch+1,num_epochs,i+1,total_step,loss.item()))  

# test the model 
model.eval()    # eval model (batchnorm uses moving mean./varience instead of mini-batch mean and varience)
with torch.no_grad():
    correct = 0
    total = 0
    for images ,labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # print("outputs data",outputs.data)
        _ ,predicted =torch.max(outputs.data,1)
        total += labels.size(0)
        correct += (predicted==labels).sum().item()
    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'cnn.ckpt')

total_step : 600
Epoch [1/5],Step [100/600],loss:0.1866
Epoch [1/5],Step [200/600],loss:0.1029
Epoch [1/5],Step [300/600],loss:0.0826
Epoch [1/5],Step [400/600],loss:0.0431
Epoch [1/5],Step [500/600],loss:0.0457
Epoch [1/5],Step [600/600],loss:0.0220
Epoch [2/5],Step [100/600],loss:0.0922
Epoch [2/5],Step [200/600],loss:0.0232
Epoch [2/5],Step [300/600],loss:0.0847
Epoch [2/5],Step [400/600],loss:0.1794
Epoch [2/5],Step [500/600],loss:0.0404
Epoch [2/5],Step [600/600],loss:0.0451
Epoch [3/5],Step [100/600],loss:0.0172
Epoch [3/5],Step [200/600],loss:0.0032
Epoch [3/5],Step [300/600],loss:0.0739
Epoch [3/5],Step [400/600],loss:0.0137
Epoch [3/5],Step [500/600],loss:0.0665
Epoch [3/5],Step [600/600],loss:0.0385
Epoch [4/5],Step [100/600],loss:0.0649
Epoch [4/5],Step [200/600],loss:0.0104
Epoch [4/5],Step [300/600],loss:0.1143
Epoch [4/5],Step [400/600],loss:0.0508
Epoch [4/5],Step [500/600],loss:0.0565
Epoch [4/5],Step [600/600],loss:0.0127
Epoch [5/5],Step [100/600],loss:0.0050
Epoch [5