<a href="https://colab.research.google.com/github/ping-Mel/ERV-V2-Assignments/blob/main/session8/s8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [45]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

class Models:
    """
    In this class, we organize our neural network architectures as nested/inner classes.
    This approach groups related functionalities and creates an organized and encapsulated
    code structure. Each neural network architecture is defined as an inner class within
    this Models class. This allows for easy instantiation and clear hierarchy of neural
    network models, each with its distinct architecture and characteristics.
    """
    @staticmethod
    def evaluate_model(model_class, input_size=(3, 32, 32)):
        """
        Static method to evaluate the model architecture.
        This method will print a summary of the model showing the layers and parameters.

        Parameters:
        model_class (class): The inner class representing the neural network architecture to evaluate.
        input_size (tuple): The size of the input to the model. Default is (1, 28, 28) for MNIST dataset.
        """
        # Check for CUDA availability and set the device accordingly
        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda" if use_cuda else "cpu")

        # Initialize the model from the inner class and move to the appropriate device
        model = model_class().to(device)

        # Print the summary of the model
        summary(model, input_size=input_size)



    class NetA(nn.Module):
        """
        Inner class representing an initial neural network architecture.
        """
        def __init__(self):
            super(Models.NetA, self).__init__()
            # Convolutional layers
            #R_in, N_in, j_in, S, R_out, N_out, J_out, P, K
            # 1    28      1   1    3      28     1    1  3
            self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
            #R_in, N_in, j_in, S, R_out, N_out, J_out, P, K
            # 3    28      1   1    5      28     1    1  3
            self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
            #R_in, N_in, j_in, S, R_out, N_out, J_out, P, K
            # 5    28      1   2    6      14     2    0  2
            self.pool1 = nn.MaxPool2d(2, 2)
            #R_in, N_in, j_in, S, R_out, N_out, J_out, P, K
            # 6    14      2   1    10     14     2    1  3
            self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
            #R_in, N_in, j_in, S, R_out, N_out, J_out, P, K
            # 10    14     2   1    14     14     2    1  3
            self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
            #R_in, N_in, j_in, S, R_out, N_out, J_out, P, K
            # 14    14     2   2    16     7     4     0  2
            self.pool2 = nn.MaxPool2d(2, 2)
            #R_in, N_in, j_in, S, R_out, N_out, J_out, P, K
            # 16    7      4   1    24     5     4     0  3
            self.conv5 = nn.Conv2d(256, 512, 3)
            #R_in, N_in, j_in, S, R_out, N_out, J_out, P, K
            # 24    5     4    1    32     3     4     0  3
            self.conv6 = nn.Conv2d(512, 1024, 3)
            #R_in, N_in, j_in, S, R_out, N_out, J_out, P, K
            # 32    3      4   1    40     1     4     0  3
            self.conv7 = nn.Conv2d(1024, 10, 3)

        def forward(self, x):
            x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x)))))
            x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x)))))
            x = F.relu(self.conv6(F.relu(self.conv5(x))))
            x = self.conv7(x)
            x = x.view(-1, 10) #1x1x10> 10
            return F.log_softmax(x, dim=-1)

# Target 1: Group Normalisation Network

## Initial Setup
- Construct the **basic skeleton** of the neural network model. This includes defining the layers and forward pass logic in Class Models (model.py) NetA inner class.
-

## Aim
1.   **70%** (this must be consistently shown in your last few epochs, and not a one-time achievement)
2.   Less than or equal to **20** Epochs
3.   Less than **50k** Parameters

In [46]:
!pip install torchsummary

from __future__ import print_function
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt



In [47]:
# Train Phase transformations
train_transforms = transforms.Compose([
                                      #  transforms.Resize((28, 28)),
                                      #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.49139968, 0.48215827, 0.44653124), (0.49139968, 0.48215827, 0.44653124)) # The mean and std have to be sequences (e.g., tuples), therefore you should add a comma after the values.
                                       # Note the difference between (0.1307) and (0.1307,)
                                       ])

# Test Phase transformations
test_transforms = transforms.Compose([
                                      #  transforms.Resize((28, 28)),
                                      #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.49139968, 0.48215827, 0.44653124), (0.49139968, 0.48215827, 0.44653124))
                                       ])

In [48]:
train = datasets.CIFAR10('./data', train=True, download=True, transform=train_transforms)
test = datasets.CIFAR10('./data', train=False, download=True, transform=test_transforms)

Files already downloaded and verified
Files already downloaded and verified


In [49]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# dataloader arguments - something you'll fetch these from cmdprmt
dataloader_args = dict(shuffle=True, batch_size=256, num_workers=2, pin_memory=True) if cuda else dict(shuffle=True, batch_size=128)

# train dataloader
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader
test_loader = torch.utils.data.DataLoader(test, **dataloader_args)

CUDA Available? False


In [50]:
from tqdm import tqdm

train_losses = []
test_losses = []
train_acc = []
test_acc = []

def train(model, device, train_loader, optimizer, epoch):
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # get samples
    data, target = data.to(device), target.to(device)

    # Init
    optimizer.zero_grad()
    # In PyTorch, we need to set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes.
    # Because of this, when you start your training loop, ideally you should zero out the gradients so that you do the parameter update correctly.

    # Predict
    y_pred = model(data)

    # Calculate loss
    loss = F.nll_loss(y_pred, target)
    train_losses.append(loss)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Update pbar-tqdm

    pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}%')
    train_acc.append(100*correct/processed)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    test_acc.append(100. * correct / len(test_loader.dataset))


In [51]:
Models.evaluate_model(Models.NetA)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
            Conv2d-2           [-1, 64, 32, 32]          18,496
         MaxPool2d-3           [-1, 64, 16, 16]               0
            Conv2d-4          [-1, 128, 16, 16]          73,856
            Conv2d-5          [-1, 256, 16, 16]         295,168
         MaxPool2d-6            [-1, 256, 8, 8]               0
            Conv2d-7            [-1, 512, 6, 6]       1,180,160
            Conv2d-8           [-1, 1024, 4, 4]       4,719,616
            Conv2d-9             [-1, 10, 2, 2]          92,170
Total params: 6,380,362
Trainable params: 6,380,362
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 2.02
Params size (MB): 24.34
Estimated Total Size (MB): 26.37
-------------------------------------

In [52]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Models.NetA().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
EPOCHS = 20
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

EPOCH: 0


  0%|          | 0/391 [00:01<?, ?it/s]


ValueError: Expected input batch_size (512) to match target batch_size (128).

In [None]:
t = [t_items.item() for t_items in train_losses]

fig, axs = plt.subplots(2,2,figsize=(15,10))
axs[0, 0].plot(t)
axs[0, 0].set_title("Training Loss")
axs[1, 0].plot(train_acc)
axs[1, 0].set_title("Training Accuracy")
axs[0, 1].plot(test_losses)
axs[0, 1].set_title("Test Loss")
axs[1, 1].plot(test_acc)
axs[1, 1].set_title("Test Accuracy")