# Import Libraries

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

ModuleNotFoundError: No module named 'torch'

## Data Transformations

We start by defining our data transformations. We need to think about what our data is and how we can augment it to correctly represent images the model might not otherwise see.


In [13]:
# Train Phase transformations
# train_transforms = transforms.Compose([
#                                        transforms.Resize(32),
#                                       #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
#                                        transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
#                                        transforms.ToTensor(),
#                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))# The mean and std have to be sequences (e.g., tuples), therefore you should add a comma after the values.
#                                        # Note the difference between (0.1307) and (0.1307,)
#                                        ])

# # Test Phase transformations
# test_transforms = transforms.Compose([
#                                        transforms.Resize(32),
#                                       #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
#                                        transforms.ToTensor(),
#                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
#                                        ])

def _tensor_and_normalize():
    """Build the preprocessing pipeline shared by the train and test splits.

    The pipeline converts a PIL image to a PyTorch tensor and then normalizes
    each of the three channels with mean 0.5 and std 0.5 (values end up in [-1, 1]).
    """
    steps = [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
    return transforms.Compose(steps)


# Each split gets its own Compose instance so one can be changed
# (e.g. by adding augmentation to the training split) without touching the other.
train_transform = _tensor_and_normalize()
test_transform = _tensor_and_normalize()

# Dataset and Creating Train/Test Split

In [14]:
# Options shared by both CIFAR-10 splits; the archive is downloaded on first use.
_cifar_common = dict(root='./data', download=True)

# NOTE: the names `train` and `test` are rebound later in the notebook by the
# `train()` / `test()` functions, so the DataLoader cell must run before that cell.
train = datasets.CIFAR10(train=True, transform=train_transform, **_cifar_common)
test = datasets.CIFAR10(train=False, transform=test_transform, **_cifar_common)

Files already downloaded and verified
Files already downloaded and verified


# Dataloader Arguments & Test/Train Dataloaders


In [15]:
SEED = 1

# Is a CUDA device available?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility: seed the CPU generator, and the CUDA generator when present.
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# Reference dataloader arguments - something you'd normally fetch from the command line.
# NOTE: this dict is only displayed in a later cell; the loaders below do not use it
# and keep an explicit batch size of 64.
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=0, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# Settings shared by both loaders. Pinned host memory is only useful for
# host-to-GPU copies, so request it only when CUDA is actually available.
loader_kwargs = dict(batch_size=64, num_workers=0, pin_memory=cuda)

# Shuffle the training split each epoch; keep the test split in a fixed order.
train_dataloader = torch.utils.data.DataLoader(train, shuffle=True, **loader_kwargs)
test_dataloader = torch.utils.data.DataLoader(test, shuffle=False, **loader_kwargs)

CUDA Available? True


In [16]:
# Peek at a single training batch and print the shape of its first element
# (the image tensor); nothing is printed if the loader yields no batches.
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
  print(first_batch[0].shape)

torch.Size([64, 3, 32, 32])


In [17]:
# Bare expression: as the last line of the cell, the dict's repr becomes the cell output.
dataloader_args

{'shuffle': True, 'batch_size': 128, 'num_workers': 0, 'pin_memory': True}

# The model
Let's start with the model we first saw

In [24]:
import torch.nn.functional as F
dropout_value = 0.1


def _conv3x3_block(in_channels, out_channels, padding, dropout):
    """Return a Conv3x3 -> ReLU -> BatchNorm -> Dropout stack as ``nn.Sequential``."""
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(3, 3), padding=padding, bias=False),
        nn.ReLU(),
        nn.BatchNorm2d(out_channels),
        nn.Dropout(dropout),
    )


def _conv1x1_block(in_channels, out_channels):
    """Return a bias-free 1x1 convolution wrapped in ``nn.Sequential``."""
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, 1), padding=0, bias=False),
    )


class Net(nn.Module):
    """Small fully-convolutional classifier producing 10 log-probabilities per image.

    Layout: three convolutional stages separated by 1x1 transition convolutions
    and 2x2 max pooling, followed by global average pooling and a final 1x1
    convolution that maps 32 channels to 10 outputs.

    Args:
        dropout: Dropout probability used by every 3x3 conv block. When ``None``
            (the default) the module-level ``dropout_value`` is read at
            construction time, so ``Net()`` behaves as before.

    The spatial sizes in the comments below are for a 3x32x32 input. The global
    pooling is adaptive, so larger inputs also yield one row of log-probabilities
    per sample instead of extra rows folded into the batch axis.
    """

    def __init__(self, dropout=None):
        super(Net, self).__init__()
        p = dropout_value if dropout is None else dropout

        # Input Block
        self.convblock1 = _conv3x3_block(3, 16, padding=2, dropout=p)   # output_size = 34

        # CONVOLUTION BLOCK 1
        self.convblock2 = _conv3x3_block(16, 32, padding=0, dropout=p)  # output_size = 32

        # TRANSITION BLOCK 1
        self.convblock3 = _conv1x1_block(32, 10)
        self.pool1 = nn.MaxPool2d(2, 2)                                 # output_size = 16

        # CONVOLUTION BLOCK 2
        self.convblock4 = _conv3x3_block(10, 16, padding=1, dropout=p)  # output_size = 16
        self.convblock5 = _conv3x3_block(16, 32, padding=0, dropout=p)  # output_size = 14

        # TRANSITION BLOCK 2
        self.convblock6 = _conv1x1_block(32, 10)
        self.pool2 = nn.MaxPool2d(2, 2)                                 # output_size = 7

        # CONVOLUTION BLOCK 3
        self.convblock7 = _conv3x3_block(10, 16, padding=1, dropout=p)  # output_size = 7
        self.convblock8 = _conv3x3_block(16, 32, padding=0, dropout=p)  # output_size = 5
        self.convblock9 = _conv3x3_block(32, 32, padding=0, dropout=p)  # output_size = 3

        # OUTPUT BLOCK
        # Adaptive pooling always reduces to 1x1. For the 3x3 map produced by a
        # 32x32 input it averages the same nine values as AvgPool2d(kernel_size=3).
        self.gap = nn.Sequential(
            nn.AdaptiveAvgPool2d(1)
        ) # output_size = 1

        self.convblock10 = _conv1x1_block(32, 10)

        # Not used in forward(); kept so the module's attributes stay the same.
        self.dropout = nn.Dropout(p)

    def forward(self, x):
        """Return log-softmax class scores of shape ``(batch, 10)`` for image batch ``x``."""
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.pool1(x)
        x = self.convblock4(x)
        x = self.convblock5(x)
        x = self.convblock6(x)
        x = self.pool2(x)
        x = self.convblock7(x)
        x = self.convblock8(x)
        x = self.convblock9(x)
        x = self.gap(x)
        x = self.convblock10(x)

        # (batch, 10, 1, 1) -> (batch, 10); flatten per sample so the batch axis is preserved.
        x = x.view(x.size(0), -1)
        return F.log_softmax(x, dim=-1)

# Model Params
I can't emphasize enough how important viewing the model summary is.
Unfortunately, there is no built-in model visualizer, so we have to rely on an external package.

In [25]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size=(3, 32, 32))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 34, 34]             432
              ReLU-2           [-1, 16, 34, 34]               0
       BatchNorm2d-3           [-1, 16, 34, 34]              32
           Dropout-4           [-1, 16, 34, 34]               0
            Conv2d-5           [-1, 32, 32, 32]           4,608
              ReLU-6           [-1, 32, 32, 32]               0
       BatchNorm2d-7           [-1, 32, 32, 32]              64
           Dropout-8           [-1, 32, 32, 32]               0
            Conv2d-9           [-1, 10, 32, 32]             320
        MaxPool2d-10           [-1, 10, 16, 16]               0
           Conv2d-11           [-1, 16, 16, 16]           1,440
             ReLU-12           [-1, 16, 16, 16]               0

# Training and Testing

All right, so this model has roughly 27.7K params (summing the layers defined in `Net`), not 24M. But the purpose of this notebook is to set things right for our future experiments.

Looking at logs can be boring, so we'll introduce **tqdm** progressbar to get cooler logs.

Let's write train and test functions

In [26]:
from tqdm import tqdm

# Module-level metric histories: train() appends to the train_* lists once per
# batch, and test() appends to the test_* lists once per call.
train_losses, train_acc = [], []
test_losses, test_acc = [], []

def train(model, device, train_loader, optimizer, epoch):
  """Run one training pass over ``train_loader`` and record per-batch metrics.

  Args:
    model: Network to optimize; switched to training mode.
    device: Device the batches are moved to before the forward pass.
    train_loader: Iterable yielding ``(data, target)`` batches.
    optimizer: Optimizer stepped once per batch.
    epoch: Current epoch index (accepted for the caller's convenience; unused here).

  Side effects:
    Appends one Python float per batch to the module-level ``train_losses``
    (the batch loss) and ``train_acc`` (running accuracy in percent), and
    updates the tqdm progress-bar description.
  """
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # Move the batch to the same device as the model.
    data, target = data.to(device), target.to(device)

    # PyTorch accumulates gradients across backward passes, so clear them
    # before computing the gradients for this batch.
    optimizer.zero_grad()

    # Predict
    y_pred = model(data)

    # Calculate loss. Store a plain float rather than the tensor: keeping the
    # tensor would retain its autograd graph (and device memory) for every batch.
    loss = F.nll_loss(y_pred, target)
    loss_value = loss.item()
    train_losses.append(loss_value)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Running accuracy over the samples seen so far in this pass.
    pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)
    running_acc = 100*correct/processed

    # Update pbar-tqdm
    pbar.set_description(desc= f'Loss={loss_value} Batch_id={batch_idx} Accuracy={running_acc:0.2f}')
    train_acc.append(running_acc)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    test_acc.append(100. * correct / len(test_loader.dataset))

In [27]:
from torch.optim.lr_scheduler import StepLR

# Number of full passes over the training data.
EPOCHS = 20

# Start from a freshly initialised network and a plain SGD-with-momentum optimizer.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Optional step-wise learning-rate decay (currently disabled):
# scheduler = StepLR(optimizer, step_size=6, gamma=0.1)

# Each epoch: one training pass, then one evaluation pass on the test split.
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_dataloader, optimizer, epoch)
    # scheduler.step()
    test(model, device, test_dataloader)

EPOCH: 0


Loss=2.285463571548462 Batch_id=5 Accuracy=12.76:   0%|          | 3/782 [00:00<00:28, 27.16it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.9485868215560913 Batch_id=781 Accuracy=45.12: 100%|██████████| 782/782 [00:23<00:00, 33.28it/s]



Test set: Average loss: 1.2465, Accuracy: 5480/10000 (54.80%)

EPOCH: 1


Loss=1.4448610544204712 Batch_id=6 Accuracy=55.80:   1%|          | 4/782 [00:00<00:21, 35.98it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.8720466494560242 Batch_id=781 Accuracy=58.25: 100%|██████████| 782/782 [00:23<00:00, 33.05it/s]



Test set: Average loss: 1.0803, Accuracy: 6149/10000 (61.49%)

EPOCH: 2


Loss=0.8045569062232971 Batch_id=5 Accuracy=66.67:   1%|          | 4/782 [00:00<00:22, 35.34it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=1.0334985256195068 Batch_id=781 Accuracy=63.02: 100%|██████████| 782/782 [00:23<00:00, 33.40it/s]



Test set: Average loss: 0.9985, Accuracy: 6432/10000 (64.32%)

EPOCH: 3


Loss=1.1088333129882812 Batch_id=5 Accuracy=67.97:   1%|          | 4/782 [00:00<00:22, 34.08it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=1.0252745151519775 Batch_id=781 Accuracy=66.05: 100%|██████████| 782/782 [00:23<00:00, 33.73it/s]



Test set: Average loss: 0.9291, Accuracy: 6717/10000 (67.17%)

EPOCH: 4


Loss=0.7336885929107666 Batch_id=6 Accuracy=69.87:   1%|          | 4/782 [00:00<00:21, 35.50it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.9781189560890198 Batch_id=781 Accuracy=68.20: 100%|██████████| 782/782 [00:23<00:00, 33.46it/s]



Test set: Average loss: 0.8663, Accuracy: 6942/10000 (69.42%)

EPOCH: 5


Loss=0.7894728183746338 Batch_id=5 Accuracy=67.71:   1%|          | 4/782 [00:00<00:22, 35.24it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.5801871418952942 Batch_id=781 Accuracy=69.75: 100%|██████████| 782/782 [00:23<00:00, 33.35it/s]



Test set: Average loss: 0.8143, Accuracy: 7080/10000 (70.80%)

EPOCH: 6


Loss=1.0748766660690308 Batch_id=4 Accuracy=69.06:   0%|          | 3/782 [00:00<00:30, 25.74it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.7829001545906067 Batch_id=781 Accuracy=70.89: 100%|██████████| 782/782 [00:23<00:00, 33.72it/s]



Test set: Average loss: 0.8150, Accuracy: 7188/10000 (71.88%)

EPOCH: 7


Loss=0.7763178944587708 Batch_id=6 Accuracy=70.54:   1%|          | 4/782 [00:00<00:21, 35.68it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.6563157439231873 Batch_id=781 Accuracy=71.94: 100%|██████████| 782/782 [00:23<00:00, 33.80it/s]



Test set: Average loss: 0.7699, Accuracy: 7316/10000 (73.16%)

EPOCH: 8


Loss=0.98150634765625 Batch_id=5 Accuracy=69.53:   1%|          | 4/782 [00:00<00:23, 33.70it/s]  

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.4968893527984619 Batch_id=781 Accuracy=72.52: 100%|██████████| 782/782 [00:23<00:00, 33.26it/s]



Test set: Average loss: 0.7644, Accuracy: 7348/10000 (73.48%)

EPOCH: 9


Loss=0.6719270348548889 Batch_id=5 Accuracy=73.18:   1%|          | 4/782 [00:00<00:21, 35.83it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=1.3676542043685913 Batch_id=781 Accuracy=73.09: 100%|██████████| 782/782 [00:23<00:00, 33.48it/s]



Test set: Average loss: 0.7552, Accuracy: 7383/10000 (73.83%)

EPOCH: 10


Loss=0.9070302248001099 Batch_id=6 Accuracy=70.54:   1%|          | 4/782 [00:00<00:21, 36.98it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.8014262318611145 Batch_id=781 Accuracy=73.70: 100%|██████████| 782/782 [00:23<00:00, 33.62it/s]



Test set: Average loss: 0.7309, Accuracy: 7432/10000 (74.32%)

EPOCH: 11


Loss=1.046630859375 Batch_id=6 Accuracy=74.11:   1%|          | 4/782 [00:00<00:22, 34.82it/s]    

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.242056205868721 Batch_id=781 Accuracy=74.04: 100%|██████████| 782/782 [00:23<00:00, 33.52it/s]



Test set: Average loss: 0.7417, Accuracy: 7418/10000 (74.18%)

EPOCH: 12


Loss=0.5399878025054932 Batch_id=5 Accuracy=76.04:   1%|          | 4/782 [00:00<00:23, 33.42it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.8176525235176086 Batch_id=781 Accuracy=74.36: 100%|██████████| 782/782 [00:23<00:00, 33.30it/s]



Test set: Average loss: 0.7602, Accuracy: 7378/10000 (73.78%)

EPOCH: 13


Loss=0.8187240958213806 Batch_id=4 Accuracy=72.50:   0%|          | 3/782 [00:00<00:30, 25.89it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.5481528639793396 Batch_id=781 Accuracy=74.79: 100%|██████████| 782/782 [00:23<00:00, 33.46it/s]



Test set: Average loss: 0.6996, Accuracy: 7599/10000 (75.99%)

EPOCH: 14


Loss=0.7087733745574951 Batch_id=6 Accuracy=77.01:   1%|          | 4/782 [00:00<00:20, 37.53it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=1.0846683979034424 Batch_id=781 Accuracy=75.33: 100%|██████████| 782/782 [00:23<00:00, 33.74it/s]



Test set: Average loss: 0.7096, Accuracy: 7555/10000 (75.55%)

EPOCH: 15


Loss=0.632408618927002 Batch_id=5 Accuracy=75.78:   1%|          | 4/782 [00:00<00:23, 32.69it/s] 

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.5684993267059326 Batch_id=781 Accuracy=75.54: 100%|██████████| 782/782 [00:23<00:00, 33.29it/s]



Test set: Average loss: 0.6867, Accuracy: 7652/10000 (76.52%)

EPOCH: 16


Loss=0.7527834177017212 Batch_id=5 Accuracy=77.60:   1%|          | 4/782 [00:00<00:22, 33.90it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.6438153982162476 Batch_id=781 Accuracy=76.00: 100%|██████████| 782/782 [00:23<00:00, 33.46it/s]



Test set: Average loss: 0.6840, Accuracy: 7659/10000 (76.59%)

EPOCH: 17


Loss=0.6555640697479248 Batch_id=5 Accuracy=78.39:   1%|          | 4/782 [00:00<00:21, 35.95it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.7978464365005493 Batch_id=781 Accuracy=76.04: 100%|██████████| 782/782 [00:23<00:00, 33.42it/s]



Test set: Average loss: 0.6921, Accuracy: 7623/10000 (76.23%)

EPOCH: 18


Loss=0.5301916599273682 Batch_id=5 Accuracy=77.86:   1%|          | 4/782 [00:00<00:22, 34.93it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.7904349565505981 Batch_id=781 Accuracy=76.53: 100%|██████████| 782/782 [00:23<00:00, 33.01it/s]



Test set: Average loss: 0.6815, Accuracy: 7650/10000 (76.50%)

EPOCH: 19


Loss=0.8496437668800354 Batch_id=5 Accuracy=72.40:   1%|          | 4/782 [00:00<00:23, 33.69it/s]

<torch.utils.data.dataloader.DataLoader object at 0x7f67fec7e860>


Loss=0.863579511642456 Batch_id=781 Accuracy=76.85: 100%|██████████| 782/782 [00:23<00:00, 33.35it/s]



Test set: Average loss: 0.6867, Accuracy: 7643/10000 (76.43%)



In [None]:
# Iterate over every training batch. The loader is named `train_dataloader`;
# `train_loader` is never defined at notebook level (its creation is commented out),
# so using that name raises NameError on a fresh kernel.
for (_, _) in train_dataloader:
  print("a")