In [2]:
%pip install torchsummary
%pip install albumentations

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import gaussian_filter
from PIL import Image


In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1, stride=2),
            nn.ReLU(),
            nn.BatchNorm2d(32),

            nn.Dropout(0.25)
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1,stride=2),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Dropout(0.25)
        )

        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1,stride=2),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.Dropout(0.25)

         )

        self.conv4_depthwise = nn.Sequential(
            nn.Conv2d(128, 128, 3, padding=1, stride=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.Dropout(0.1)
         )
        self.gap = nn.AdaptiveAvgPool2d((1,1))
        self.fc1 = nn.Linear(128  , 10)
       

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4_depthwise(x)
        x = self.gap(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc1(x)
        return F.log_softmax(x,dim=1)

In [3]:

from torchsummary import summary
use_cuda =  torch.cuda.is_available()
print("CUDA Available?", use_cuda)
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(3, 32, 32))

CUDA Available? True
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 16, 16]             896
              ReLU-2           [-1, 32, 16, 16]               0
       BatchNorm2d-3           [-1, 32, 16, 16]              64
           Dropout-4           [-1, 32, 16, 16]               0
            Conv2d-5             [-1, 64, 8, 8]          18,496
              ReLU-6             [-1, 64, 8, 8]               0
       BatchNorm2d-7             [-1, 64, 8, 8]             128
           Dropout-8             [-1, 64, 8, 8]               0
            Conv2d-9            [-1, 128, 4, 4]          73,856
             ReLU-10            [-1, 128, 4, 4]               0
      BatchNorm2d-11            [-1, 128, 4, 4]             256
          Dropout-12            [-1, 128, 4, 4]               0
           Conv2d-13            [-1, 128, 4, 4]         147,584
             ReLU-

In [4]:
model

Net(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.25, inplace=False)
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.25, inplace=False)
  )
  (conv3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.25, inplace=False)
  )
  (conv4_depthwise): Sequential(
    (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.1, inplace=False

In [5]:
import random
import numpy as np

def coarse_dropout(image, min_holes=1, max_holes=1, max_height=16, max_width=16, min_height=16, min_width=16, fill_value=0):
  """
  Applies CoarseDropout to an image.

  Args:
    image: PIL Image to apply the dropout to.
    min_holes: Minimum number of holes to apply.
    max_holes: Maximum number of holes to apply.
    max_height: Maximum height of the holes.
    max_width: Maximum width of the holes.
    fill_value: Value to fill the holes with.

  Returns:
    PIL Image with CoarseDropout applied.
  """
  image_np = np.array(image)
  num_holes = random.randint(min_holes, max_holes)

  h, w, _ = image_np.shape  # Assuming the image is in HWC format

  # Calculate the mean value of the image
  fill_value = int(np.mean(image_np))

  for _ in range(num_holes):
    hole_h = random.randint(min_height, max_height)
    hole_w = random.randint(min_width, max_width)
    y = random.randint(0, h - hole_h)
    x = random.randint(0, w - hole_w)

    image_np[y:y + hole_h, x:x + hole_w, :] = fill_value

  return image #return as PIL image

In [16]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
torch.manual_seed(1)
batch_size = 64
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../data', train=True, download=True,
                    transform=transforms.Compose([
                    transforms.Lambda(lambda x: coarse_dropout(x)),  # Add CoarseDropout
                    transforms.RandomHorizontalFlip(),
                    transforms.Lambda(lambda x: np.array(x)),
                    transforms.Lambda(lambda x: A.Compose([ # Albumentations Compose for augmentations
                            A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=10, p=0.2),
                            # Other Albumentations transforms if needed
                        ])(image=x)['image']), # Convert to PyTorch Tensor
                    transforms.Lambda(lambda x: Image.fromarray(x)),
                    transforms.ToTensor(),
                    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

Files already downloaded and verified


In [8]:
# Adding a early stopping class

import torch

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [9]:
from tqdm import tqdm

train_losses = []
test_losses = []
train_acc = []
test_acc = []


def train(model, device, train_loader, optimizer, epoch):
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    # get samples
    data, target = data.to(device), target.to(device)

    # Init
    optimizer.zero_grad()
    # In PyTorch, we need to set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes.
    # Because of this, when you start your training loop, ideally you should zero out the gradients so that you do the parameter update correctly.

    # Predict
    y_pred = model(data)

    # Calculate loss
    loss = F.nll_loss(y_pred, target)
    train_losses.append(loss)

    # Backpropagation
    loss.backward()
    optimizer.step()

    # Update pbar-tqdm

    pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed += len(data)

    pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

  train_loss_sum_epoch = processed - correct
  return train_loss_sum_epoch

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    test_acc.append(100. * correct / len(test_loader.dataset))

In [17]:
from torch.optim.lr_scheduler import StepLR


model = Net().to(device)
early_stopping = EarlyStopping(patience=3, verbose=True) # patience is number of epochs to wait for improvement

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

num_epochs = 30
for epoch in range(1, num_epochs+1):
    print("EPOCH:", epoch)
    val_loss = train(model, device, train_loader, optimizer, epoch)
    # This is to genalize the model faster
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping")
        break
    # After each trained epoch , step up
    scheduler.step()
    test(model, device, test_loader)

EPOCH: 1


Loss=1.9167639017105103 Batch_id=781 Accuracy=42.98: 100%|██████████| 782/782 [01:08<00:00, 11.40it/s]


Validation loss decreased (inf --> 28508.000000).  Saving model ...

Test set: Average loss: 1.3106, Accuracy: 5295/10000 (52.95%)

EPOCH: 2


Loss=1.2440701723098755 Batch_id=781 Accuracy=54.11: 100%|██████████| 782/782 [01:09<00:00, 11.31it/s]


Validation loss decreased (28508.000000 --> 22944.000000).  Saving model ...

Test set: Average loss: 1.1431, Accuracy: 5931/10000 (59.31%)

EPOCH: 3


Loss=1.038072109222412 Batch_id=781 Accuracy=58.87: 100%|██████████| 782/782 [01:08<00:00, 11.47it/s] 


Validation loss decreased (22944.000000 --> 20567.000000).  Saving model ...

Test set: Average loss: 1.0421, Accuracy: 6280/10000 (62.80%)

EPOCH: 4


Loss=1.8617281913757324 Batch_id=781 Accuracy=64.60: 100%|██████████| 782/782 [01:08<00:00, 11.36it/s]


Validation loss decreased (20567.000000 --> 17702.000000).  Saving model ...

Test set: Average loss: 0.8894, Accuracy: 6842/10000 (68.42%)

EPOCH: 5


Loss=0.7639380097389221 Batch_id=781 Accuracy=66.24: 100%|██████████| 782/782 [01:09<00:00, 11.31it/s]


Validation loss decreased (17702.000000 --> 16879.000000).  Saving model ...

Test set: Average loss: 0.8578, Accuracy: 6974/10000 (69.74%)

EPOCH: 6


Loss=0.9297522902488708 Batch_id=781 Accuracy=66.74: 100%|██████████| 782/782 [01:08<00:00, 11.41it/s]


Validation loss decreased (16879.000000 --> 16630.000000).  Saving model ...

Test set: Average loss: 0.8415, Accuracy: 7021/10000 (70.21%)

EPOCH: 7


Loss=0.8880215883255005 Batch_id=781 Accuracy=67.71: 100%|██████████| 782/782 [01:09<00:00, 11.33it/s]


Validation loss decreased (16630.000000 --> 16147.000000).  Saving model ...

Test set: Average loss: 0.8425, Accuracy: 7023/10000 (70.23%)

EPOCH: 8


Loss=0.9501712322235107 Batch_id=781 Accuracy=67.77: 100%|██████████| 782/782 [01:06<00:00, 11.82it/s]


Validation loss decreased (16147.000000 --> 16113.000000).  Saving model ...

Test set: Average loss: 0.8402, Accuracy: 7028/10000 (70.28%)

EPOCH: 9


Loss=0.9998897910118103 Batch_id=781 Accuracy=67.95: 100%|██████████| 782/782 [01:02<00:00, 12.53it/s]


Validation loss decreased (16113.000000 --> 16025.000000).  Saving model ...

Test set: Average loss: 0.8438, Accuracy: 7017/10000 (70.17%)

EPOCH: 10


Loss=1.826817274093628 Batch_id=781 Accuracy=68.06: 100%|██████████| 782/782 [01:02<00:00, 12.59it/s] 


Validation loss decreased (16025.000000 --> 15972.000000).  Saving model ...

Test set: Average loss: 0.8378, Accuracy: 7051/10000 (70.51%)

EPOCH: 11


Loss=1.0531303882598877 Batch_id=781 Accuracy=67.82: 100%|██████████| 782/782 [01:06<00:00, 11.84it/s]


EarlyStopping counter: 1 out of 3

Test set: Average loss: 0.8416, Accuracy: 7029/10000 (70.29%)

EPOCH: 12


Loss=1.2466462850570679 Batch_id=781 Accuracy=68.01: 100%|██████████| 782/782 [01:09<00:00, 11.32it/s]


EarlyStopping counter: 2 out of 3

Test set: Average loss: 0.8330, Accuracy: 7053/10000 (70.53%)

EPOCH: 13


Loss=0.4257303476333618 Batch_id=781 Accuracy=67.85: 100%|██████████| 782/782 [01:08<00:00, 11.37it/s]

EarlyStopping counter: 3 out of 3
Early stopping



