In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

## Importing CIFAR-10 DataSet

We import CIFAR-10 data as tensors using torchvision dataset, normalize it, and load it into a DataLoader with a batch size of 64.

In [None]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader= torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 42798541.31it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


## CNN Model

We define a convolutional neural network (CNN) architecture. The architecture consists of two convolutional layers followed by three fully connected layers. The model utilizes max pooling and ReLU activation functions to extract features from input images and outputs predictions for 10 classes.

In [15]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()

        # Define convolutional layers
        self.conv1 = nn.Conv2d(3, 6, 5)  # input channels=3, output channels=6, kernel size=5x5
        self.conv2 = nn.Conv2d(6, 16, 5)  # input channels=6, output channels=16, kernel size=5x5

        # Define fully connected layers
        self.fc1 = nn.Linear(400, 120)  # input size=400, output size=120
        self.fc2 = nn.Linear(120, 84)  # input size=120, output size=84
        self.fc3 = nn.Linear(84, 10)  # input size=84, output size=10-> number of classes

    def forward(self, x):

        # First convolutional layer with max pooling of size 2x2 and ReLU activation
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)

        # Second convolutional layer with max pooling of size 2x2 and ReLU activation
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = torch.flatten(x, 1)  # Flatten feature maps

        # Fully connected layers with ReLU activation
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # Final fully connected layer
        x = self.fc3(x)
        return x

net = Net()
print(net)


Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [None]:
# Using Cross entropy loss as loss function
criterion = nn.CrossEntropyLoss()
# Using Adam with learning rate 0.01 for gradient descent
optimizer = optim.Adam(net.parameters(), lr=0.01)

In [None]:
# Check if GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net=net.to(device) # Move the model to the available device

In [None]:
# Training the model
for epoch in range(30):
    train_loss = 0.0

    # Iterate over batches of data in the training loader
    for data, target in trainloader:
        data, target = data.to(device), target.to(device)  # Move data to the avaiable device (cuda or cpu)
        optimizer.zero_grad()   # Zero the gradient buffers
        output = net(data)  # Forward pass
        loss = criterion(output, target)  # Calculate the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        train_loss += loss.item() * data.size(0)  # Accumulate the training loss

    train_loss = train_loss / len(trainloader.dataset)  # Calculate average training loss per sample

    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))


Epoch: 0 	Training Loss: 1.766820
Epoch: 1 	Training Loss: 1.617495
Epoch: 2 	Training Loss: 1.575118
Epoch: 3 	Training Loss: 1.546802
Epoch: 4 	Training Loss: 1.527485
Epoch: 5 	Training Loss: 1.512298
Epoch: 6 	Training Loss: 1.488030
Epoch: 7 	Training Loss: 1.464262
Epoch: 8 	Training Loss: 1.459357
Epoch: 9 	Training Loss: 1.442765
Epoch: 10 	Training Loss: 1.424231
Epoch: 11 	Training Loss: 1.409670
Epoch: 12 	Training Loss: 1.415659
Epoch: 13 	Training Loss: 1.406955
Epoch: 14 	Training Loss: 1.402304
Epoch: 15 	Training Loss: 1.384663
Epoch: 16 	Training Loss: 1.378980
Epoch: 17 	Training Loss: 1.361006
Epoch: 18 	Training Loss: 1.355067
Epoch: 19 	Training Loss: 1.355414
Epoch: 20 	Training Loss: 1.335979
Epoch: 21 	Training Loss: 1.345602
Epoch: 22 	Training Loss: 1.334667
Epoch: 23 	Training Loss: 1.322590
Epoch: 24 	Training Loss: 1.306889
Epoch: 25 	Training Loss: 1.324198
Epoch: 26 	Training Loss: 1.294480
Epoch: 27 	Training Loss: 1.323995
Epoch: 28 	Training Loss: 1.30

In [None]:
# Evaluating the model on training set
correct = 0
total = 0
# Set the model to evaluation mode
net.eval()

# Disable gradient calculation for evaluation
with torch.no_grad():

    for data, target in testloader:
        data, target = data.to(device), target.to(device)  # Move data to the selected device
        # Perform forward pass
        outputs = net(data)
        # Get the predicted classes
        _, pred = torch.max(outputs.data, 1)
        # Update total number of samples
        total += target.size(0)
        # Update number of correct predictions
        correct += (pred == target).sum()

# Calculate the accuracy
accuracy = float(100 * correct / total)

print(f'Test accuracy is: {accuracy:.2f}%')


Test accuracy is: 51.51%



With the basic model architecture defined, we achieved an accuracy of 51.5% on the dataset. In the next steps, we will explore enhancements to the model architecture to improve its performance.

## Improved CNN Model

We enhance the architecture by defining a convolutional neural network, featuring batch normalization after each convolutional layer. The architecture comprises three convolutional layers with subsequent max pooling operations. Concluding with three fully connected layers with ReLU activations.

In [None]:
class Net2(nn.Module):

    def __init__(self):
        super(Net2, self).__init__()

        # First convolutional layer
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.conv1_bn = nn.BatchNorm2d(32)  # Batch normalization after conv1

        # Second convolutional layer
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv2_bn = nn.BatchNorm2d(64)  # Batch normalization after conv2

        # Third convolutional layer
        self.conv3 = nn.Conv2d(64, 128, 3)

        # Fully connected layers
        self.fc1 = nn.Linear(512, 256)  # Fully connected layer 1
        self.fc2 = nn.Linear(256, 128)  # Fully connected layer 2
        self.fc3 = nn.Linear(128, 10)   # Output layer for 10 classes

    def forward(self, x):

        # First conv layer with max-pooling of 2x2 and stride 2, with batch normalization and ReLU activation
        x = F.max_pool2d(F.relu(self.conv1_bn(self.conv1(x))), 2)
        # Second conv layer with max-pooling of 2x2 and stride 2, with batch normalization and ReLU activation
        x = F.max_pool2d(F.relu(self.conv2_bn(self.conv2(x))), 2)
        # Forward pass through conv3 with ReLU activation
        x = F.max_pool2d(F.relu(self.conv3(x)), 2)
        # Flatten the output tensor
        x = torch.flatten(x, 1)
        # Forward pass through fully connected layers with ReLU activation
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Final output layer
        x = self.fc3(x)
        return x

net2 = Net2()
print(net2)


Net2(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
)


In [None]:
# Using Cross entropy loss as loss function
criterion = nn.CrossEntropyLoss()
# Using Adam with learning rate 0.01 for gradient descent
optimizer = optim.Adam(net2.parameters(), lr=0.01)

In [None]:
net2=net2.to(device) # Move the model to the available device

In [None]:
# Training the model for 30 epochs, batch size of 64
for epoch in range(30):
    train_loss = 0.0
    for data, target in trainloader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = net2(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)

    train_loss = train_loss/len(trainloader.dataset)

    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))

Epoch: 0 	Training Loss: 1.769236
Epoch: 1 	Training Loss: 1.501370
Epoch: 2 	Training Loss: 1.421908
Epoch: 3 	Training Loss: 1.377169
Epoch: 4 	Training Loss: 1.344728
Epoch: 5 	Training Loss: 1.314730
Epoch: 6 	Training Loss: 1.285381
Epoch: 7 	Training Loss: 1.257727
Epoch: 8 	Training Loss: 1.243471
Epoch: 9 	Training Loss: 1.225556
Epoch: 10 	Training Loss: 1.212813
Epoch: 11 	Training Loss: 1.178521
Epoch: 12 	Training Loss: 1.147031
Epoch: 13 	Training Loss: 1.130814
Epoch: 14 	Training Loss: 1.108860
Epoch: 15 	Training Loss: 1.085133
Epoch: 16 	Training Loss: 1.075097
Epoch: 17 	Training Loss: 1.058152
Epoch: 18 	Training Loss: 1.048248
Epoch: 19 	Training Loss: 1.032190
Epoch: 20 	Training Loss: 1.018758
Epoch: 21 	Training Loss: 1.004898
Epoch: 22 	Training Loss: 0.995754
Epoch: 23 	Training Loss: 0.986801
Epoch: 24 	Training Loss: 0.974117
Epoch: 25 	Training Loss: 0.968210
Epoch: 26 	Training Loss: 0.961463
Epoch: 27 	Training Loss: 0.956993
Epoch: 28 	Training Loss: 0.95

In [None]:
# Evaluating the model on training set
correct = 0
total = 0
net2.eval()

with torch.no_grad():

    for data, target in testloader:
        data, target = data.to(device), target.to(device)
        outputs = net2(data)
        _, pred = torch.max(outputs.data, 1)
        total += target.size(0)
        correct += (pred == target).sum()

# Calculate the accuracy
accuracy = float(100 * correct / total)
print(f'Test accuracy is: {accuracy:.2f}%')

Test accuracy is: 62.52%


Here we see that by increasing the depth of the Conv Net and adding batch Norm, we improved the model's performance, achieving an acuuracy of 62.5% on the test set. Further improvements, like adjusting the learning rate in Adam optimization, are possible, but we're holding off due to limited computing resources.

## Transfer Learning using ResNet-18 Model

We will now implement transfer learning using the PyTorch ResNet-18 model on the CIFAR-10 dataset. We will freeze all weights except for the last FC layer, which will be trained specifically for the 10 classes in CIFAR-10. Prior to using ResNet, we need to resize our CIFAR-10 dataset to the size of 224x224 and normalize it using the mean values `[0.485, 0.456, 0.406]` and standard deviation values `[0.229, 0.224, 0.225]`, as specified in the [ResNet](https://pytorch.org/hub/pytorch_vision_resnet/) documentation.

In [2]:
transform2 = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

trainset2 = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform2)
trainloader2 = torch.utils.data.DataLoader(trainset2, batch_size=64,shuffle=True)
testset2 = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform2)
testloader2 = torch.utils.data.DataLoader(testset2, batch_size=64,shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 12984547.17it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [8]:
import torchvision.models as models
model_resnet = models.resnet18(weights='IMAGENET1K_V1')
# Print ResNet-18 model
print(model_resnet)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

Now, we freeze all ResNet parameters except the last fully connected layer and replace it with a fully connected layers for CIFAR-10 classification

In [9]:
# Freeze all parameters in the ResNet model
for param in model_resnet.parameters():
    param.requires_grad = False
# Get the number of input features for the last FC layer
features = model_resnet.fc.in_features

# Replace the last FC layer
model_resnet.fc = nn.Sequential(nn.Linear(features, 128))  # Fully connected layer with 10 output features
# New layers to a pre-trained model are by default set to require gradients

In [10]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_resnet = model_resnet.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_resnet.parameters(), lr=0.01)

We will now train our ResNet model with the added fully connected layers for CIFAR-10 classification, while keeping the pre-existing layers frozen to leverage the pre-trained features. The model will take ~20 minutes if trained on GPU

In [12]:
for epoch in range(10):
    train_loss = 0.0
    for data, target in trainloader2:
        data, target= data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model_resnet(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*data.size(0)
    train_loss = train_loss/len(trainloader2.dataset)

    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))

Epoch: 0 	Training Loss: 0.814544
Epoch: 1 	Training Loss: 0.812994
Epoch: 2 	Training Loss: 0.842185
Epoch: 3 	Training Loss: 0.814556
Epoch: 4 	Training Loss: 0.814977
Epoch: 5 	Training Loss: 0.820009
Epoch: 6 	Training Loss: 0.821935
Epoch: 7 	Training Loss: 0.818554
Epoch: 8 	Training Loss: 0.822071
Epoch: 9 	Training Loss: 0.846803


We now evaluate the ResNet model, which includes the added fully connected layers for CIFAR-10 classification, on the test set



In [13]:
correct = 0
total = 0
model_resnet.eval()

with torch.no_grad():
    for data, labels in testloader2:
        data, labels= data.to(device), labels.to(device)
        outputs = model_resnet(data)
        _, pred = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (pred == labels).sum()

accuracy = float(100 * correct / total)
print(f'Test accuracy is: {accuracy:.2f}%')

Test accuracy is: 77.96%


After testing, our ResNet model performed better on the test set, achieving a 77.9% accuracy. We achieved improvements compared to our previous CNN model by leveraging pre-trained ResNet features and fine-tuning the model's parameters for last layer