**NN**

In [1]:
# All THE LIBRARIES:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [2]:
device

device(type='cuda', index=0)

In [3]:
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, Normalize, Compose
from torch.utils.data import DataLoader, random_split
import torch

# Define transformations
transform = Compose([
    ToTensor(),  # Convert PIL Image to tensor
    Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize images
])

# Load CIFAR-10 dataset with transformations
dataset = CIFAR10(root='data/', download=True, transform=transform)
test_dataset = CIFAR10(root='data/', train=False, transform=transform)
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

# Create data loaders
batch_size = 128
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 42626845.08it/s]


Extracting data/cifar-10-python.tar.gz to data/


In [4]:
import torch.nn.functional as F

def evaluate(model, val_loader):
    model.eval()
    correct = 0
    total = 0
    total_loss = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)  # Move batch to device
            outputs = model(images)
            loss = F.cross_entropy(outputs, labels)
            total_loss += loss.item()
            # basically one out of 10 with the highest probability would correspond to output of that
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    average_loss = total_loss / len(val_loader.dataset)  # Calculate average loss over the entire validation dataset
    return average_loss, accuracy


In [5]:
def train_model(model, train_loader, val_loader, optimizer, epochs=10):
    criterion = nn.CrossEntropyLoss()
    best_val_loss = float('inf')
    early_stopping_patience=3
    epochs_without_improvement = 0
    for epoch in range(epochs):
        model.train()
        for batch in train_loader:
            images, labels = batch[0].to(device), batch[1].to(device)
            # make all the gradient zero and find the output based
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        val_loss, val_acc = evaluate(model, val_loader)
        print(f"Epoch [{epoch+1}/{epochs}], Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= early_stopping_patience:
                print(f"No improvement in validation loss for {early_stopping_patience} epochs. Stopping training.")
                break

In [6]:
import torch.nn.functional as F

class CIFAR10MLP(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3, output_size):
        super(CIFAR10MLP, self).__init__()
        # Define fully connected layers
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.fc4 = nn.Linear(hidden_size3, output_size)
        # Add batch normalization layers
        self.bn1 = nn.BatchNorm1d(hidden_size1)
        self.bn2 = nn.BatchNorm1d(hidden_size2)
        self.bn3 = nn.BatchNorm1d(hidden_size3)
        # Add dropout layers
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = F.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.fc4(x)
        return x


In [7]:
import time


In [8]:
# Model parameters
input_size = 3 * 32 * 32  # 3 channels, 32x32 image size
hidden_size1 = 512
hidden_size2 = 256
hidden_size3 =128
output_size = 10

# # Initialize and train the model
model = CIFAR10MLP(input_size, hidden_size1, hidden_size2,hidden_size3, output_size)
model = model.to(device)
lr=0.001
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
start_time = time.time()  # Record the start time
train_model(model, train_loader, val_loader,optimizer,epochs=30)
end_time = time.time()  # Record the end time
training_time = end_time - start_time
print(f"Training completed in {training_time} seconds.")


Epoch [1/30], Val Loss: 0.0129, Val Acc: 0.4194
Epoch [2/30], Val Loss: 0.0122, Val Acc: 0.4470
Epoch [3/30], Val Loss: 0.0117, Val Acc: 0.4748
Epoch [4/30], Val Loss: 0.0113, Val Acc: 0.4922
Epoch [5/30], Val Loss: 0.0112, Val Acc: 0.4950
Epoch [6/30], Val Loss: 0.0109, Val Acc: 0.5046
Epoch [7/30], Val Loss: 0.0109, Val Acc: 0.5080
Epoch [8/30], Val Loss: 0.0107, Val Acc: 0.5228
Epoch [9/30], Val Loss: 0.0106, Val Acc: 0.5192
Epoch [10/30], Val Loss: 0.0104, Val Acc: 0.5292
Epoch [11/30], Val Loss: 0.0104, Val Acc: 0.5350
Epoch [12/30], Val Loss: 0.0103, Val Acc: 0.5312
Epoch [13/30], Val Loss: 0.0102, Val Acc: 0.5384
Epoch [14/30], Val Loss: 0.0102, Val Acc: 0.5412
Epoch [15/30], Val Loss: 0.0102, Val Acc: 0.5440
Epoch [16/30], Val Loss: 0.0101, Val Acc: 0.5454
Epoch [17/30], Val Loss: 0.0100, Val Acc: 0.5524
Epoch [18/30], Val Loss: 0.0101, Val Acc: 0.5480
Epoch [19/30], Val Loss: 0.0100, Val Acc: 0.5568
Epoch [20/30], Val Loss: 0.0100, Val Acc: 0.5516
Epoch [21/30], Val Loss: 0.00

In [10]:
loss,test_accuracy = evaluate(model, test_loader)
print(f"Test Accuracy for MLP is : {test_accuracy:.4f}")

Test Accuracy for MLP is : 0.5639


**CNN**

In [11]:
import torch.nn as nn

class CIFAR10CNN(nn.Module):
    def __init__(self):
        super(CIFAR10CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        # max pooling layers
        self.pool = nn.MaxPool2d(2, 2)
        # fully connected layers
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)
        # dropout layer
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # convolutional layers with batch normalization and max pooling
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        # flatten image input
        x = x.view(-1, 128 * 4 * 4)
        # fully connected layers with dropout
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x


In [12]:
# # Initialize and train the model
model1 = CIFAR10CNN()
model1 = model1.to(device)
lr=0.001
optimizer = torch.optim.Adam(model1.parameters(), lr=lr)
start_time = time.time()  # Record the start time
train_model(model1, train_loader, val_loader,optimizer,epochs=30)
end_time = time.time()  # Record the end time
training_time = end_time - start_time
print(f"Training completed in {training_time} seconds.")

Epoch [1/30], Val Loss: 0.0097, Val Acc: 0.5748
Epoch [2/30], Val Loss: 0.0081, Val Acc: 0.6466
Epoch [3/30], Val Loss: 0.0072, Val Acc: 0.6878
Epoch [4/30], Val Loss: 0.0063, Val Acc: 0.7306
Epoch [5/30], Val Loss: 0.0062, Val Acc: 0.7256
Epoch [6/30], Val Loss: 0.0059, Val Acc: 0.7470
Epoch [7/30], Val Loss: 0.0057, Val Acc: 0.7600
Epoch [8/30], Val Loss: 0.0051, Val Acc: 0.7816
Epoch [9/30], Val Loss: 0.0050, Val Acc: 0.7854
Epoch [10/30], Val Loss: 0.0064, Val Acc: 0.7380
Epoch [11/30], Val Loss: 0.0051, Val Acc: 0.7922
Epoch [12/30], Val Loss: 0.0052, Val Acc: 0.7864
No improvement in validation loss for 3 epochs. Stopping training.
Training completed in 161.37891507148743 seconds.


In [13]:
loss1,test_accuracy1 = evaluate(model1, test_loader)
print(f"Test Accuracy for CNN Model is : {test_accuracy1:.4f}")


Test Accuracy for CNN Model is : 0.7781


**VGG16**

In [17]:
from torchvision import models
import torch.optim as optim

# Load the pre-trained VGG-16 model
model2 = models.vgg16(pretrained=True)
model2= model2.to(device)


# # Freeze all the layers in the pre-trained model
# for param in model2.parameters():
#     param.requires_grad = False

# # Unfreeze the last few layers
# for param in model2.features[-4:].parameters():
#     param.requires_grad = True

# Modify the last layer to match the number of classes in the CIFAR-10 dataset
num_features = model2.classifier[6].in_features
features = list(model2.classifier.children())[:-1] # Remove last layer
features.extend([nn.Linear(num_features, 10)]) # Add our layer with 10 outputs
model2.classifier = nn.Sequential(*features) # Replace the model classifier

# Define the loss function and the optimizer
# optimizer = optim.SGD(filter(lambda p: p.requires_grad, model2.parameters()), lr=0.001, momentum=0.9)
optimizer = optim.SGD(model2.parameters(), lr=0.001, momentum=0.9)
model2 = model2.to(device)
start_time = time.time()  # Record the start time
train_model(model2, train_loader, val_loader,optimizer,epochs=30)
end_time = time.time()
training_time = end_time - start_time
print(f"Training completed in {training_time} seconds.")
loss2,test_accuracy2 = evaluate(model2, test_loader)
print(f"Test Accuracy using VGG: {test_accuracy2:.4f}")

Epoch [1/30], Val Loss: 0.0045, Val Acc: 0.8116
Epoch [2/30], Val Loss: 0.0038, Val Acc: 0.8392
Epoch [3/30], Val Loss: 0.0035, Val Acc: 0.8502
Epoch [4/30], Val Loss: 0.0034, Val Acc: 0.8564
Epoch [5/30], Val Loss: 0.0034, Val Acc: 0.8614
Epoch [6/30], Val Loss: 0.0034, Val Acc: 0.8646
Epoch [7/30], Val Loss: 0.0036, Val Acc: 0.8654
Epoch [8/30], Val Loss: 0.0038, Val Acc: 0.8714
No improvement in validation loss for 3 epochs. Stopping training.
Training completed in 259.76842737197876 seconds.
Test Accuracy using VGG: 0.8614


**ACCURACY AND LOSS**

1.   MLP : Accuracy : 58%, Loss : 0.0097
2.   CNN : Accuracy : 78%, Loss : 0.0052
3.   VGG : Accuracy : 86%, Loss : 0.0038



**Reasons Behind Differences in Performance:**


*   CNNs leverage the spatial structure of images through convolutional layers, which can capture local patterns and spatial hierarchies. This allows CNNs to learn hierarchical representations of features, from simple to complex, which are crucial for image classification tasks.
*  MLPs, on the other hand, treat images as flattened vectors, disregarding their spatial structure. This results in a loss of spatial information and makes it harder for MLPs to learn meaningful representations of images.

*  The VGG-based model, being a deep CNN architecture pre-trained on ImageNet, has already learned a rich set of features from a diverse range of images. Fine-tuning this model on CIFAR-10 allows it to adapt these learned features to the specific characteristics of the CIFAR-10 dataset, resulting in better performance compared to training from scratch.






**Benefits of Transfer Learning:**


*   Transfer learning allows us to leverage knowledge gained from training on a large dataset (like ImageNet) and apply it to a different but related task (CIFAR-10).
*  When fine-tuning a pre-trained model, unfreezing only the last few layers typically requires less time compared to adding and training a new last layer. Freezing most of the model's layers leverages the pre-existing knowledge, reducing the computational burden and training time significantly.
*   The VGG-based model, which utilizes transfer learning, achieved higher accuracy than the MLP and CNN models. This demonstrates the effectiveness of transfer learning in improving performance.


