## Task 1: Design and implement a Convolutional Neural Network (CNN) model from scratch

1. Imports and Device Specifications

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import os

# Pick the compute device once: CUDA when a GPU is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


2. Transforming Images

In [None]:
# Preprocessing applied to every image: resize to a fixed square, convert to a
# tensor, then normalize each channel with the mean/std below (these are the
# values commonly used as ImageNet channel statistics).
img_size = 224
target_hw = (img_size, img_size)
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize(target_hw),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

3. Loading Dataset

In [None]:
# Root folder of the image collection (Kaggle input mount). ImageFolder treats
# each sub-directory as one class and applies `transform` to every loaded image.
data_dir = "/kaggle/input/caltech256/256_ObjectCategories"
full_dataset = datasets.ImageFolder(root=data_dir, transform=transform)

4. Train / Validation / Test split

In [4]:
# 70 % train / 15 % validation; the test split takes whatever is left so the
# three sizes always sum to the dataset length despite int() truncation.
total_size = len(full_dataset)
train_size = int(0.7 * total_size)
val_size   = int(0.15 * total_size)
test_size  = total_size - train_size - val_size

print(f"Total images = {total_size}")
print(f"Train: {train_size} | Val: {val_size} | Test: {test_size}")

# Split with a fixed seed. Without an explicit generator the partition changes
# on every kernel restart, so results are not reproducible and a model that is
# reloaded in a later session could be evaluated on images it was trained on.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

Total images = 30607
Train: 21424 | Val: 4591 | Test: 4592


5. Dataloading

In [5]:
batch_size = 64

# Options shared by all three loaders; only the training loader shuffles.
loader_options = dict(batch_size=batch_size, num_workers=4)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader   = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader  = DataLoader(test_dataset, shuffle=False, **loader_options)

6. VGG-like CNN architecture

In [6]:
class MyVGG(nn.Module):
    """VGG-style classifier with batch normalization after every convolution.

    The feature extractor has five stages. Each stage is a run of
    3x3 conv -> BatchNorm -> ReLU triples followed by a 2x2 max-pool; a final
    adaptive average pool fixes the spatial size at 7x7 before the
    fully connected classifier.

    Args:
        num_classes: size of the final linear layer's output (default 257).
    """

    # (output channels, number of conv layers) for each of the five stages.
    _STAGES = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    def __init__(self, num_classes=257):
        super().__init__()

        # Build the convolutional stack stage by stage; layers are created in
        # the same order they run, starting from a 3-channel input.
        layers = []
        in_channels = 3
        for out_channels, num_convs in self._STAGES:
            for _ in range(num_convs):
                layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
                layers.append(nn.BatchNorm2d(out_channels))
                layers.append(nn.ReLU(inplace=True))
                in_channels = out_channels
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        layers.append(nn.AdaptiveAvgPool2d((7, 7)))
        self.features = nn.Sequential(*layers)

        # Three-layer MLP head with dropout between the hidden layers.
        flat_features = 512 * 7 * 7
        self.classifier = nn.Sequential(
            nn.Linear(flat_features, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        """Return the classifier output (one row of `num_classes` scores per input)."""
        feature_maps = self.features(x)
        # Collapse everything except the batch dimension before the linear head.
        batch = feature_maps.size(0)
        flattened = feature_maps.view(batch, -1)
        return self.classifier(flattened)

7. Initializing model

In [7]:
num_epochs = 25
peak_lr = 0.05  # used both as the SGD lr argument and as the one-cycle peak

model = MyVGG(num_classes=257).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=peak_lr,
    momentum=0.9,
    weight_decay=5e-4,
)

# One-cycle schedule sized for one scheduler step per training batch:
# the first 30 % of steps ramp up from max_lr / div_factor to max_lr, the rest
# anneal with a cosine curve down to (max_lr / div_factor) / final_div_factor.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=peak_lr,
    epochs=num_epochs,
    steps_per_epoch=len(train_loader),
    pct_start=0.3,
    anneal_strategy='cos',
    div_factor=10,
    final_div_factor=100,
)

8. Training loop

In [8]:
def run_epoch(model, loader, criterion, device, optimizer=None, scheduler=None):
    """Run one full pass over `loader` and return `(mean_loss, accuracy_percent)`.

    Args:
        model: network to run; switched to train mode when `optimizer` is
            given, otherwise to eval mode.
        loader: iterable yielding `(images, labels)` batches.
        criterion: loss callable taking `(outputs, labels)`.
        device: device the batches are moved to before the forward pass.
        optimizer: if given, the pass is a training pass and the model is
            updated after every batch; if None, gradients are disabled.
        scheduler: optional LR scheduler stepped once per training batch.

    Returns:
        Tuple of the sample-weighted mean loss and the accuracy in percent.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    num_correct = 0
    num_seen = 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

            # Weight the batch loss by batch size so a smaller final batch
            # does not skew the epoch average (assumes `criterion` returns the
            # batch mean, which is the nn.CrossEntropyLoss default).
            loss_sum += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            num_seen += labels.size(0)
            num_correct += (preds == labels).sum().item()

    return loss_sum / num_seen, 100.0 * num_correct / num_seen


for epoch in range(num_epochs):
    # Training pass: updates the weights and steps the scheduler per batch.
    train_loss, train_acc = run_epoch(
        model, train_loader, criterion, device,
        optimizer=optimizer, scheduler=scheduler,
    )

    # Validation
    val_loss, val_acc = run_epoch(model, val_loader, criterion, device)

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"Train Loss: {train_loss:.4f}  Train Acc: {train_acc:.2f}%  "
          f"Val Loss: {val_loss:.4f}  Val Acc: {val_acc:.2f}%")

Epoch [1/25] Train Loss: 5.3324  Train Acc: 5.17%  Val Loss: 5.1915  Val Acc: 6.62%
Epoch [2/25] Train Loss: 5.1186  Train Acc: 7.19%  Val Loss: 4.9888  Val Acc: 7.95%
Epoch [3/25] Train Loss: 4.9095  Train Acc: 9.30%  Val Loss: 4.7202  Val Acc: 10.74%
Epoch [4/25] Train Loss: 4.7271  Train Acc: 10.87%  Val Loss: 4.5477  Val Acc: 13.11%
Epoch [5/25] Train Loss: 4.4747  Train Acc: 13.32%  Val Loss: 4.2782  Val Acc: 15.94%
Epoch [6/25] Train Loss: 4.2058  Train Acc: 16.02%  Val Loss: 4.0234  Val Acc: 20.08%
Epoch [7/25] Train Loss: 3.9459  Train Acc: 18.88%  Val Loss: 3.8128  Val Acc: 22.13%
Epoch [8/25] Train Loss: 3.6855  Train Acc: 22.70%  Val Loss: 3.6563  Val Acc: 25.11%
Epoch [9/25] Train Loss: 3.4530  Train Acc: 26.04%  Val Loss: 3.5358  Val Acc: 26.97%
Epoch [10/25] Train Loss: 3.1979  Train Acc: 30.00%  Val Loss: 3.3698  Val Acc: 30.08%
Epoch [11/25] Train Loss: 2.9716  Train Acc: 33.82%  Val Loss: 3.2003  Val Acc: 31.76%
Epoch [12/25] Train Loss: 2.7190  Train Acc: 37.82%  Val 

9. Save the model for later use

In [9]:
# Save only the model's state_dict (not the module object itself), written to
# the current working directory.
checkpoint_path = "myvgg_model.pth"
torch.save(model.state_dict(), checkpoint_path)