In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Compute device: use the GPU when CUDA is usable, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [3]:
# Data augmentation and normalisation
# Tunable settings live in one place so the train and test pipelines cannot drift apart.
CIFAR100_MEAN = (0.5071, 0.4867, 0.4408)  # CIFAR-100 per-channel mean
CIFAR100_STD = (0.2675, 0.2565, 0.2761)   # CIFAR-100 per-channel std
DATA_ROOT = './data'
BATCH_SIZE = 128
NUM_WORKERS = 2

# Training pipeline: random crop + horizontal flip for augmentation, then normalise
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),  # Augmentation: Randomly crop images
    transforms.RandomHorizontalFlip(),     # Augmentation: Horizontal flip
    transforms.ToTensor(),                 # Convert to tensor
    transforms.Normalize(CIFAR100_MEAN, CIFAR100_STD),
])

# Evaluation pipeline: no augmentation, but the same normalisation as training
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR100_MEAN, CIFAR100_STD),
])

# Load CIFAR-100 dataset (downloaded into DATA_ROOT when not already present)
train_dataset = torchvision.datasets.CIFAR100(root=DATA_ROOT, train=True, download=True, transform=transform_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

# The test split is iterated in a fixed order (shuffle=False)
test_dataset = torchvision.datasets.CIFAR100(root=DATA_ROOT, train=False, download=True, transform=transform_test)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)


Files already downloaded and verified
Files already downloaded and verified


In [6]:
# CNN model
class ImprovedCNN(nn.Module):
    """Small VGG-style CNN classifier for 3-channel 32x32 images.

    The feature extractor has three stages (64, 128 and 256 channels), each
    ending in a 2x2 max-pool, so a 32x32 input is reduced to a 256 x 4 x 4
    feature map. The classifier head is sized for exactly that map, which
    means inputs must be 32x32.

    Args:
        num_classes: Number of output logits. Defaults to 100 (CIFAR-100).
        dropout: Dropout probability applied before the final linear layer.
            Defaults to 0.3.
    """

    def __init__(self, num_classes=100, dropout=0.3):
        super().__init__()
        self.features = nn.Sequential(
            # Stage 1: 3 -> 64 channels, 32x32 -> 16x16
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),  # Batch Normalisation
            nn.LeakyReLU(negative_slope=0.1, inplace=True),  # Leaky ReLU
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 2: 64 -> 128 channels, 16x16 -> 8x8
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 3: two 256-channel convolutions, 8x8 -> 4x4
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 4 * 4, 512),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),
            nn.Dropout(dropout),  # Reduced Dropout rate
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images."""
        x = self.features(x)
        # flatten() copies when needed, so it also works on non-contiguous
        # feature maps (e.g. channels_last inputs) where .view() would raise.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [5]:
# Build the network and place it on the selected device
model = ImprovedCNN()
model = model.to(device)

# Objective and optimiser: cross-entropy loss minimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [7]:
# Training function
def train(model, train_loader, criterion, optimizer):
    """Run one optimisation pass over ``train_loader`` and report its metrics.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable ``criterion(outputs, targets)``. It is assumed
            to return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``).
        optimizer: Optimizer updating ``model``'s parameters.

    Returns:
        Tuple ``(train_loss, train_acc)``: the per-sample average loss and the
        top-1 accuracy in percent, both measured while the weights were being
        updated.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    model.train()

    # Move batches to wherever the model lives instead of relying on a
    # notebook-global ``device``; a parameter-free model leaves batches as-is.
    first_param = next(model.parameters(), None)
    batch_device = first_param.device if first_param is not None else None

    loss_sum = 0.0
    correct = 0
    total = 0
    for inputs, targets in train_loader:
        if batch_device is not None:
            inputs, targets = inputs.to(batch_device), targets.to(batch_device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_size = targets.size(0)
        # Weight each batch mean by its size so a short final batch does not
        # count as much as a full one.
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(targets).sum().item()

    if total == 0:
        raise ValueError('train_loader yielded no samples')

    train_loss = loss_sum / total
    train_acc = 100. * correct / total
    return train_loss, train_acc


In [8]:
# Testing function
def test(model, test_loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    test_loss = running_loss / len(test_loader)
    test_acc = 100. * correct / total
    return test_loss, test_acc


In [9]:
# Epoch loop: one optimisation pass followed by one evaluation pass, then a
# three-line progress report for that epoch.
num_epochs = 20
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer)
    test_loss, test_acc = test(model, test_loader, criterion)

    print('\n'.join([
        f'Epoch [{epoch+1}/{num_epochs}]:',
        f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%',
        f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%',
    ]))


Epoch [1/20]:
Train Loss: 3.8303, Train Accuracy: 11.39%
Test Loss: 3.2829, Test Accuracy: 19.86%
Epoch [2/20]:
Train Loss: 3.1037, Train Accuracy: 23.20%
Test Loss: 2.7386, Test Accuracy: 30.78%
Epoch [3/20]:
Train Loss: 2.6770, Train Accuracy: 31.30%
Test Loss: 2.4239, Test Accuracy: 36.43%
Epoch [4/20]:
Train Loss: 2.3954, Train Accuracy: 37.05%
Test Loss: 2.2304, Test Accuracy: 41.50%
Epoch [5/20]:
Train Loss: 2.1838, Train Accuracy: 41.88%
Test Loss: 2.0873, Test Accuracy: 44.24%
Epoch [6/20]:
Train Loss: 2.0151, Train Accuracy: 45.63%
Test Loss: 1.9405, Test Accuracy: 47.79%
Epoch [7/20]:
Train Loss: 1.8871, Train Accuracy: 48.53%
Test Loss: 1.8877, Test Accuracy: 49.63%
Epoch [8/20]:
Train Loss: 1.7636, Train Accuracy: 51.31%
Test Loss: 1.7514, Test Accuracy: 52.40%
Epoch [9/20]:
Train Loss: 1.6600, Train Accuracy: 53.67%
Test Loss: 1.6768, Test Accuracy: 53.91%
Epoch [10/20]:
Train Loss: 1.5745, Train Accuracy: 55.94%
Test Loss: 1.6973, Test Accuracy: 54.39%
Epoch [11/20]:
Trai

In [None]:
# References:
# - Leaky ReLU: Xu, B., Wang, N., Chen, T., & Li, M. (2015). Empirical Evaluation of Rectified Activations in Convolutional Network. arXiv:1505.00853.
# - Adam Optimizer: Kingma, D. P., & Ba, J. (2015). Adam: A Method for Stochastic Optimization. ICLR (arXiv:1412.6980, 2014).
# - PyTorch documentation: https://pytorch.org/tutorials/