<a href="https://colab.research.google.com/github/suhas-bvp/session7/blob/master/experiment1_CIFAR_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
import ssl
import numpy as np
import matplotlib.pyplot as plt

# Bypass SSL certificate verification for dataset download.
# NOTE(review): this swaps the ssl module's default HTTPS context factory for
# the unverified one, so it applies process-wide (not only to the CIFAR
# download) and relies on underscore-prefixed, private ssl attributes.
# Prefer fixing the local certificate store if this is used outside a sandbox.
ssl._create_default_https_context = ssl._create_unverified_context

# Function to create a ResNet-50 model for CIFAR-100
# - Uses torchvision's resnet50 implementation (deeper than resnet18)
# - Sets output layer to 100 classes (CIFAR-100)
# - No pre-trained weights are used
# - Returns the model
def get_resnet(num_classes=100):
    """Build an untrained torchvision ResNet-50 with a resized classifier head.

    Args:
        num_classes: Number of output logits for the replacement ``fc``
            layer (defaults to 100).

    Returns:
        The ResNet-50 module, created with ``weights=None`` and with its
        final fully connected layer replaced by a fresh ``nn.Linear``.
    """
    net = torchvision.models.resnet50(weights=None)
    # Keep the backbone's feature width; only the output width changes.
    feature_dim = net.fc.in_features
    net.fc = nn.Linear(feature_dim, num_classes)
    return net

# Function to create data loaders for CIFAR-100
# - Applies advanced data augmentation and normalization for training
# - Normalizes test data
# - Returns train and test data loaders
# - Downloads data if not present
# - Defaults to batch size 128 for training; the test loader uses batch size 100
# - Uses 2 worker processes (DataLoader num_workers) for loading data
def get_dataloaders(batch_size=128, test_batch_size=100, num_workers=2, root='./data'):
    """Create the CIFAR-100 training and test data loaders.

    The training pipeline applies random crop (with 4px padding), horizontal
    flip, colour jitter, tensor conversion, random erasing and normalisation,
    in that order. The test pipeline only converts to tensor and normalises.

    Args:
        batch_size: Batch size of the (shuffled) training loader.
        test_batch_size: Batch size of the (unshuffled) test loader.
        num_workers: Number of loader workers used by both loaders.
        root: Directory the dataset is stored in; it is downloaded there
            when missing (``download=True``).

    Returns:
        A ``(trainloader, testloader)`` tuple.
    """
    # One Normalize instance shared by both pipelines so the statistics cannot
    # drift apart. (Presumably the CIFAR-100 per-channel mean/std.)
    normalize = transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        # RandomErasing operates on tensors, so it must come after ToTensor.
        transforms.RandomErasing(p=0.5, scale=(0.02, 0.2)),
        normalize,
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    trainset = torchvision.datasets.CIFAR100(
        root=root, train=True, download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    testset = torchvision.datasets.CIFAR100(
        root=root, train=False, download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=test_batch_size, shuffle=False, num_workers=num_workers)

    return trainloader, testloader

# CutMix implementation for regularization
def cutmix_data(x, y, alpha=1.0):
    """Apply CutMix to a batch: paste a random box from shuffled samples.

    Args:
        x: Input batch; indexed as ``x[sample, :, a:b, c:d]`` (assumed to be
            laid out as N x C x H x W - confirm against the caller).
        y: Labels, indexed by sample.
        alpha: Concentration of the ``Beta(alpha, alpha)`` distribution the
            mixing ratio is drawn from. A non-positive value disables mixing.

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where ``mixed_x`` is a new tensor (the
        input ``x`` is left untouched), ``y_a`` are the original labels,
        ``y_b`` the labels of the samples the box was taken from, and ``lam``
        is the fraction of each image that still belongs to ``y_a``.
    """
    # np.random.beta rejects alpha <= 0; treat that as "no mixing".
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0

    batch_size = x.size(0)
    index = torch.randperm(batch_size).to(x.device)
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)

    # Work on a copy so the caller's batch is not modified in place.
    mixed_x = x.clone()
    mixed_x[:, :, bbx1:bbx2, bby1:bby2] = x[index, :, bbx1:bbx2, bby1:bby2]

    # The box is truncated to integers and clipped at the borders, so its real
    # area usually differs from (1 - lam). Recompute lam from the pixels that
    # were actually replaced so the loss weights match the mixed image.
    plane_area = x.size(2) * x.size(3)
    if plane_area > 0:
        box_area = int(bbx2 - bbx1) * int(bby2 - bby1)
        lam = 1.0 - box_area / plane_area

    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def rand_bbox(size, lam):
    """Sample a random box for CutMix, clipped to the plane described by ``size``.

    Args:
        size: Sequence whose entries 2 and 3 give the extents of the two axes
            the box lives in.
        lam: Mixing ratio; each box side is ``sqrt(1 - lam)`` of the
            corresponding extent before truncation and clipping.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)``: start/end along axis 2 (``bbx*``) and
        axis 3 (``bby*``), each clipped to ``[0, extent]``.
    """
    extent_x, extent_y = size[2], size[3]

    # Side ratio sqrt(1 - lam) gives an (unclipped) area fraction of 1 - lam.
    side_ratio = np.sqrt(1. - lam)
    half_x = int(extent_x * side_ratio) // 2
    half_y = int(extent_y * side_ratio) // 2

    # Box centre: axis 2 is drawn first, then axis 3.
    centre_x = np.random.randint(extent_x)
    centre_y = np.random.randint(extent_y)

    x_lo = np.clip(centre_x - half_x, 0, extent_x)
    x_hi = np.clip(centre_x + half_x, 0, extent_x)
    y_lo = np.clip(centre_y - half_y, 0, extent_y)
    y_hi = np.clip(centre_y + half_y, 0, extent_y)
    return x_lo, y_lo, x_hi, y_hi

# Function to train and evaluate the model
# - Performs training and validation (testing) for a specified number of epochs
# - Uses label smoothing, CutMix, cosine annealing, and mixed precision
# - Logs training and test loss/accuracy to a file and prints to console
# - Saves the best model based on test accuracy
def _evaluate(model, loader, criterion, device, use_amp):
    """Return ``(mean loss, accuracy in %)`` of ``model`` over ``loader`` without gradients."""
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            with torch.amp.autocast('cuda', enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, targets)
            loss_sum += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    # Average over the samples actually seen; guard against an empty loader.
    seen = max(total, 1)
    return loss_sum / seen, 100. * correct / seen


def _plot_training_curves(train_losses, test_losses, train_accs, test_accs):
    """Plot loss and accuracy per epoch, save to 'training_curves.png' and show the figure."""
    epochs_axis = range(1, len(train_losses) + 1)
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(epochs_axis, train_losses, label='Train Loss')
    plt.plot(epochs_axis, test_losses, label='Test Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss vs Epoch')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(epochs_axis, train_accs, label='Train Accuracy')
    plt.plot(epochs_axis, test_accs, label='Test Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title('Accuracy vs Epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig('training_curves.png')
    plt.show()


def train(model, trainloader, testloader, device, epochs=100, lr=0.1, log_file='training_logs.md',
          checkpoint_path='best_resnet50_cifar100.pth', cutmix_prob=0.5):
    """Train ``model`` and evaluate it on ``testloader`` after every epoch.

    Uses SGD (momentum 0.9, weight decay 5e-4) with cosine annealing over
    ``epochs``, cross-entropy with label smoothing 0.1, CutMix on a random
    subset of batches, and CUDA mixed precision when ``device`` is a CUDA
    device. Per-epoch metrics are written as a Markdown table to ``log_file``
    and printed; the weights with the best test accuracy are saved to
    ``checkpoint_path``; loss/accuracy curves are plotted at the end.

    Args:
        model: Network to train; expected to already live on ``device``.
        trainloader: Iterable of ``(inputs, targets)`` training batches.
        testloader: Iterable of ``(inputs, targets)`` evaluation batches.
        device: Device (``torch.device`` or string) batches are moved to.
        epochs: Number of epochs, also used as the cosine schedule length.
        lr: Initial learning rate.
        log_file: Path of the Markdown log (overwritten).
        checkpoint_path: Where the best ``state_dict`` is saved.
        cutmix_prob: Probability of applying CutMix to a training batch.

    Returns:
        None.
    """
    # Gate mixed precision on the device actually used for training, not on
    # whether some CUDA device merely exists on the machine.
    use_amp = torch.device(device).type == 'cuda'

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    # A disabled GradScaler is a pass-through: scale() returns the loss as-is,
    # step() just calls optimizer.step() and update() is a no-op.
    scaler = torch.amp.GradScaler('cuda', enabled=use_amp)

    best_acc = 0.0
    train_losses, train_accs, test_losses, test_accs = [], [], [], []
    with open(log_file, 'w', encoding='utf-8') as f:
        f.write('| Epoch | Train Loss | Train Acc (%) | Test Loss | Test Acc (%) |\n')
        f.write('|-------|------------|----------------|-----------|--------------|\n')
        for epoch in range(1, epochs + 1):
            model.train()
            running_loss = 0.0
            correct_train = 0.0
            total_train = 0
            for inputs, targets in tqdm(trainloader, desc=f'Epoch {epoch}/{epochs}'):
                inputs, targets = inputs.to(device), targets.to(device)

                use_cutmix = np.random.rand() < cutmix_prob
                if use_cutmix:
                    inputs, targets_a, targets_b, lam = cutmix_data(inputs, targets)

                with torch.amp.autocast('cuda', enabled=use_amp):
                    outputs = model(inputs)
                    if use_cutmix:
                        loss = (lam * criterion(outputs, targets_a)
                                + (1 - lam) * criterion(outputs, targets_b))
                    else:
                        loss = criterion(outputs, targets)

                optimizer.zero_grad()
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                batch_count = targets.size(0)
                running_loss += loss.item() * batch_count
                total_train += batch_count
                _, predicted = outputs.max(1)
                if use_cutmix:
                    # A mixed image carries both labels; credit a prediction
                    # in proportion to how much of the image each label owns.
                    correct_train += (lam * predicted.eq(targets_a).sum().item()
                                      + (1 - lam) * predicted.eq(targets_b).sum().item())
                else:
                    correct_train += predicted.eq(targets).sum().item()

            # Average over the samples actually seen; guard against an empty loader.
            seen_train = max(total_train, 1)
            train_loss = running_loss / seen_train
            train_acc = 100. * correct_train / seen_train
            train_losses.append(train_loss)
            train_accs.append(train_acc)

            # Validation (test) phase
            test_loss, test_acc = _evaluate(model, testloader, criterion, device, use_amp)
            test_losses.append(test_loss)
            test_accs.append(test_acc)

            # Log results to file and print to console
            f.write(f'| {epoch} | {train_loss:.4f} | {train_acc:.2f} | {test_loss:.4f} | {test_acc:.2f} |\n')
            # Flush so completed epochs survive an interrupted run.
            f.flush()
            print(f'Epoch {epoch}: Train Loss={train_loss:.4f}, Train Acc={train_acc:.2f}%, Test Loss={test_loss:.4f}, Test Acc={test_acc:.2f}%')

            # Save best model
            if test_acc > best_acc:
                best_acc = test_acc
                torch.save(model.state_dict(), checkpoint_path)
            scheduler.step()

    # Plot graphs after training
    _plot_training_curves(train_losses, test_losses, train_accs, test_accs)

# Main function to set up device, data loaders, model, and start training
# - Uses GPU if available, otherwise CPU
def main():
    """Pick the device, build the data loaders and model, then run training.

    Uses CUDA when ``torch.cuda.is_available()`` reports it, otherwise the CPU.
    """
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(backend)
    train_loader, test_loader = get_dataloaders()
    net = get_resnet()
    net = net.to(device)
    train(net, train_loader, test_loader, device)

# Start training only when this file/cell is executed directly, not when it is imported.
if __name__ == '__main__':
    main()


Epoch 1/100: 100%|██████████| 391/391 [00:50<00:00,  7.67it/s]


Epoch 1: Train Loss=5.4650, Train Acc=1.31%, Test Loss=4.7506, Test Acc=1.61%


Epoch 2/100: 100%|██████████| 391/391 [00:49<00:00,  7.83it/s]


Epoch 2: Train Loss=4.5531, Train Acc=2.13%, Test Loss=4.6956, Test Acc=4.15%


Epoch 3/100: 100%|██████████| 391/391 [00:49<00:00,  7.91it/s]


Epoch 3: Train Loss=4.4018, Train Acc=4.14%, Test Loss=4.2430, Test Acc=7.04%


Epoch 4/100: 100%|██████████| 391/391 [00:49<00:00,  7.87it/s]


Epoch 4: Train Loss=4.2938, Train Acc=5.68%, Test Loss=4.0496, Test Acc=9.71%


Epoch 5/100: 100%|██████████| 391/391 [00:49<00:00,  7.93it/s]


Epoch 5: Train Loss=4.2416, Train Acc=6.88%, Test Loss=3.9547, Test Acc=10.76%


Epoch 6/100: 100%|██████████| 391/391 [00:47<00:00,  8.21it/s]


Epoch 6: Train Loss=4.1841, Train Acc=8.33%, Test Loss=3.8533, Test Acc=13.64%


Epoch 7/100: 100%|██████████| 391/391 [00:48<00:00,  8.13it/s]


Epoch 7: Train Loss=4.1423, Train Acc=9.20%, Test Loss=3.9123, Test Acc=13.57%


Epoch 8/100: 100%|██████████| 391/391 [00:47<00:00,  8.16it/s]


Epoch 8: Train Loss=4.0722, Train Acc=10.63%, Test Loss=3.9339, Test Acc=16.42%


Epoch 9/100: 100%|██████████| 391/391 [00:48<00:00,  8.04it/s]


Epoch 9: Train Loss=4.0566, Train Acc=11.45%, Test Loss=3.7845, Test Acc=17.23%


Epoch 10/100: 100%|██████████| 391/391 [00:48<00:00,  7.99it/s]


Epoch 10: Train Loss=4.0205, Train Acc=11.90%, Test Loss=3.6426, Test Acc=19.18%


Epoch 11/100: 100%|██████████| 391/391 [00:49<00:00,  7.97it/s]


Epoch 11: Train Loss=3.9602, Train Acc=13.09%, Test Loss=3.5486, Test Acc=19.58%


Epoch 12/100:  87%|████████▋ | 340/391 [00:41<00:06,  8.45it/s]