In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import time


In [2]:
# --- Configuration and Device Setup ---
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

LEARNING_RATE = 0.01
BATCH_SIZE = 64
EPOCHS = 25

Using device: cpu


  return torch._C._cuda_getDeviceCount() > 0


In [3]:
# --- 1. Dataset and DataLoaders ---
print("Step 1: Preparing DataLoaders...")

# Transformations for the training and test sets
# Includes data augmentation for the training set to improve generalization
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])

# Download CIFAR-100 dataset
train_dataset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=train_transform)
test_dataset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=test_transform)

# Create DataLoaders
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("DataLoaders created successfully.")

Step 1: Preparing DataLoaders...
Files already downloaded and verified
Files already downloaded and verified
DataLoaders created successfully.


In [4]:
# --- The VGG Model Class ---
print("\nStep 2: Building the VGG-style Model...")

class VGG_CIFAR100(nn.Module):
    def __init__(self, num_classes=100):
        super(VGG_CIFAR100, self).__init__()
        
        # Convolutional Blocks (Feature Extractor)
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Classifier (Head)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 512), # 4x4 from 32x32 image after 3 maxpools
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

model = VGG_CIFAR100(num_classes=100).to(DEVICE)
print("Model built and moved to device.")
print(model)


Step 2: Building the VGG-style Model...
Model built and moved to device.
VGG_CIFAR100(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
 

In [5]:
# --- Optimizer, Loss, and Accuracy Function ---
print("\nStep 3: Defining Optimizer, Loss, and Accuracy Functions...")

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

def calculate_accuracy(y_pred, y_true):
    """Calculates accuracy given predictions and true labels."""
    predicted = torch.argmax(y_pred, 1)
    correct = (predicted == y_true).sum().item()
    return correct / len(y_true)

print("Functions defined.")


Step 3: Defining Optimizer, Loss, and Accuracy Functions...
Functions defined.


In [6]:
# --- Train and Test Loops ---
print("\nStep 4: Defining Train and Test Loops...")

def train_loop(model, data_loader, loss_fn, optimizer, device):
    """Performs one epoch of training."""
    model.train()
    total_loss, total_acc = 0, 0
    
    for X, y in data_loader:
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        pred = model(X)
        loss = loss_fn(pred, y)

        # 2. Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 3. Accumulate loss and accuracy
        total_loss += loss.item()
        total_acc += calculate_accuracy(pred, y)

    # Return average loss and accuracy for the epoch
    avg_loss = total_loss / len(data_loader)
    avg_acc = total_acc / len(data_loader)
    return avg_loss, avg_acc

def test_loop(model, data_loader, loss_fn, device):
    """Performs one epoch of testing/validation."""
    model.eval()
    total_loss, total_acc = 0, 0

    with torch.no_grad():
        for X, y in data_loader:
            X, y = X.to(device), y.to(device)
            
            # 1. Forward pass
            pred = model(X)
            loss = loss_fn(pred, y)

            # 2. Accumulate loss and accuracy
            total_loss += loss.item()
            total_acc += calculate_accuracy(pred, y)

    # Return average loss and accuracy for the epoch
    avg_loss = total_loss / len(data_loader)
    avg_acc = total_acc / len(data_loader)
    return avg_loss, avg_acc

print("Loops defined.")


Step 4: Defining Train and Test Loops...
Loops defined.


In [7]:
# --- Main Training Execution ---
print("\nStep 5: Starting the Training Process...")
print("-" * 50)

for epoch in range(EPOCHS):
    start_time = time.time()
    
    train_loss, train_acc = train_loop(model, train_loader, loss_fn, optimizer, DEVICE)
    test_loss, test_acc = test_loop(model, test_loader, loss_fn, DEVICE)
    
    end_time = time.time()
    epoch_mins, epoch_secs = divmod(end_time - start_time, 60)

    print(f"Epoch: {epoch+1:02} | Time: {int(epoch_mins)}m {int(epoch_secs)}s")
    print(f"\tTrain Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}%")
    print(f"\tTest  Loss: {test_loss:.4f} | Test  Acc: {test_acc*100:.2f}%")

print("\nTraining finished!")


Step 5: Starting the Training Process...
--------------------------------------------------
Epoch: 01 | Time: 3m 22s
	Train Loss: 5.2894 | Train Acc: 0.97%
	Test  Loss: 4.6074 | Test  Acc: 1.03%
Epoch: 02 | Time: 3m 45s
	Train Loss: 4.6089 | Train Acc: 0.96%
	Test  Loss: 4.6077 | Test  Acc: 1.00%


KeyboardInterrupt: 