In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [4]:
# Preprocessing shared by every split: resize to 224, convert to a tensor,
# then normalize each channel with fixed mean / std values.
transform = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


def _cifar_sets_and_loaders(dataset_cls):
    """Build the train/test datasets and loaders for one torchvision CIFAR class.

    Returns ``(train_set, test_set, train_loader, test_loader)``. Only the
    training loader shuffles; both loaders use batches of 128 and 2 workers.
    """
    train_set = dataset_cls(root="./data", train=True, download=True, transform=transform)
    test_set = dataset_cls(root="./data", train=False, download=True, transform=transform)
    train_loader = DataLoader(train_set, batch_size=128, shuffle=True, num_workers=2)
    test_loader = DataLoader(test_set, batch_size=128, shuffle=False, num_workers=2)
    return train_set, test_set, train_loader, test_loader


# CIFAR-10
trainset_10, testset_10, trainloader_10, testloader_10 = _cifar_sets_and_loaders(
    torchvision.datasets.CIFAR10
)

# CIFAR-100
trainset_100, testset_100, trainloader_100, testloader_100 = _cifar_sets_and_loaders(
    torchvision.datasets.CIFAR100
)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Report how many samples each split holds, one line per dataset.
for dataset_tag, train_split, test_split in (
    ("CIFAR-10:", trainset_10, testset_10),
    ("CIFAR-100:", trainset_100, testset_100),
):
    print(dataset_tag, len(train_split), "train samples,", len(test_split), "test samples")

CIFAR-10: 50000 train samples, 10000 test samples
CIFAR-100: 50000 train samples, 10000 test samples


In [6]:
# Look at the first training sample of each dataset: tensor shape after the
# transform, and the integer class label.
for shape_caption, label_caption, dataset in (
    ("CIFAR-10 single image shape:", "CIFAR-10 label:", trainset_10),
    ("CIFAR-100 single image shape:", "CIFAR-100 label:", trainset_100),
):
    img, label = dataset[0]
    print(shape_caption, img.shape)
    print(label_caption, label)

CIFAR-10 single image shape: torch.Size([3, 224, 224])
CIFAR-10 label: 6
CIFAR-100 single image shape: torch.Size([3, 224, 224])
CIFAR-100 label: 19


In [7]:
# Pull one batch from each training loader and show the batched tensor shapes.
for images_caption, labels_caption, loader in (
    (
        "Trainloader (CIFAR-10) batch images shape:",
        "Trainloader (CIFAR-10) batch labels shape:",
        trainloader_10,
    ),
    (
        "Trainloader (CIFAR-100) batch images shape:",
        "Trainloader (CIFAR-100) batch labels shape:",
        trainloader_100,
    ),
):
    images, labels = next(iter(loader))
    print(images_caption, images.shape)
    print(labels_caption, labels.shape)

Trainloader (CIFAR-10) batch images shape: torch.Size([128, 3, 224, 224])
Trainloader (CIFAR-10) batch labels shape: torch.Size([128])
Trainloader (CIFAR-100) batch images shape: torch.Size([128, 3, 224, 224])
Trainloader (CIFAR-100) batch labels shape: torch.Size([128])


In [8]:
import torch.nn as nn

In [9]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The network is a five-convolution feature extractor (with local response
    normalization after the first two convolutions and grouped convolutions in
    layers 2, 4 and 5) followed by a three-layer fully connected classifier
    with dropout.

    Args:
        num_classes: Size of the final linear layer, i.e. number of logits
            produced per input image.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(5, alpha=1e-4, beta=0.75, k=2.0),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(96, 256, kernel_size=5, padding=2, groups=2),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(5, alpha=1e-4, beta=0.75, k=2.0),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 384, kernel_size=3, padding=1, groups=2),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, padding=1, groups=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # The classifier's first Linear layer expects exactly 256 * 6 * 6
        # features. This parameter-free pooling forces the feature map to 6x6,
        # so inputs that do not naturally produce a 6x6 map (e.g. 256x256
        # images) no longer crash with a shape mismatch. When the map is
        # already 6x6 (e.g. 224x224 inputs) it is an identity, and because it
        # has no weights the parameter count and state-dict keys are unchanged.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),

            nn.Dropout(),
            nn.Linear(4096, 4096),

            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Map a batch of 3-channel images to ``(batch, num_classes)`` logits."""
        x = self.features(x)
        x = self.avgpool(x)
        # Keep the batch dimension, flatten channels and spatial dims together.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [10]:
import torch
# Use the GPU when one is visible to PyTorch; otherwise fall back to the CPU so
# the notebook still runs (slowly) on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [11]:
import torch.optim as optim

# Cross-entropy objective over the raw logits the network emits.
loss_fn = nn.CrossEntropyLoss()

# Ten-way classifier, moved onto the selected device before the optimizer
# captures its parameters.
model = AlexNet(num_classes=10)
model = model.to(device)

# SGD with momentum over every model parameter.
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)


def accuracy(outputs, labels):
    """Return the fraction of rows in ``outputs`` whose top-scoring index equals ``labels``.

    The predicted class of each row is the index of its maximum along
    dimension 1. The result is a plain Python float.
    """
    predicted = torch.max(outputs, dim=1).indices
    hits = predicted.eq(labels).float()
    return hits.mean().item()

In [12]:
def train(model, loader, criterion, optimizer, device):
    """Run one optimization pass over ``loader`` and return per-sample averages.

    Args:
        model: Module to optimize; switched to training mode here.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the batch mean (the default reduction of
            ``nn.CrossEntropyLoss``) -- confirm if a different reduction is used.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` averaged over every sample seen. Batches are
        weighted by their size, so a smaller final batch does not skew the
        result the way a plain mean of per-batch means would.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum, correct, seen = 0.0, 0, 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Undo the per-batch mean so the final division is per sample.
        loss_sum += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        seen += batch_size

    return loss_sum / seen, correct / seen


def test(model, loader, criterion, device):
    model.eval()
    running_loss, running_acc = 0.0, 0.0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            running_acc += accuracy(outputs, labels)

    return running_loss / len(loader), running_acc / len(loader)


In [12]:
EPOCHS = 10
for epoch in range(1, EPOCHS+1):
    # One optimization pass over the CIFAR-10 training loader, then a
    # gradient-free evaluation on the CIFAR-10 test loader.
    train_loss, train_acc = train(
        model=model,
        loader=trainloader_10,
        criterion=loss_fn,
        optimizer=optimizer,
        device=device,
    )
    test_loss, test_acc = test(
        model=model,
        loader=testloader_10,
        criterion=loss_fn,
        device=device,
    )

    # Accuracies come back as fractions; show them as percentages.
    epoch_report = (
        f"Epoch [{epoch}/{EPOCHS}] "
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc*100:.2f}% "
        f"| Test Loss: {test_loss:.4f}, Test Acc: {test_acc*100:.2f}%"
    )
    print(epoch_report)

Epoch [1/10] Train Loss: 2.3004, Train Acc: 14.36% | Test Loss: 2.2983, Test Acc: 20.83%
Epoch [2/10] Train Loss: 2.2945, Train Acc: 13.54% | Test Loss: 2.2790, Test Acc: 13.22%
Epoch [3/10] Train Loss: 2.2307, Train Acc: 15.68% | Test Loss: 2.1390, Test Acc: 23.66%
Epoch [4/10] Train Loss: 2.0687, Train Acc: 24.83% | Test Loss: 2.0287, Test Acc: 27.03%
Epoch [5/10] Train Loss: 1.9568, Train Acc: 27.56% | Test Loss: 1.8942, Test Acc: 31.37%
Epoch [6/10] Train Loss: 1.8232, Train Acc: 31.76% | Test Loss: 1.7400, Test Acc: 36.59%
Epoch [7/10] Train Loss: 1.7136, Train Acc: 36.32% | Test Loss: 1.6344, Test Acc: 39.21%
Epoch [8/10] Train Loss: 1.6348, Train Acc: 39.66% | Test Loss: 1.6427, Test Acc: 40.26%
Epoch [9/10] Train Loss: 1.5625, Train Acc: 42.57% | Test Loss: 1.5066, Test Acc: 44.42%
Epoch [10/10] Train Loss: 1.4998, Train Acc: 45.13% | Test Loss: 1.4475, Test Acc: 47.25%


In [13]:
# Collect (element count, requires_grad) once per parameter tensor, then
# derive both totals from that single pass.
param_stats = [(p.numel(), p.requires_grad) for p in model.parameters()]

total_params = sum(count for count, _ in param_stats)
print(f"Total parameters: {total_params:,}")

# Trainable params
trainable_params = sum(count for count, needs_grad in param_stats if needs_grad)
print(f"Trainable parameters: {trainable_params:,}")

Total parameters: 56,909,194
Trainable parameters: 56,909,194
