In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader


# Use the first CUDA device when one is available; otherwise stay on the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Shared preprocessing for every experiment: resize, centre-crop to 224,
# convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std values are presumably the ImageNet channel
# statistics expected by the pretrained weights -- confirm.
alex_transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

batch_size = 32

# CIFAR-10 training split, reshuffled on every pass.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=alex_transform,
)
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# CIFAR-10 test split, iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=alex_transform,
)
testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Loss shared by all four training runs.
criterion = nn.CrossEntropyLoss()

def train_epochs(net, optimizer, title, num_epochs=3, loader=None,
                 loss_fn=None, target_device=None):
    """Train ``net`` in place and print the mean batch loss after each epoch.

    Args:
        net: Model to optimise. It is switched to training mode before the
            first epoch and left in that mode.
        optimizer: Optimizer already bound to the parameters to update.
        title: Heading printed once before training starts.
        num_epochs: Number of full passes over ``loader`` (default 3).
        loader: Iterable yielding ``(inputs, labels)`` batches. Defaults to
            the module-level ``trainloader``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar
            loss tensor. Defaults to the module-level ``criterion``.
        target_device: Device each batch is moved to before the forward
            pass. Defaults to the module-level ``device``.

    Raises:
        ValueError: If ``loader`` yields no batches during an epoch, since
            no average loss can be reported.
    """
    # Resolve defaults lazily so the module globals are only required when
    # the caller does not supply explicit values.
    if loader is None:
        loader = trainloader
    if loss_fn is None:
        loss_fn = criterion
    if target_device is None:
        target_device = device

    print(f'{title}')
    net.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        # Count batches directly rather than calling len(loader): this also
        # works for iterables without __len__ and lets the empty case be
        # reported clearly instead of as a ZeroDivisionError.
        num_batches = 0
        for inputs, labels in loader:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)
            optimizer.zero_grad()
            loss = loss_fn(net(inputs), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            raise ValueError('train_epochs: the data loader yielded no batches')
        avg_loss = running_loss / num_batches
        print(f'Epoch {epoch + 1} -> Loss: {avg_loss:.4f}')
def test_accuracy(net, loader=None, target_device=None):
    """Evaluate ``net`` and return its top-1 accuracy as a percentage.

    The model is switched to evaluation mode (and left in it), gradients are
    disabled for the whole pass, and the accuracy is printed before being
    returned.

    Args:
        net: Model to evaluate.
        loader: Iterable yielding ``(images, labels)`` batches. Defaults to
            the module-level ``testloader``.
        target_device: Device each batch is moved to before the forward
            pass. Defaults to the module-level ``device``.

    Returns:
        Accuracy in percent (0.0 to 100.0) as a float.

    Raises:
        ValueError: If ``loader`` yields no samples, since the accuracy is
            undefined in that case.
    """
    if loader is None:
        loader = testloader
    if target_device is None:
        target_device = device

    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(target_device)
            labels = labels.to(target_device)
            # Index of the largest score along the class dimension; no need
            # for the legacy ``.data`` accessor inside ``no_grad``.
            predicted = net(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        raise ValueError('test_accuracy: the data loader yielded no samples')
    acc = 100.0 * correct / total
    print(f'Test Accuracy: {acc:.2f}%')
    return acc


# The ``test_`` prefix matches pytest's discovery pattern; mark the helper as
# a non-test so importing it into a test module does not get it collected.
test_accuracy.__test__ = False

# Experiment 1 -- AlexNet, full fine-tuning: start from the ImageNet weights
# and let the optimizer update every parameter.
alex_finetune = models.alexnet(weights='IMAGENET1K_V1')
# Replace the last classifier layer with a fresh 10-output head, keeping the
# input width of the layer it replaces.
alex_finetune.classifier[6] = nn.Linear(
    alex_finetune.classifier[6].in_features,
    10,
)
alex_finetune = alex_finetune.to(device)
optimizer_alex_ft = optim.SGD(
    alex_finetune.parameters(),
    lr=0.001,
    momentum=0.9,
)
train_epochs(alex_finetune, optimizer_alex_ft, 'AlexNet | Finetuning')
acc_alex_ft = test_accuracy(alex_finetune)

# Experiment 2 -- AlexNet as a fixed feature extractor: only the new final
# layer is trained.
alex_fixed = models.alexnet(weights='IMAGENET1K_V1')
# Freeze every pretrained parameter first ...
for frozen_param in alex_fixed.parameters():
    frozen_param.requires_grad = False
# ... then attach the new 10-output head. A freshly constructed nn.Linear has
# trainable parameters, so it is the only part left with requires_grad=True.
alex_fixed.classifier[6] = nn.Linear(
    alex_fixed.classifier[6].in_features,
    10,
)
alex_fixed = alex_fixed.to(device)
# Hand the optimizer the head's parameters only.
# NOTE(review): train_epochs calls net.train() on the whole model, so any
# dropout layers in the frozen part stay active while the head is trained --
# confirm that is intended.
optimizer_alex_fx = optim.SGD(
    alex_fixed.classifier[6].parameters(),
    lr=0.001,
    momentum=0.9,
)
train_epochs(alex_fixed, optimizer_alex_fx, 'AlexNet | Feature Extractor')
acc_alex_fx = test_accuracy(alex_fixed)

# Experiment 3 -- VGG16, full fine-tuning: start from the ImageNet weights and
# let the optimizer update every parameter.
vgg_finetune = models.vgg16(weights='IMAGENET1K_V1')
# Replace the last classifier layer with a fresh 10-output head, keeping the
# input width of the layer it replaces.
vgg_finetune.classifier[6] = nn.Linear(
    vgg_finetune.classifier[6].in_features,
    10,
)
vgg_finetune = vgg_finetune.to(device)
optimizer_vgg_ft = optim.SGD(
    vgg_finetune.parameters(),
    lr=0.001,
    momentum=0.9,
)
train_epochs(vgg_finetune, optimizer_vgg_ft, 'VGG16 | Finetuning')
acc_vgg_ft = test_accuracy(vgg_finetune)

# Experiment 4 -- VGG16 as a fixed feature extractor: only the new final layer
# is trained.
vgg_fixed = models.vgg16(weights='IMAGENET1K_V1')
# Freeze every pretrained parameter first ...
for frozen_param in vgg_fixed.parameters():
    frozen_param.requires_grad = False
# ... then attach the new 10-output head. A freshly constructed nn.Linear has
# trainable parameters, so it is the only part left with requires_grad=True.
vgg_fixed.classifier[6] = nn.Linear(
    vgg_fixed.classifier[6].in_features,
    10,
)
vgg_fixed = vgg_fixed.to(device)
# Hand the optimizer the head's parameters only.
# NOTE(review): train_epochs calls net.train() on the whole model, so any
# dropout layers in the frozen part stay active while the head is trained --
# confirm that is intended.
optimizer_vgg_fx = optim.SGD(
    vgg_fixed.classifier[6].parameters(),
    lr=0.001,
    momentum=0.9,
)
train_epochs(vgg_fixed, optimizer_vgg_fx, 'VGG16 | Feature Extractor')
acc_vgg_fx = test_accuracy(vgg_fixed)

# Final comparison table: one row per experiment, accuracy to two decimals.
print('Summary:')
for row_label, row_acc in (
    ('AlexNet   (Finetune):  ', acc_alex_ft),
    ('AlexNet   (Fixed):     ', acc_alex_fx),
    ('VGG16     (Finetune):  ', acc_vgg_ft),
    ('VGG16     (Fixed):     ', acc_vgg_fx),
):
    print(row_label, f'{row_acc:.2f}%')


AlexNet | Finetuning
Epoch 1 -> Loss: 0.6344
Epoch 2 -> Loss: 0.3991
Epoch 3 -> Loss: 0.3120
Test Accuracy: 89.17%
AlexNet | Feature Extractor
Epoch 1 -> Loss: 0.7910
Epoch 2 -> Loss: 0.6848
Epoch 3 -> Loss: 0.6561
Test Accuracy: 81.32%
VGG16 | Finetuning
Epoch 1 -> Loss: 0.4812
Epoch 2 -> Loss: 0.2438
Epoch 3 -> Loss: 0.1575
Test Accuracy: 92.23%
VGG16 | Feature Extractor
Epoch 1 -> Loss: 0.7395
Epoch 2 -> Loss: 0.6367
Epoch 3 -> Loss: 0.6146
Test Accuracy: 81.96%
Summary:
AlexNet   (Finetune):   89.17%
AlexNet   (Fixed):      81.32%
VGG16     (Finetune):   92.23%
VGG16     (Fixed):      81.96%


From the four runs on CIFAR-10 (3 epochs, batch size 32), the pattern is clear: fine-tuning beats fixed feature extraction for both models, and VGG16 outperforms AlexNet when all weights are allowed to adapt. With fine-tuning, VGG16 reached 92.23% while AlexNet achieved 89.17%; in contrast, freezing the backbone and training only the last fully connected layer capped performance at around 82% for both (81.96% for VGG16, 81.32% for AlexNet). The loss curves explain why. The fine-tuned models showed strong, steady decreases each epoch (VGG16 FT: 0.4812 → 0.2438 → 0.1575; AlexNet FT: 0.6344 → 0.3991 → 0.3120), meaning the entire feature hierarchy was being reshaped for CIFAR-10. When used as fixed feature extractors, the backbones stayed ImageNet-biased, so the classifier could make only limited improvements (the loss dropped much less: VGG16 FX: 0.7395 → 0.6367 → 0.6146; AlexNet FX: 0.7910 → 0.6848 → 0.6561). The fine-tuning advantage is expected because CIFAR-10 images (10 classes, smaller objects, different textures) differ from ImageNet, so allowing the convolutional layers to update helps align the low- and mid-level filters to this dataset. VGG16's deeper architecture likely explains its edge over AlexNet when fine-tuned (more capacity to model fine details), while in the frozen setting both sit near the same ceiling since only the last layer learns. Overall, these results suggest that for CIFAR-10, updating the whole network is worth it, and VGG16 yields the best accuracy when fine-tuned, with all models showing healthy monotonic loss reductions across the three epochs.