_Task 1: Inner Workings of ResNet-152_

_4. Transfer Learning_

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet152


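Build two ResNet-152 models for comparison: one with pre-trained ImageNet weights and one with default (random) initialization. In both, every layer is frozen except a new 10-class output layer.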

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def build_feature_extractor(weights, num_classes=10):
    """Return a ResNet-152 whose only trainable layer is a new linear head.

    Args:
        weights: Forwarded unchanged to ``resnet152(weights=...)``; ``None``
            keeps the library's default (random) initialization.
        num_classes: Output size of the replacement ``fc`` layer.

    Returns:
        The model, already moved to the module-level ``device``.
    """
    model = resnet152(weights=weights)
    # Swap the original classifier for a freshly initialized head.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    # Freeze every parameter, then re-enable gradients for the head only.
    model.requires_grad_(False)
    model.fc.requires_grad_(True)
    return model.to(device)


# Pre-trained backbone: only the new head learns.
model_inet = build_feature_extractor('IMAGENET1K_V1')

# Randomly initialized backbone, frozen as well: the head is trained on top of
# untrained features, which serves as the baseline for the comparison.
model_nw = build_feature_extractor(None)

print(device)

cuda


Preparing the Fashion-MNIST dataloaders (images are resized to 224 pixels and converted to 3-channel grayscale).

In [3]:
# Settings shared by the train and test pipelines.
DATA_ROOT = './data'
BATCH_SIZE = 192
NUM_WORKERS = 4

# Preprocessing applied to every image, in order: resize to 224, expand the
# grayscale image to three channels, convert to a tensor, then normalize each
# channel with mean 0.5 and std 0.5.
# NOTE(review): the IMAGENET1K_V1 weights are documented with ImageNet
# mean/std normalization -- confirm that 0.5/0.5 is intentional here.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.FashionMNIST(
    root=DATA_ROOT, train=True, download=True, transform=transform
)
testset = torchvision.datasets.FashionMNIST(
    root=DATA_ROOT, train=False, download=True, transform=transform
)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS
)
testloader = DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS
)


In [4]:
# Both experiments share one loss function.
criterion = nn.CrossEntropyLoss()

# One SGD optimizer per model, each covering only that model's `fc` head,
# with identical hyper-parameters.
optimizer_inet, optimizer_nw = (
    optim.SGD(net.fc.parameters(), lr=0.02, momentum=0.9)
    for net in (model_inet, model_nw)
)

In [5]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over ``loader`` and report its metrics.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``: the sample-weighted mean loss and the
        top-1 accuracy in percent, both measured during training.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        batch_size = labels.size(0)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the
        # average; assumes `criterion` returns the batch mean (the default
        # reduction of nn.CrossEntropyLoss).
        running_loss += loss.item() * batch_size
        total += batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()

    if total == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("train_epoch received a loader with no samples")

    avg_loss = running_loss / total
    accuracy = 100. * correct / total
    return avg_loss, accuracy


def evaluate(model, loader, criterion, device):
    """Measure loss and accuracy of ``model`` over ``loader`` without training.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``: the sample-weighted mean loss and the
        top-1 accuracy in percent.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    running_loss, correct, total = 0.0, 0, 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = labels.size(0)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight by batch size; assumes `criterion` returns the batch mean
            # (the default reduction of nn.CrossEntropyLoss).
            running_loss += loss.item() * batch_size
            total += batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()

    if total == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("evaluate received a loader with no samples")

    avg_loss = running_loss / total
    accuracy = 100. * correct / total
    return avg_loss, accuracy

In [6]:
num_epochs = 5
print("Using ImageNet weights.")
for epoch in range(num_epochs):
    # Train the head for one epoch, then score the model on the test split
    # (reported below under the "Val" label).
    train_loss, train_acc = train_epoch(
        model_inet, trainloader, criterion, optimizer_inet, device
    )
    val_loss, val_acc = evaluate(
        model_inet, testloader, criterion, device
    )

    # One call, three lines: epoch header, training metrics, test metrics.
    print(
        f"Epoch {epoch+1}/{num_epochs}",
        f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%",
        f"  Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc:.2f}%",
        sep="\n",
    )

Using ImageNet weights.
Epoch 1/5
  Train Loss: 0.5609 | Train Acc: 80.16%
  Val   Loss: 0.5029 | Val   Acc: 81.85%
Epoch 2/5
  Train Loss: 0.4347 | Train Acc: 84.35%
  Val   Loss: 0.4296 | Val   Acc: 84.82%
Epoch 3/5
  Train Loss: 0.4112 | Train Acc: 85.31%
  Val   Loss: 0.4342 | Val   Acc: 84.12%
Epoch 4/5
  Train Loss: 0.3960 | Train Acc: 85.66%
  Val   Loss: 0.4065 | Val   Acc: 85.30%
Epoch 5/5
  Train Loss: 0.3746 | Train Acc: 86.65%
  Val   Loss: 0.4097 | Val   Acc: 85.63%


In [7]:
print("Using randomly initialized weights.")
for epoch in range(num_epochs):
    # Train the head for one epoch, then score the model on the test split
    # (reported below under the "Val" label).
    train_loss, train_acc = train_epoch(
        model_nw, trainloader, criterion, optimizer_nw, device
    )
    val_loss, val_acc = evaluate(
        model_nw, testloader, criterion, device
    )

    # One call, three lines: epoch header, training metrics, test metrics.
    print(
        f"Epoch {epoch+1}/{num_epochs}",
        f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%",
        f"  Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc:.2f}%",
        sep="\n",
    )

Using randomly initialized weights.
Epoch 1/5
  Train Loss: 23.8095 | Train Acc: 14.30%
  Val   Loss: 14.8177 | Val   Acc: 21.15%
Epoch 2/5
  Train Loss: 12.4919 | Train Acc: 24.16%
  Val   Loss: 10.5040 | Val   Acc: 25.27%
Epoch 3/5
  Train Loss: 6.7194 | Train Acc: 32.31%
  Val   Loss: 6.1345 | Val   Acc: 26.64%
Epoch 4/5
  Train Loss: 4.3291 | Train Acc: 38.91%
  Val   Loss: 3.1866 | Val   Acc: 45.16%
Epoch 5/5
  Train Loss: 4.2665 | Train Acc: 39.00%
  Val   Loss: 2.5595 | Val   Acc: 45.44%
