Question 2: Finetuning a pre-trained network and the optimizers [5+3x5=20 marks]
Finetune the pre-trained ResNet101 model (trained with the ImageNet dataset and made
available on the PyTorch torchvision server) on the X dataset for classification tasks and plot
curves for training loss and training accuracy. Report the final top-5 test accuracy. Perform the
above task with any 3 optimizers from the following list.
1. Adam
2. Adagrad
3. Adadelta
4. RMSprop

In [None]:
import copy

import numpy as np
import pandas as pd

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim

%matplotlib inline

In [None]:
# Channel statistics of ImageNet. The torchvision ImageNet checkpoints were
# trained on inputs normalized with these values, so the fine-tuning data is
# normalized the same way instead of with a generic 0.5 / 0.5.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# transforms
transform = transforms.Compose(
    [transforms.Resize(224),  # upsample to the resolution the pretrained backbone was trained at
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)])

# datasets
trainset = torchvision.datasets.STL10('./data',split='train',
    download=True,
    transform=transform)
testset = torchvision.datasets.STL10('./data',
    download=True,
    split='test',
    transform=transform)

# dataloaders
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                        shuffle=True, num_workers=2)


testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                        shuffle=False, num_workers=2)

def matplotlib_imshow(img, one_channel=False):
    """Display one image tensor produced by ``transform`` with matplotlib.

    Args:
        img: A (3, H, W) tensor normalized with IMAGENET_MEAN / IMAGENET_STD.
        one_channel: If True, average the colour channels and draw the result
            with the "Greys" colormap; otherwise draw the RGB image.
    """
    img = img.detach().cpu()
    # Undo the per-channel normalization before any channel averaging; the
    # statistics differ per channel, so the order matters.
    mean = torch.tensor(IMAGENET_MEAN).view(-1, 1, 1)
    std = torch.tensor(IMAGENET_STD).view(-1, 1, 1)
    img = img * std + mean     # unnormalize
    if one_channel:
        img = img.mean(dim=0)
    # Clamp so tiny floating-point overshoots do not trigger imshow range warnings.
    npimg = img.clamp(0, 1).numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        # matplotlib expects channels last: (C, H, W) -> (H, W, C)
        plt.imshow(np.transpose(npimg, (1, 2, 0)))

Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to ./data/stl10_binary.tar.gz


100%|██████████| 2640397119/2640397119 [01:29<00:00, 29614433.62it/s]


Extracting ./data/stl10_binary.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Use the first CUDA GPU when one is visible to PyTorch; otherwise stay on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# From torchvision.models
from torchvision import models
# Randomly initialised ResNet-101 (no pretrained weights). `weights=None` is the
# current spelling of the deprecated `pretrained=False` and matches the
# `weights=` API used for `model_conv` in this notebook.
model = models.resnet101(weights=None)



In [None]:
# Load the ImageNet-pretrained ResNet-101 and freeze every existing parameter,
# so the backbone acts as a fixed feature extractor.
model_conv = models.resnet101(weights='IMAGENET1K_V1')
model_conv.requires_grad_(False)

# Replace the classifier with a new 10-way linear layer. Parameters of a newly
# constructed module have requires_grad=True, so only this head is trainable.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(in_features=num_ftrs, out_features=10)

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Only the parameters of the new head are handed to the optimizer.
optimizer_conv = optim.Adam(params=model_conv.fc.parameters(), lr=0.001)

# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer_conv, step_size=7, gamma=0.1)

Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 151MB/s]


In [None]:
# Place the model on the notebook's configured `device` rather than calling
# `.cuda()` directly, so the model always ends up on the device chosen in the
# `device` cell. On a CPU-only machine this is a no-op.
model_conv = model_conv.to(device)

In [None]:
from tempfile import TemporaryDirectory
import time
import os
def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                train_loader=None, val_loader=None, history=None):
    """Train ``model`` and evaluate it after every epoch.

    Args:
        model: Network to train. Batches are moved to the device its
            parameters live on.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer holding the parameters to update.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training pass. May be ``None`` to keep the learning rate fixed.
        num_epochs: Number of passes over the training data.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
            Defaults to the notebook-level ``trainloader``.
        val_loader: Iterable of ``(inputs, labels)`` evaluation batches.
            Defaults to the notebook-level ``testloader``.
        history: Optional dict. When given, per-epoch metrics are appended to
            the lists under ``'train_loss'``, ``'train_acc'``, ``'val_loss'``,
            ``'val_top1_acc'`` and ``'val_top5_acc'`` (for plotting curves).

    Returns:
        The same ``model`` object, trained in place.
    """
    # Fall back to the notebook-level loaders so existing calls keep working.
    if train_loader is None:
        train_loader = trainloader
    if val_loader is None:
        val_loader = testloader

    # Follow the model's own device instead of hard-coding 'cuda', so the
    # function also runs on CPU-only machines.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    since = time.time()

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs}')
        print('-' * 10)

        # ---- training pass ----
        # NOTE(review): train() also switches BatchNorm layers to training
        # mode, so their running statistics keep updating even when the
        # backbone parameters are frozen - confirm this is intended.
        model.train()
        total_loss = 0.0
        train_correct = 0
        train_seen = 0
        train_batches = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            train_batches += 1
            preds = outputs.argmax(dim=1)
            train_correct += (preds == labels).sum().item()
            train_seen += labels.size(0)

        # Step the schedule once per epoch. Stepping it per batch makes
        # StepLR(step_size=7) shrink the learning rate every 7 batches, which
        # drives it to ~0 within the first epoch.
        if scheduler is not None:
            scheduler.step()

        # max(..., 1) guards against an empty loader.
        average_loss = total_loss / max(train_batches, 1)
        train_acc = train_correct / max(train_seen, 1)

        # ---- evaluation pass ----
        model.eval()
        val_total_loss = 0.0
        val_top1 = 0
        val_top5 = 0
        val_seen = 0
        val_batches = 0
        with torch.no_grad():
            for features, labels in val_loader:
                features = features.to(device)
                labels = labels.to(device)
                # Forward pass
                outputs = model(features)

                val_loss = criterion(outputs, labels)
                val_total_loss += val_loss.item()
                val_batches += 1

                # topk returns classes sorted by descending score, so column 0
                # is the top-1 prediction. k is capped for heads with < 5 classes.
                k = min(5, outputs.size(1))
                topk = outputs.topk(k, dim=1).indices
                hits = topk.eq(labels.unsqueeze(1))
                val_top1 += hits[:, 0].sum().item()
                val_top5 += hits.any(dim=1).sum().item()
                val_seen += labels.size(0)

        average_val_loss = val_total_loss / max(val_batches, 1)
        val_top1_acc = val_top1 / max(val_seen, 1)
        val_top5_acc = val_top5 / max(val_seen, 1)

        print(f'train Loss: {average_loss:.4f} Validation Loss: {average_val_loss:.4f}')
        print(f'train Acc: {train_acc:.4f} Validation Top-1 Acc: {val_top1_acc:.4f} '
              f'Validation Top-5 Acc: {val_top5_acc:.4f}')

        if history is not None:
            history.setdefault('train_loss', []).append(average_loss)
            history.setdefault('train_acc', []).append(train_acc)
            history.setdefault('val_loss', []).append(average_val_loss)
            history.setdefault('val_top1_acc', []).append(val_top1_acc)
            history.setdefault('val_top5_acc', []).append(val_top5_acc)

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    return model

In [None]:
# Fine-tune the new classifier head with the Adam optimizer and the StepLR
# schedule configured above, for five epochs.
model_conv = train_model(
    model=model_conv,
    criterion=criterion,
    optimizer=optimizer_conv,
    scheduler=exp_lr_scheduler,
    num_epochs=5,
)

Epoch 0/5
----------
train Loss: 2.7661 Validation Loss: 37.6591
Epoch 1/5
----------
train Loss: 2.7722 Validation Loss: 20.9205
Epoch 2/5
----------
train Loss: 2.7662 Validation Loss: 23.4228
Epoch 3/5
----------


KeyboardInterrupt: 

In [None]:
# Give Adagrad its own copy of the network with a freshly initialised head, so
# this run neither continues from, nor overwrites, the head trained by another
# optimizer.
model_conv1 = copy.deepcopy(model_conv)
model_conv1.fc = nn.Linear(model_conv1.fc.in_features,
                           model_conv1.fc.out_features).to(device)

optimizer_conv = optim.Adagrad(model_conv1.fc.parameters(), lr=0.001,lr_decay=0, weight_decay=0)

# A scheduler only adjusts the optimizer it was constructed with, so build a
# new one for this optimizer instead of reusing the one bound to Adam.
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv1 = train_model(model_conv1, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=1)

Epoch 0/1
----------
train Loss: 1.5831 Validation Loss: 5.3309


In [None]:
# Give Adadelta its own copy of the network with a freshly initialised head, so
# this run neither continues from, nor overwrites, the head trained by another
# optimizer.
model_conv2 = copy.deepcopy(model_conv)
model_conv2.fc = nn.Linear(model_conv2.fc.in_features,
                           model_conv2.fc.out_features).to(device)

# NOTE(review): PyTorch's Adadelta uses lr=1.0 by default; lr=0.001 scales the
# updates down by 1000x - confirm this value is intended.
optimizer_conv = optim.Adadelta(model_conv2.fc.parameters(), lr=0.001,rho=0.9)

# A scheduler only adjusts the optimizer it was constructed with, so build a
# new one for this optimizer instead of reusing the one bound to Adam.
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv2 = train_model(model_conv2, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=1)

Epoch 0/1
----------
train Loss: 1.3330 Validation Loss: 6.6607


In [None]:
# Give RMSprop its own copy of the network with a freshly initialised head, so
# this run neither continues from, nor overwrites, the head trained by another
# optimizer.
model_conv3 = copy.deepcopy(model_conv)
model_conv3.fc = nn.Linear(model_conv3.fc.in_features,
                           model_conv3.fc.out_features).to(device)

optimizer_conv = optim.RMSprop(model_conv3.fc.parameters(), lr=0.001,alpha=0.99,eps=1e-08, weight_decay=0, momentum=0)

# A scheduler only adjusts the optimizer it was constructed with, so build a
# new one for this optimizer instead of reusing the one bound to Adam.
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv3 = train_model(model_conv3, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=1)

Epoch 0/1
----------
train Loss: 1.2624 Validation Loss: 117.1202
