In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms, models
import torch.nn.functional as F
from torchvision.models import resnet18
import torch.optim as optim
from torch.optim import lr_scheduler
import time
import os
import copy



device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

model = resnet18(pretrained=True)

cuda


In [2]:
preprocess = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

processed_trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=preprocess)
class_names = processed_trainset.classes
processed_testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=preprocess)
dataloaders = {'train' : [], 'val' : []}
dataloaders['train'] = torch.utils.data.DataLoader(processed_trainset, batch_size=100,
                                                    shuffle=True, num_workers=4, pin_memory=True)
dataloaders['val'] = torch.utils.data.DataLoader(processed_testset, batch_size=100,
                                                    shuffle=False, num_workers=4, pin_memory=True)
dataset_sizes = {'train' : len(processed_trainset), 'val' : len(processed_testset)}

Files already downloaded and verified
Files already downloaded and verified


In [3]:
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(512, 10)
model = model.to(device)

In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
dataset_sizes = {'train' : 50000 , 'val' : 10000}

In [6]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=50):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [8]:
#Calculating accuracy for last-layer parameters
model_conv, hist = train_model(model, criterion, optimizer,
                         exp_lr_scheduler, num_epochs=50)

Epoch 0/49
----------
train Loss: 1.1938 Acc: 0.5886
val Loss: 1.1682 Acc: 0.5919

Epoch 1/49
----------
train Loss: 1.1617 Acc: 0.5955
val Loss: 1.1457 Acc: 0.6006

Epoch 2/49
----------
train Loss: 1.1464 Acc: 0.6014
val Loss: 1.1302 Acc: 0.6030

Epoch 3/49
----------
train Loss: 1.1312 Acc: 0.6062
val Loss: 1.1297 Acc: 0.6108

Epoch 4/49
----------
train Loss: 1.1298 Acc: 0.6067
val Loss: 1.1252 Acc: 0.6070

Epoch 5/49
----------
train Loss: 1.1146 Acc: 0.6098
val Loss: 1.1294 Acc: 0.6011

Epoch 6/49
----------
train Loss: 1.1154 Acc: 0.6117
val Loss: 1.1248 Acc: 0.6053

Epoch 7/49
----------
train Loss: 1.1147 Acc: 0.6118
val Loss: 1.1113 Acc: 0.6120

Epoch 8/49
----------
train Loss: 1.1172 Acc: 0.6097
val Loss: 1.1133 Acc: 0.6113

Epoch 9/49
----------
train Loss: 1.1148 Acc: 0.6092
val Loss: 1.1104 Acc: 0.6151

Epoch 10/49
----------
train Loss: 1.1168 Acc: 0.6094
val Loss: 1.1068 Acc: 0.6077

Epoch 11/49
----------
train Loss: 1.1145 Acc: 0.6100
val Loss: 1.0983 Acc: 0.6177

Ep

In [14]:
for param in model.parameters():
    param.requires_grad = True

In [15]:
# Calculating accuracy for the entire pretained resnet18

since = time.time()
val_acc_history = []

best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
num_epochs = 50

for epoch in range(num_epochs):
    print(f'Epoch {epoch}/{num_epochs - 1}')
    print('-' * 10)
    for inputs, labels in dataloaders['val']:

        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        running_loss = 0.0
        running_corrects = 0

    # statistics
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)
    epoch_loss = running_loss / dataset_sizes['val']
    epoch_acc = running_corrects.double() / dataset_sizes['val']
    print(f'Validation Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    if epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

    print()

time_elapsed = time.time() - since
print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    # model.load_state_dict(best_model_wts)
    

Epoch 0/49
----------
Validation Loss: 0.0083 Acc: 0.0070

Epoch 1/49
----------
Validation Loss: 0.0119 Acc: 0.0060

Epoch 2/49
----------
Validation Loss: 0.0116 Acc: 0.0058

Epoch 3/49
----------
Validation Loss: 0.0091 Acc: 0.0075

Epoch 4/49
----------
Validation Loss: 0.0104 Acc: 0.0064

Epoch 5/49
----------
Validation Loss: 0.0105 Acc: 0.0058

Epoch 6/49
----------
Validation Loss: 0.0116 Acc: 0.0060

Epoch 7/49
----------
Validation Loss: 0.0103 Acc: 0.0059

Epoch 8/49
----------
Validation Loss: 0.0100 Acc: 0.0071

Epoch 9/49
----------
Validation Loss: 0.0089 Acc: 0.0069

Epoch 10/49
----------
Validation Loss: 0.0108 Acc: 0.0059

Epoch 11/49
----------
Validation Loss: 0.0108 Acc: 0.0066

Epoch 12/49
----------
Validation Loss: 0.0111 Acc: 0.0061

Epoch 13/49
----------
Validation Loss: 0.0117 Acc: 0.0057

Epoch 14/49
----------
Validation Loss: 0.0106 Acc: 0.0063

Epoch 15/49
----------
Validation Loss: 0.0113 Acc: 0.0060

Epoch 16/49
----------
Validation Loss: 0.0108 Acc

It can be seen by looking at the accuracies of the above two models (one where only last layer parameters are updated and later one is resnet18(pretrained = True)). The accuracy for the pretrained model is much less as compared to the model where last layer parameters are updated. (0.624000 > 0.0075)