In [1]:
import torch
from finetune import Resnet18Finetuner, Resnet18FintunerForCaseD
from dataloader import load_augmented_data
import matplotlib.pyplot as plt
from trainer import Trainer
from torch import optim, nn

In [2]:
finetuner_base = Resnet18Finetuner(pretrained=True, feature_extract=True)
finetuner_A = Resnet18Finetuner(pretrained=True, feature_extract=True)
finetuner_B = Resnet18Finetuner(pretrained=True, feature_extract=False)  # make all conv blocks trainable
finetuner_C = Resnet18FintunerForCaseD(pretrained=True, feature_extract=True)

In [3]:
finetuner_base.model.layer1

Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [4]:
data_dir = "../face_dataset"

## Set hyperparameters and share with all cases

In [5]:
input_size = 64
batch_size = 128
num_epochs = 50
lr = 0.002
num_workers = 4  # how many subprocesses to use for data loading

In [6]:
dataloaders_dict = load_augmented_data(input_size, batch_size, data_dir, num_workers)
dataloaders_dict

Initializing Datasets and Dataloaders...


{'train': <torch.utils.data.dataloader.DataLoader at 0x173ec613070>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x173ec6131f0>}

In [7]:
# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

# Case A: Re-train the softmax layer (Baseline model)

When finetuning all resnet layers, feature_extract = False, the model is finetuned and all model parameters are updated.  
If feature_extract = True, only the last layer parameters are updated, the others remain fixed.

In [8]:
finetuner_base.model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [9]:
# Train and evaluate
trainer_base = Trainer(finetuner_base)
fine_tuned_model_base, hist_base = trainer_base.fit(
    dataloaders=dataloaders_dict,
    num_epochs=num_epochs,
    lr=lr,
    optimizer=optim.Adam,
    criterion=nn.CrossEntropyLoss(),
    device=device
)

train_loss_hist_base, train_acc_hist_base, test_loss_hist_base, test_acc_hist_base = hist_base

Params to learn:
	 fc.weight
	 fc.bias
	 batch_norm.weight
	 batch_norm.bias
Epoch 0/49
----------
train Loss: 4.8311 Acc: 0.0140
test Loss: 4.6219 Acc: 0.0220

Epoch 1/49
----------
train Loss: 4.6177 Acc: 0.0188
test Loss: 4.5748 Acc: 0.0200

Epoch 2/49
----------
train Loss: 4.5095 Acc: 0.0383
test Loss: 4.4646 Acc: 0.0360

Epoch 3/49
----------
train Loss: 4.4534 Acc: 0.0460
test Loss: 4.4074 Acc: 0.0630

Epoch 4/49
----------
train Loss: 4.3987 Acc: 0.0540
test Loss: 4.2781 Acc: 0.0670

Epoch 5/49
----------
train Loss: 4.3118 Acc: 0.0593
test Loss: 4.2258 Acc: 0.0790

Epoch 6/49
----------
train Loss: 4.2124 Acc: 0.0858
test Loss: 4.1897 Acc: 0.0690

Epoch 7/49
----------
train Loss: 4.2049 Acc: 0.0850
test Loss: 4.1475 Acc: 0.0810

Epoch 8/49
----------
train Loss: 4.1756 Acc: 0.0918
test Loss: 4.0682 Acc: 0.1150

Epoch 9/49
----------
train Loss: 4.1331 Acc: 0.0990
test Loss: 4.0427 Acc: 0.1150

Epoch 10/49
----------
train Loss: 4.1934 Acc: 0.0908
test Loss: 4.1276 Acc: 0.0790

## Case B: Fine tune Conv5_x and freeze the rest of Conv blocks

In [10]:
# Unfreeze layer4 in Resnet18
finetuner_A.unfreeze_layer("layer4")

In [11]:
# Train and evaluate
trainer_A = Trainer(finetuner_A)
fine_tuned_model_A, hist_A = trainer_A.fit(
    dataloaders=dataloaders_dict,
    num_epochs=num_epochs,
    lr=lr,
    optimizer=optim.Adam,
    criterion=nn.CrossEntropyLoss(),
    device=device
)

train_loss_hist_A, train_acc_hist_A, test_loss_hist_A, test_acc_hist_A = hist_A

Params to learn:
	 fc.weight
	 fc.bias
	 batch_norm.weight
	 batch_norm.bias
Epoch 0/49
----------
train Loss: 4.7991 Acc: 0.0118
test Loss: 4.5914 Acc: 0.0190

Epoch 1/49
----------
train Loss: 4.6453 Acc: 0.0245
test Loss: 4.5337 Acc: 0.0390

Epoch 2/49
----------
train Loss: 4.5377 Acc: 0.0390
test Loss: 4.3956 Acc: 0.0480

Epoch 3/49
----------
train Loss: 4.4407 Acc: 0.0443
test Loss: 4.2939 Acc: 0.0770

Epoch 4/49
----------
train Loss: 4.3715 Acc: 0.0588
test Loss: 4.2223 Acc: 0.0930

Epoch 5/49
----------
train Loss: 4.3228 Acc: 0.0675
test Loss: 4.1641 Acc: 0.0800

Epoch 6/49
----------
train Loss: 4.2493 Acc: 0.0803
test Loss: 4.0809 Acc: 0.0930

Epoch 7/49
----------
train Loss: 4.2062 Acc: 0.0863
test Loss: 4.0860 Acc: 0.0940

Epoch 8/49
----------
train Loss: 4.1669 Acc: 0.0930
test Loss: 4.0011 Acc: 0.1240

Epoch 9/49
----------
train Loss: 4.1229 Acc: 0.1033
test Loss: 3.9549 Acc: 0.1330

Epoch 10/49
----------
train Loss: 4.1828 Acc: 0.0880
test Loss: 4.0104 Acc: 0.1090

## Case C: Fine tune ALL convolution layers.

In [12]:
# Train and evaluate
trainer_B = Trainer(finetuner_B)
fine_tuned_model_B, hist_B = trainer_B.fit(
    dataloaders=dataloaders_dict,
    num_epochs=num_epochs,
    lr=lr,
    optimizer=optim.Adam,
    criterion=nn.CrossEntropyLoss(),
    device=device
)

train_loss_hist_B, train_acc_hist_B, test_loss_hist_B, test_acc_hist_B = hist_B

Params to learn:
	 conv1.weight
	 bn1.weight
	 bn1.bias
	 layer1.0.conv1.weight
	 layer1.0.bn1.weight
	 layer1.0.bn1.bias
	 layer1.0.conv2.weight
	 layer1.0.bn2.weight
	 layer1.0.bn2.bias
	 layer1.1.conv1.weight
	 layer1.1.bn1.weight
	 layer1.1.bn1.bias
	 layer1.1.conv2.weight
	 layer1.1.bn2.weight
	 layer1.1.bn2.bias
	 layer2.0.conv1.weight
	 layer2.0.bn1.weight
	 layer2.0.bn1.bias
	 layer2.0.conv2.weight
	 layer2.0.bn2.weight
	 layer2.0.bn2.bias
	 layer2.0.downsample.0.weight
	 layer2.0.downsample.1.weight
	 layer2.0.downsample.1.bias
	 layer2.1.conv1.weight
	 layer2.1.bn1.weight
	 layer2.1.bn1.bias
	 layer2.1.conv2.weight
	 layer2.1.bn2.weight
	 layer2.1.bn2.bias
	 layer3.0.conv1.weight
	 layer3.0.bn1.weight
	 layer3.0.bn1.bias
	 layer3.0.conv2.weight
	 layer3.0.bn2.weight
	 layer3.0.bn2.bias
	 layer3.0.downsample.0.weight
	 layer3.0.downsample.1.weight
	 layer3.0.downsample.1.bias
	 layer3.1.conv1.weight
	 layer3.1.bn1.weight
	 layer3.1.bn1.bias
	 layer3.1.conv2.weight
	 layer3.1.b

## Case D: Freeze all the convolution blocks, introduce two FC layers prior to the softmax layer

In [None]:
trainer_C = Trainer(finetuner_C)
fine_tuned_model_C, hist_C = trainer_C.fit(
    dataloaders=dataloaders_dict,
    num_epochs=num_epochs,
    lr=lr,
    optimizer=optim.Adam,
    criterion=nn.CrossEntropyLoss(),
    device=device
)

train_loss_hist_C, train_acc_hist_C, test_loss_hist_C, test_acc_hist_C = hist_C

Params to learn:
	 fc.weight
	 fc.bias
	 batch_norm.weight
	 batch_norm.bias
	 fc_2.weight
	 fc_2.bias
	 batch_norm_2.weight
	 batch_norm_2.bias
Epoch 0/49
----------
train Loss: 4.8110 Acc: 0.0153
test Loss: 4.6239 Acc: 0.0250

Epoch 1/49
----------
train Loss: 4.6297 Acc: 0.0273
test Loss: 4.5325 Acc: 0.0540

Epoch 2/49
----------
train Loss: 4.5233 Acc: 0.0367
test Loss: 4.4474 Acc: 0.0470

Epoch 3/49
----------
train Loss: 4.4347 Acc: 0.0480
test Loss: 4.3844 Acc: 0.0740

Epoch 4/49
----------
train Loss: 4.3738 Acc: 0.0588
test Loss: 4.3049 Acc: 0.0630

Epoch 5/49
----------
train Loss: 4.3222 Acc: 0.0638
test Loss: 4.2392 Acc: 0.0750

Epoch 6/49
----------
train Loss: 4.2632 Acc: 0.0718
test Loss: 4.1751 Acc: 0.0750

Epoch 7/49
----------
train Loss: 4.2155 Acc: 0.0828
test Loss: 4.1104 Acc: 0.1040

Epoch 8/49
----------
train Loss: 4.1776 Acc: 0.0908
test Loss: 4.0805 Acc: 0.1020

Epoch 9/49
----------
train Loss: 4.1345 Acc: 0.1020
test Loss: 4.0615 Acc: 0.1080

Epoch 10/49
---

## Results

In [None]:
plt.plot(train_loss_hist_base, label='Baseline (Train)')
plt.plot(train_loss_hist_A, label='Model A (Train)')
plt.plot(train_loss_hist_B, label='Model B (Train)')
plt.plot(train_loss_hist_C, label='Model C (Train)')
plt.plot(test_loss_hist_base, label='Baseline (Test)')
plt.plot(test_loss_hist_A, label='Model A (Test)')
plt.plot(test_loss_hist_B, label='Model B (Test)')
plt.plot(test_loss_hist_C, label='Model C (Test)')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
plt.plot(train_acc_hist_base, label='Baseline (Train)')
plt.plot(train_acc_hist_A, label='Model A (Train)')
plt.plot(train_acc_hist_B, label='Model B (Train)')
plt.plot(train_acc_hist_C, label='Model C (Train)')
plt.plot(test_acc_hist_base, label='Baseline (Test)')
plt.plot(test_acc_hist_A, label='Model A (Test)')
plt.plot(test_acc_hist_B, label='Model B (Test)')
plt.plot(test_acc_hist_C, label='Model C (Test)')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()