# Transfer Learning
### - A model that has already been trained on one task can be retrained and reused for other classes
### - Only the last layer of the already trained model needs to be replaced
# Further things to learn
### 1- Folder Categorization
### 2- Scheduler

In [58]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, models, transforms
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import time
import os
import copy
import matplotlib.pyplot as plt

In [59]:
# device configuration: use the GPU when CUDA is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [60]:
# per-channel (R, G, B) statistics passed to transforms.Normalize
# NOTE(review): torchvision documents its ImageNet-pretrained models with
# mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225]; confirm that
# 0.5 / 0.25 is an intentional choice here.
mean = np.full(3, 0.5)
std = np.full(3, 0.25)

In [61]:
# One preprocessing pipeline per dataset split, keyed by the split name.
data_transforms = dict(
    # training: random crop + random horizontal flip as augmentation,
    # then conversion to a tensor and per-channel normalization
    train=transforms.Compose([
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]),
    # validation: deterministic resize + center crop (no augmentation),
    # followed by the same tensor conversion and normalization
    val=transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]),
)

In [62]:
# load data
# root folder that holds one sub-folder per split ('train' and 'val')
dir_path = 'data/hymenoptera_data'
# build one ImageFolder dataset per split; each dataset applies the
# transformation pipeline registered for that split in data_transforms
img_data = {
    split: datasets.ImageFolder(root=os.path.join(dir_path, split), transform=pipeline)
    for split, pipeline in data_transforms.items()
}

In [63]:
# data loaders: one per split, serving shuffled mini-batches of 4 images,
# loaded in the main process (num_workers=0)
dataloaders = {
    split: torch.utils.data.DataLoader(dataset=image_set, batch_size=4, shuffle=True, num_workers=0)
    for split, image_set in img_data.items()
}

In [64]:
# number of samples in each split (used to average loss / accuracy per epoch)
dataset_sizes = {split: len(image_set) for split, image_set in img_data.items()}
# class names as exposed by the training ImageFolder dataset
class_names = img_data['train'].classes

# rebinds `device`, this time naming the first CUDA device explicitly
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(class_names)

['ants', 'bees']


In [65]:
def training_model(model, criterion, optimizer, scheduler, epochs=20, *,
                   loaders=None, sizes=None, target_device=None):
    """Train and validate ``model``; return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'val' phase. The state dict
    of the epoch with the highest validation accuracy is kept and loaded back
    into ``model`` before it is returned. Per-phase loss/accuracy, the total
    run time and the best validation accuracy are printed.

    Args:
        model: network to optimize; must already live on the target device.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once after each 'train' phase.
        epochs: number of train + val passes to run.
        loaders: mapping with 'train' and 'val' keys to iterables that yield
            ``(images, labels)`` batches. Defaults to the module-level
            ``dataloaders``.
        sizes: mapping with 'train' and 'val' keys to the number of samples in
            each split, used to average loss and accuracy. Defaults to the
            module-level ``dataset_sizes``.
        target_device: device every batch is moved to. Defaults to the
            module-level ``device``.

    Returns:
        The same ``model`` object, with the best validation weights loaded.
    """
    # Fall back to the notebook-level objects when nothing is passed explicitly.
    if loaders is None:
        loaders = dataloaders
    if sizes is None:
        sizes = dataset_sizes
    if target_device is None:
        target_device = device

    time_start = time.time()
    best_model = copy.deepcopy(model.state_dict())
    best_accuracy = 0.0

    for epoch in range(epochs):
        print(f'Epoch {epoch}/{epochs-1}')
        print('-'*10)

        # training and validation stage of each epoch
        for state in ['train', 'val']:
            if state == 'train':
                model.train()  # enable dropout / batch-norm statistics updates
            else:
                model.eval()  # inference behaviour for dropout / batch-norm

            model_running_loss = 0.0
            model_running_corrects = 0

            # iteration in dataset
            for images, labels in loaders[state]:
                # Tensor.to() returns a new tensor instead of moving in place,
                # so the result must be re-bound or the batch stays on the CPU.
                images = images.to(target_device)
                labels = labels.to(target_device)

                # gradients are only tracked during the training phase
                with torch.set_grad_enabled(state == 'train'):
                    out = model(images)
                    # torch.max returns (values, indices); the index is the class
                    _, predictions = torch.max(out, 1)
                    loss = criterion(out, labels)

                    if state == 'train':
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # criterion yields the batch mean, so weight it by batch size
                model_running_loss += loss.item() * images.size(0)
                # keep the counter a plain int so an empty phase cannot break
                # the accuracy computation below
                model_running_corrects += torch.sum(predictions == labels).item()

            if state == 'train':
                scheduler.step()

            epoch_loss = model_running_loss / sizes[state]
            epoch_accuracy = model_running_corrects / sizes[state]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(state, epoch_loss, epoch_accuracy))

            # remember the weights of the best validation epoch so far
            if state == 'val' and epoch_accuracy > best_accuracy:
                best_accuracy = epoch_accuracy
                best_model = copy.deepcopy(model.state_dict())

    print()
    time_end = time.time() - time_start
    print('Training completion time: {:.0f}m {:.0f}s'.format(time_end // 60, time_end % 60))
    print('The best val accuracy: {:4f}'.format(best_accuracy))

    # load the best model with weights
    model.load_state_dict(best_model)
    return model

# Method 1: To train transfer models
## Finetuning of Convolutional Network
### - Load the pretrained model
### - Reset the last fully connected layer of the pretrained model

In [66]:
# ResNet-18 with pretrained weights; no parameters are frozen here, so the
# whole network is fine-tuned
model = models.resnet18(pretrained=True)

# get number of input features from the last layer
num_feat = model.fc.in_features

# replace the last fully connected layer with a fresh one that has one output
# per dataset class (derived from class_names instead of a hard-coded 2)
model.fc = nn.Linear(num_feat, len(class_names))

# nn.Module.to() moves the parameters in place; it is left as the last
# expression so the notebook echoes the architecture
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [67]:
# loss and optimizer
criterion =  nn.CrossEntropyLoss() # nn.CrossEntropyLoss() automaticsally applies Softmax
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

In [68]:
# use a scheduler to update the learning rate
# StepLR multiplies the learning rate of every parameter group by `gamma`
# once every `step_size` scheduler steps; here: lr -> 0.1 * lr every 7 epochs
lr_sched = lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=7)
# only 4 epochs are run, fewer than the 7-epoch step size configured above
model = training_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=lr_sched,
    epochs=4,
)

Epoch 0/3
----------
train Loss: 0.6287 Acc: 0.6680
val Loss: 0.4431 Acc: 0.8562
Epoch 1/3
----------
train Loss: 0.5073 Acc: 0.7541
val Loss: 0.3419 Acc: 0.8824
Epoch 2/3
----------
train Loss: 0.4777 Acc: 0.7951
val Loss: 0.2899 Acc: 0.8954
Epoch 3/3
----------
train Loss: 0.4083 Acc: 0.8402
val Loss: 0.2726 Acc: 0.8889

Training completion time: 1m 35s
The best val accuracy: 0.895425


# Method 2: To train transfer models
## Fixed feature extractor
### Freeze all the network except the last fully connected layer

In [69]:
# set requires_grad = False to freeze the pretrained parameters so that no
# gradients are computed for them in backward()
model_conv_net = torchvision.models.resnet18(pretrained=True)
for parameters in model_conv_net.parameters():
    parameters.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default,
# so the replacement head below is the only trainable part of the network.
num_features = model_conv_net.fc.in_features
# one output per dataset class (derived from class_names instead of a hard-coded 2)
model_conv_net.fc = nn.Linear(num_features, len(class_names))

model_conv_net = model_conv_net.to(device)

In [70]:
# loss and optimizer
criterion = nn.CrossEntropyLoss()
# unlike method 1, only the parameters of the last (fc) layer are handed to
# the optimizer
optimizer_conv_net = optim.SGD(
    params=model_conv_net.fc.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [71]:
# use a scheduler to update the learning rate
# StepLR multiplies the learning rate of every parameter group by `gamma`
# once every `step_size` scheduler steps; here: lr -> 0.1 * lr every 7 epochs
lr_schedu = lr_scheduler.StepLR(optimizer_conv_net, gamma=0.1, step_size=7)
# only 4 epochs are run, fewer than the 7-epoch step size configured above
model_conv_net = training_model(
    model=model_conv_net,
    criterion=criterion,
    optimizer=optimizer_conv_net,
    scheduler=lr_schedu,
    epochs=4,
)

Epoch 0/3
----------
train Loss: 0.5800 Acc: 0.6639
val Loss: 0.3948 Acc: 0.8105
Epoch 1/3
----------
train Loss: 0.3767 Acc: 0.8279
val Loss: 0.1602 Acc: 0.9477
Epoch 2/3
----------
train Loss: 0.3639 Acc: 0.8279
val Loss: 0.1705 Acc: 0.9412
Epoch 3/3
----------
train Loss: 0.5237 Acc: 0.7869
val Loss: 0.2278 Acc: 0.9216

Training completion time: 0m 46s
The best val accuracy: 0.947712
