# Transfer Learning
In this tutorial we will learn how to load a ResNet-18 network pretrained on ImageNet, replace its final fully connected (FC) layer with a new one, and fine-tune the whole network.

After that, we will freeze the whole pretrained network, add a new FC layer, and train only that FC layer.

We will also use the torchvision package, which provides:
* Pre-trained networks (e.g. ResNet-18)
* Data augmentations and transformations
* Ready-made data loaders (e.g. the ImageFolder loader)
* Some standard datasets

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

# Library that gives support for tensorboard and pytorch
from tensorboardX import SummaryWriter

# Expose only the first GPU (index 0) to this process via the environment
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

plt.ion()   # switch matplotlib to interactive mode

### Load Data

In [2]:
# Per-split preprocessing pipelines.
#   'train': random resized crop + random horizontal flip (augmentation),
#            then tensor conversion and per-channel normalization.
#   'val':   deterministic resize + center crop, then the same tensor
#            conversion and normalization (no augmentation).
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
}

# ImageFolder expects every class to live in its own sub-directory of
# <data_dir>/<split>.
data_dir = 'hymenoptera_data'
image_datasets = {
    split: datasets.ImageFolder(root=os.path.join(data_dir, split),
                                transform=split_transform)
    for split, split_transform in data_transforms.items()
}

# One shuffled loader per split, 4 images per batch, 4 worker processes.
dataloaders = {
    split: torch.utils.data.DataLoader(split_dataset, batch_size=4,
                                       shuffle=True, num_workers=4)
    for split, split_dataset in image_datasets.items()
}

# Number of images per split (used to average loss / accuracy over an epoch)
dataset_sizes = {split: len(split_dataset)
                 for split, split_dataset in image_datasets.items()}

# Class labels as discovered by ImageFolder on the training split
class_names = image_datasets['train'].classes

# Run on the first GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

### Generic Training Function
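You can reuse this same `train_model` function in other classification projects, as long as `dataloaders`, `dataset_sizes` and `device` are defined at notebook level as in the cell above (the function reads them as globals).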

In [3]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase. Batches come
    from the notebook-level ``dataloaders`` dict, are moved to the
    notebook-level ``device``, and per-phase averages are computed with the
    notebook-level ``dataset_sizes`` dict.

    Args:
        model: network to optimize; its parameters are updated in place.
        criterion: callable used as ``criterion(outputs, labels)``. The batch
            loss is multiplied by the batch size before averaging over the
            split, so a batch-mean loss is assumed.
        optimizer: object providing ``zero_grad()`` and ``step()``.
        scheduler: object providing ``step()``; it is stepped once per epoch,
            after the training phase, so the first epoch runs with the
            optimizer's initial learning rate.
        num_epochs: number of train + val passes to run.

    Returns:
        The same ``model`` object, with the weights from the epoch that
        reached the highest validation accuracy loaded into it.
    """
    since = time.time()

    # Snapshot of the best weights seen so far (starts as the initial weights)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # For each epoch (1 complete training set run is one epoch)
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0  # plain Python int, safe even for empty loaders

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients (needed before loss.backward and optimizer step)
                optimizer.zero_grad()

                # forward; gradient history is only tracked during training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics: weight the batch loss by the batch size so the
                # epoch average is correct when the last batch is smaller
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            # Step the LR schedule only after this epoch's optimizer updates;
            # stepping before them would shift the whole schedule one epoch early.
            if is_train:
                scheduler.step()

            # Get epoch loss and accuracy for the current phase
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Keep a deep copy of the weights whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    # Calculate whole training time
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    # Return best model
    return model

### Load Resnet-18 pretrained with Imagenet

In [4]:
# Get a ResNet-18 with ImageNet-pretrained weights
model_ft = models.resnet18(pretrained=True)

# Replace the final FC layer with a new one that has one output per class
# found in the training folder, instead of a hard-coded count
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, len(class_names))

# Move to GPU if available
model_ft = model_ft.to(device)

# Multiclass cross entropy loss
criterion = nn.CrossEntropyLoss()

# Fine-tuning: all parameters of the network are handed to the optimizer
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

### Train

In [5]:
# Fine-tune the whole network for 25 epochs; model_ft is rebound to the
# model returned by train_model
model_ft = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=25,
)

Epoch 0/24
----------
train Loss: 0.5471 Acc: 0.7459
val Loss: 0.2798 Acc: 0.8497

Epoch 1/24
----------
train Loss: 0.5418 Acc: 0.7705
val Loss: 0.2310 Acc: 0.9216

Epoch 2/24
----------
train Loss: 0.6042 Acc: 0.7910
val Loss: 0.3218 Acc: 0.8627

Epoch 3/24
----------
train Loss: 0.7646 Acc: 0.7336
val Loss: 0.6736 Acc: 0.8235

Epoch 4/24
----------
train Loss: 0.5249 Acc: 0.7951
val Loss: 0.3026 Acc: 0.8954

Epoch 5/24
----------
train Loss: 0.6124 Acc: 0.7664
val Loss: 0.2187 Acc: 0.8954

Epoch 6/24
----------
train Loss: 0.5330 Acc: 0.8033
val Loss: 0.2255 Acc: 0.9150

Epoch 7/24
----------
train Loss: 0.3840 Acc: 0.8156
val Loss: 0.2051 Acc: 0.9150

Epoch 8/24
----------
train Loss: 0.2572 Acc: 0.8893
val Loss: 0.2257 Acc: 0.8954

Epoch 9/24
----------
train Loss: 0.2736 Acc: 0.8770
val Loss: 0.2141 Acc: 0.9281

Epoch 10/24
----------
train Loss: 0.3274 Acc: 0.8525
val Loss: 0.1945 Acc: 0.9542

Epoch 11/24
----------
train Loss: 0.3169 Acc: 0.8648
val Loss: 0.1996 Acc: 0.9281

Ep

### Use Resnet-18 as feature extraction
* Freeze all pretrained layers (disable gradient computation for them)
* Add new FC layer
* Train

Observe that training is faster now, because gradients are only computed for the new FC layer.

In [None]:
# Get a ResNet-18 with ImageNet-pretrained weights
model_conv = models.resnet18(pretrained=True)

# Freeze every pretrained parameter (requires_grad = False), so no gradients
# are computed for the existing layers
for param in model_conv.parameters():
    param.requires_grad = False

# Add a new FC layer at the end with one output per class found in the
# training folder. Parameters of a newly created layer have
# requires_grad=True by default, so only this layer is trainable.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, len(class_names))

# Move to GPU if available
model_conv = model_conv.to(device)

# Multiclass cross entropy loss
criterion = nn.CrossEntropyLoss()

# Only the parameters of the final layer are handed to the optimizer,
# as opposed to the fine-tuning setup where all parameters are optimized.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

In [None]:
# Train only the new FC layer for 25 epochs; model_conv is rebound to the
# model returned by train_model
model_conv = train_model(
    model_conv,
    criterion,
    optimizer_conv,
    exp_lr_scheduler,
    num_epochs=25,
)

Epoch 0/24
----------
train Loss: 0.7666 Acc: 0.5984
val Loss: 0.4306 Acc: 0.8039

Epoch 1/24
----------
train Loss: 0.5623 Acc: 0.7295
val Loss: 0.2082 Acc: 0.9281

Epoch 2/24
----------
train Loss: 0.4022 Acc: 0.8033
val Loss: 0.1918 Acc: 0.9412

Epoch 3/24
----------
train Loss: 0.4343 Acc: 0.8238
val Loss: 0.1913 Acc: 0.9412

Epoch 4/24
----------
train Loss: 0.3073 Acc: 0.8607
val Loss: 0.1913 Acc: 0.9412

Epoch 5/24
----------
train Loss: 0.3776 Acc: 0.8402
val Loss: 0.2855 Acc: 0.9085

Epoch 6/24
----------
train Loss: 0.4162 Acc: 0.8074
val Loss: 0.2063 Acc: 0.9412

Epoch 7/24
----------
train Loss: 0.4041 Acc: 0.8197
val Loss: 0.1935 Acc: 0.9542

Epoch 8/24
----------
train Loss: 0.2875 Acc: 0.8934
val Loss: 0.2052 Acc: 0.9412

Epoch 9/24
----------
train Loss: 0.3297 Acc: 0.8730
val Loss: 0.1962 Acc: 0.9412

Epoch 10/24
----------
train Loss: 0.2794 Acc: 0.8811
val Loss: 0.1891 Acc: 0.9412

Epoch 11/24
----------
train Loss: 0.3921 Acc: 0.8279
val Loss: 0.1984 Acc: 0.9477

Ep