In [1]:
%matplotlib inline

two major transfer learning scenarios looks as follows:

-  **Finetuning the convnet**: Instead of random initializaion, we
   initialize the network with a pretrained network, like the one that is
   trained on imagenet 1000 dataset. Rest of the training looks as
   usual.
-  **ConvNet as fixed feature extractor**: Here, we will freeze the weights
   for all of the network except that of the final fully connected
   layer. This last fully connected layer is replaced with a new one
   with random weights and only this layer is trained.




In [19]:
# License: BSD
# Author: Sasank Chilamkurthy

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import Dataset, TensorDataset, DataLoader
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import pandas as pd

plt.ion()   # interactive mode

Load Data
---------

We will use torchvision and torch.utils.data packages for loading the
data.

The problem we're going to solve today is to train a model to classify
**ants** and **bees**. We have about 120 training images each for ants and bees.
There are 75 validation images for each class. Usually, this is a very
small dataset to generalize upon, if trained from scratch. Since we
are using transfer learning, we should be able to generalize reasonably
well.

This dataset is a very small subset of imagenet.

.. Note ::
   Download the data from
   `here <https://download.pytorch.org/tutorial/hymenoptera_data.zip>`_
   and extract it to the current directory.



In [114]:
class AddisDataset(Dataset):
    """Addis dataset."""

    def __init__(self, csv_file, root_dir, column, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file.
            root_dir (string): Directory with all the numpy files.
            column (string): Variable to predict
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.data = pd.read_csv(csv_file)[column][1000:1007] # TODO: lol indexing is jank rn will change
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, 's1_median_addis_multiband_500x500_100' + str(idx) + '.0.npy')
        image = np.load(img_name)[:, :, :3]
        labels = self.data[1000 + idx]
        if self.transform:
            image = self.transform(image)

        sample = {'image': image, 'labels': labels}

        return sample

In [129]:
class CenterCrop(object):
    """Rescale the image in a sample to a given size.

    Args:
        output_size (tuple or tuple): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, image):
        h, w = image.shape[:2]
        new_h = new_w = self.output_size

        top = int((h - new_h) / 2)
        left = int((w - new_w) / 2)
        image = image[top:top + new_h, left:left + new_w]

        return image


In [130]:


# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = transforms.Compose([
        CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

data_dir = 'img/train' # TODO: separate into train / val set
dataset = AddisDataset(csv_file='Addis_data_processed.csv',
                                    root_dir=data_dir,
                                    column='water_unavailable_valNO_when_bl_dw19_val1',
                                    transform=data_transforms)

dataloders = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4)
dataset_size = len(dataset)

use_gpu = torch.cuda.is_available()

In [None]:
# print data
for i in range(len(dataset)):
    sample = dataset[i]

    print(i, sample)

    if i == 3:
        break

Training the model
------------------

Now, let's write a general function to train a model. Here, we will
illustrate:

-  Scheduling the learning rate
-  Saving the best model

In the following, parameter ``scheduler`` is an LR scheduler object from
``torch.optim.lr_scheduler``.



In [125]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dataloders:
                # get the inputs
                inputs = data['image']
                labels = data['labels'].type(torch.LongTensor)

                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.data[0]
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_size
            epoch_acc = running_corrects / dataset_size

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

Visualizing the model predictions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Generic function to display predictions for a few images




In [131]:
# TODO: not implemented for current dataset rn
def visualize_model(model, num_images=6):
    images_so_far = 0
    fig = plt.figure()

    for i, data in enumerate(dataloders):
        inputs = data['image']
        labels = data['labels'].type(torch.LongTensor)
        if use_gpu:
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels)

        outputs = model(inputs)
        _, preds = torch.max(outputs.data, 1)

        for j in range(inputs.size()[0]):
            images_so_far += 1
            ax = plt.subplot(num_images//2, 2, images_so_far)
            ax.axis('off')
            ax.set_title('predicted: {}'.format(class_names[preds[j]]))
            imshow(inputs.cpu().data[j])

            if images_so_far == num_images:
                return

Finetuning the convnet
----------------------

Load a pretrained model and reset final fully connected layer.




In [105]:
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)

if use_gpu:
    model_ft = model_ft.cuda()

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

Train and evaluate
^^^^^^^^^^^^^^^^^^

It should take around 15-25 min on CPU. On GPU though, it takes less than a
minute.




In [127]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=10)

Epoch 0/9
----------
train Loss: 0.0501 Acc: 1.0000
val Loss: 0.1701 Acc: 0.7143

Epoch 1/9
----------
train Loss: 0.0358 Acc: 1.0000
val Loss: 0.1261 Acc: 1.0000

Epoch 2/9
----------
train Loss: 0.0259 Acc: 1.0000
val Loss: 0.0885 Acc: 1.0000

Epoch 3/9
----------
train Loss: 0.0196 Acc: 1.0000
val Loss: 0.0647 Acc: 1.0000

Epoch 4/9
----------
train Loss: 0.0158 Acc: 1.0000
val Loss: 0.0444 Acc: 1.0000

Epoch 5/9
----------
train Loss: 0.0126 Acc: 1.0000
val Loss: 0.0314 Acc: 1.0000

Epoch 6/9
----------
train Loss: 0.0105 Acc: 1.0000
val Loss: 0.0252 Acc: 1.0000

Epoch 7/9
----------
train Loss: 0.0088 Acc: 1.0000
val Loss: 0.0222 Acc: 1.0000

Epoch 8/9
----------
train Loss: 0.0076 Acc: 1.0000
val Loss: 0.0194 Acc: 1.0000

Epoch 9/9
----------
train Loss: 0.0069 Acc: 1.0000
val Loss: 0.0164 Acc: 1.0000

Training complete in 0m 40s
Best val Acc: 1.000000


In [None]:
visualize_model(model_ft)

ConvNet as fixed feature extractor
----------------------------------

Here, we need to freeze all the network except the final layer. We need
to set ``requires_grad == False`` to freeze the parameters so that the
gradients are not computed in ``backward()``.

You can read more about this in the documentation
`here <http://pytorch.org/docs/notes/autograd.html#excluding-subgraphs-from-backward>`__.




In [120]:
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, 2)

if use_gpu:
    model_conv = model_conv.cuda()

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opoosed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

Train and evaluate
^^^^^^^^^^^^^^^^^^

On CPU this will take about half the time compared to previous scenario.
This is expected as gradients don't need to be computed for most of the
network. However, forward does need to be computed.




In [133]:
model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=10)

Epoch 0/9
----------
train Loss: 0.1347 Acc: 1.0000
val Loss: 0.2509 Acc: 0.0000

Epoch 1/9
----------
train Loss: 0.1327 Acc: 1.0000
val Loss: 0.2147 Acc: 0.0000

Epoch 2/9
----------
train Loss: 0.1294 Acc: 1.0000
val Loss: 0.1601 Acc: 1.0000

Epoch 3/9
----------
train Loss: 0.1308 Acc: 0.8571
val Loss: 0.1107 Acc: 1.0000

Epoch 4/9
----------
train Loss: 0.1249 Acc: 1.0000
val Loss: 0.0827 Acc: 1.0000

Epoch 5/9
----------
train Loss: 0.1251 Acc: 0.8571
val Loss: 0.0715 Acc: 1.0000

Epoch 6/9
----------
train Loss: 0.1260 Acc: 1.0000
val Loss: 0.0672 Acc: 1.0000

Epoch 7/9
----------
train Loss: 0.1287 Acc: 1.0000
val Loss: 0.0638 Acc: 1.0000

Epoch 8/9
----------
train Loss: 0.1231 Acc: 1.0000
val Loss: 0.0637 Acc: 1.0000

Epoch 9/9
----------
train Loss: 0.1195 Acc: 1.0000
val Loss: 0.0596 Acc: 1.0000

Training complete in 0m 18s
Best val Acc: 1.000000


In [None]:
visualize_model(model_conv)

plt.ioff()
plt.show()