# Transfer learning with PyTorch
We're going to train a neural network to classify dogs and cats.

## Init, helpers, utils, ...

In [3]:
from pprint import pprint
from IPython.core.debugger import set_trace

import matplotlib.pyplot as plt
import torch
import numpy as np


%matplotlib inline

In [4]:
def attr(obj):
    """
    Return all public attributes of an object.
    """
    return [x for x in dir(obj) if not x.startswith("_")]

In [5]:
# Training helpers

def get_trainable(model_params):
    return (p for p in model_params if p.requires_grad)


def get_frozen(model_params):
    return (p for p in model_params if not p.requires_grad)


def all_trainable(model_params):
    return all(p.requires_grad for p in model_params)


def all_frozen(model_params):
    return all(not p.requires_grad for p in model_params)


def freeze_all(model_params):
    for param in model_params:
        param.requires_grad = False


# list(get_trainable(model.parameters()))
# list(get_frozen(model.parameters()))
# all_trainable(model.parameters())
# all_frozen(model.parameters())

# The Basics

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

## Transforms

In [8]:
from torchvision import transforms

IMG_SIZE = 224
_mean = [0.485, 0.456, 0.406]
_std = [0.229, 0.224, 0.225]


train_trans = transforms.Compose([
    transforms.Resize(256),  # some images are pretty small
    transforms.RandomCrop(IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(.3, .3, .3),
    transforms.ToTensor(),
    transforms.Normalize(_mean, _std),
])
val_trans = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_mean, _std),
])

## The Data

In [7]:
from ppt.utils import DogsCatsDataset

train_ds = DogsCatsDataset("../data/raw", "sample/train", transform=train_trans)
val_ds = DogsCatsDataset("../data/raw", "sample/valid", transform=val_trans)

n_classes = 2

Loading data from ../data/raw/dogscats/sample/train.
Loading data from ../data/raw/dogscats/sample/valid.


## Dataloader
Batch loading for datasets with multi-processing and different sample strategies.

In [8]:
from torch.utils.data import DataLoader

BATCH_SIZE = 2

train_dl = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
val_dl = DataLoader(
    val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

## The Model

In [9]:
from torchvision import models

model = models.resnet18(pretrained=True)

In [10]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [11]:
# Freeze all parameters
for param in model.parameters():
    param.requires_grad = False

In [12]:
freeze_all(model.parameters())
assert all_frozen(model.parameters())

Replace the last layer with a linear layer. New layers have `requires_grad = True`.

In [13]:
model.fc = nn.Linear(512, n_classes)

In [14]:
all_frozen(model.parameters())

False

In [25]:
def get_model(n_classes=2):
    model = models.resnet18(pretrained=True)
    freeze_all(model.parameters())
    model.fc = nn.Linear(512, n_classes)
    return model

model = get_model()

The loss function

In [26]:
criterion = nn.CrossEntropyLoss()

The optimizer

In [27]:
optimizer = torch.optim.SGD(
    get_trainable(model.parameters()),  # model.fc.parameters()
    lr=0.0001,
    momentum=0.9,
)

The Train Loop

In [28]:
N_EPOCHS = 2

In [29]:
model = model.to(device)

In [31]:
for epoch in range(N_EPOCHS):
    print(f"Epoch {epoch}/{N_EPOCHS}")
    
    # Train
    model.train()  # IMPORTANT
    
    running_loss, correct = 0.0, 0
    
    for X, y in train_dl:
        X, y = X.to(device), y.to(device)
        
        optimizer.zero_grad()
        
        y_ = model(X)
        loss = criterion(y_, y)
        
        loss.backward()
        optimizer.step()
        print(f"    loss: {loss.item():0.3f}")
        
        _, y_label_ = torch.max(y_, 1)
        correct += (y_label_ == y).sum().item()
        running_loss += loss.item() * X.shape[0]
    
    print(f"  Train Loss: {running_loss / len(train_dl.dataset)}")
    print(f"  Train Acc:  {correct / len(train_dl.dataset)}")
    
    
    # Eval
    model.eval()  # IMPORTANT
    
    running_loss, correct = 0.0, 0
    
    with torch.no_grad():  # IMPORTANT
        for X, y in val_dl:
            X, y = X.to(device), y.to(device)
        
            optimizer.zero_grad()
            
            y_ = model(X)
            lossl = criterion(y_, y)
        
            _, y_label_ = torch.max(y_, 1)
            correct += (y_label_ == y).sum().item()
            running_loss += loss.item() * X.shape[0]
    
    print(f"  Valid Loss: {running_loss / len(val_dl.dataset)}")
    print(f"  Valid Acc:  {correct / len(val_dl.dataset)}")
    print()

Epoch 0/2
    loss: 0.588
    loss: 0.955
    loss: 0.660
    loss: 0.928
    loss: 0.489
    loss: 0.593
    loss: 0.700
    loss: 0.713
  Train Loss: 0.7033335417509079
  Train Acc:  0.625
  Valid Loss: 0.7131053805351257
  Valid Acc:  1.0

Epoch 1/2
    loss: 0.643
    loss: 0.771
    loss: 0.571
    loss: 0.414
    loss: 0.687
    loss: 0.606
    loss: 0.768
    loss: 0.355
  Train Loss: 0.6016690582036972
  Train Acc:  0.75
  Valid Loss: 0.3547409772872925
  Valid Acc:  1.0



## Intermission: training libraries

### Ignite
Ignite is a high-level library to help with training neural networks in PyTorch.

- ignite helps you write compact but full-featured training loops in a few lines of code
- you get a training loop with metrics, early-stopping, model checkpointing and other features without the boilerplate

https://github.com/pytorch/ignite

### TNT
TNT is a library providing powerful dataloading, logging and visualization utlities for Python. It is closely intergrated with PyTorch and is designed to enable rapid iteration with any model or training regimen.

https://github.com/pytorch/tnt


### Skorch
A scikit-learn compatible neural network library that wraps PyTorch.

https://github.com/dnouri/skorch

### The fun of Reinvention
Clearly, you can do better than they, invent your own! :D

## Demo with Ignite

In [92]:
import ignite
from ignite.metrics import (
    CategoricalAccuracy,
    Loss,
    Precision,
)
from ignite.engine import (
    create_supervised_evaluator,
    create_supervised_trainer,
    Events,
)

# model, loss, optimizer
model = get_model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(get_trainable(model.parameters()), lr=0.001, momentum=.9)

# trainer and evaluator
trainer = create_supervised_trainer(model, optimizer, criterion)
evaluator = create_supervised_evaluator(
    model,
    metrics={
        "accuracy": CategoricalAccuracy(),
        "loss": Loss(criterion),
        "precision": Precision(),
    }
)

In [93]:
@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    print(f"Epoch[{engine.state.epoch}] Batch[{engine.state.iteration}] Loss: {engine.state.output:.2f}")


trainer.run(train_dl, max_epochs=1)

Epoch[1] Batch[1] Loss: 0.92
Epoch[1] Batch[2] Loss: 0.64
Epoch[1] Batch[3] Loss: 0.81
Epoch[1] Batch[4] Loss: 0.39
Epoch[1] Batch[5] Loss: 0.82
Epoch[1] Batch[6] Loss: 1.76
Epoch[1] Batch[7] Loss: 0.86
Epoch[1] Batch[8] Loss: 0.95


<ignite.engine.engine.State at 0x7fedf8f4df28>

In [84]:
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    evaluator.run(train_dl)
    metrics = evaluator.state.metrics
    print(f"Training Results   - Epoch: {trainer.state.epoch}  "
          f"accuracy: {metrics['accuracy']:.2f} "
          f"loss: {metrics['loss']:.2f} "
          f"prec: {metrics['precision']}")


@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    evaluator.run(val_dl)
    metrics = evaluator.state.metrics
    print(f"Validation Results - Epoch: {trainer.state.epoch}  "
          f"accuracy: {metrics['accuracy']:.2f} "
          f"loss: {metrics['loss']:.2f} "
          f"prec: {metrics['precision']}")


trainer.run(train_dl, max_epochs=1)

Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.12
Epoch[1] Loss: 0.13
Epoch[1] Loss: 0.13
Epoch[1] Loss: 2.26
Epoch[1] Loss: 2.26
Epoch[1] Loss: 1.81
Epoch[1] Loss: 1.81
Epoch[1] Loss: 0.19
Epoch[1] Loss: 0.19
Epoch[1] Loss: 0.38
Epoch[1] Loss: 0.38
Epoch[1] Loss: 2.44
Epoch[1] Loss: 2.44
Epoch[1] Loss: 0.62
Epoch[1] Loss: 0.62
Training Results   - Epoch: 1  accuracy: 0.94 loss: 0.11 prec: tensor([ 1.0000,  0.8889])
Validation Results - Epoch: 1  accuracy: 0.62 loss: 0.54 prec: tensor([ 1.0000,  0.5714])


<ignite.engine.engine.State at 0x7fee0cdca4e0>

# Visualization with Tensorboard and TensorboardX
- https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard
- https://github.com/lanpa/tensorboard-pytorch

Start tensorboard:
```
cd notebooks
tensorboard --logdir=tf_log
```

In [112]:
from tensorboardX import SummaryWriter

summary_writer = SummaryWriter(log_dir="tf_log/exp1")

In [113]:
ls tf_log

[0m[01;34mexp1[0m/


In [114]:
# Visualize the graph/network
X, _ = next(iter(train_dl))
summary_writer.add_graph(model, X)

Then visit http://localhost:6006

In [117]:
# write some scalars
for i in range(10):
    summary_writer.add_scalar("training/loss", np.random.rand(), i)
    summary_writer.add_scalar("validation/loss", np.random.rand() + .1, i)

Use tensorboard with ignite

In [120]:
# new SummaryWriter for new experiment
summary_writer = SummaryWriter(log_dir="tf_log/exp1_ignite")

# Basic setup: model, loss, optimizer
model = get_model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(get_trainable(model.parameters()), lr=0.001, momentum=.9)

# trainer and evaluator
trainer = create_supervised_trainer(model, optimizer, criterion)
evaluator = create_supervised_evaluator(
    model,
    metrics={
        "accuracy": CategoricalAccuracy(),
        "loss": Loss(criterion),
    }
)

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    evaluator.run(train_dl)
    metrics = evaluator.state.metrics
    epoch = trainer.state.epoch
    summary_writer.add_scalar("training/accuracy", metrics['accuracy'], epoch)
    summary_writer.add_scalar("training/loss", metrics['loss'], epoch)


@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    evaluator.run(val_dl)
    metrics = evaluator.state.metrics
    epoch = trainer.state.epoch
    summary_writer.add_scalar("validation/accuracy", metrics['accuracy'], epoch)
    summary_writer.add_scalar("validation/loss", metrics['loss'], epoch)
    print(metrics['accuracy'])

In [121]:
trainer.run(train_dl, 4)

<ignite.engine.engine.State at 0x7fee00372da0>

## Intermission: the output of a model

In [None]:
y_

In [None]:
torch.max(y_, 1)

# Write your own dataset

In [None]:
from torch.utils.data import Dataset


class MyData(Dataset):
    def __init__(self):
        super().__init__()
        
    def __len__(self):
        return 0
    
    def __getitem__(self, idx):
        return None

# Visualization

# Linear Regression with PyTorch
# Simple Neural Network
# Conv Net

# Detour: K-Means in PyTorch

# PyTorch Outlook
- [0.4 "just" released](https://pytorch.org/2018/04/22/0_4_0-migration-guide.html)
  - `Tensors` and `Variables` have merged
  - Support for 0-dimensional (scalar) Tensors
  - Deprecation of the `volatile` flag
  
- [The road to 1.0: production ready PyTorch](https://pytorch.org/2018/05/02/road-to-1.0.html)
  - `torch.jit`
  - optimize for mobile
  - quantized inference (such as 8-bit inference)
  - caffe2 merget into pytorch repo
  - ONNX - Open Neural Network Exchange

In [None]:
import torch.jit

In [None]:
from torch.jit import script

@script
def rnn_loop(x):
    hidden = None
    for x_t in x.split(1):
        x, hidden = model(x, hidden)
    return x