# Transfer learning with PyTorch
We're going to train a neural network to classify dogs and cats.

## Init, helpers, utils, ...

In [1]:
from pprint import pprint
import random
import datetime
import time

from IPython.core.debugger import set_trace

import matplotlib.pyplot as plt
import numpy as np

#from ppt import utils
#from ppt.utils import attr

%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
import torchvision
from torchvision.datasets.folder import ImageFolder, default_loader

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [5]:
# Training helpers
def get_trainable(model_params):
    return (p for p in model_params if p.requires_grad)


def get_frozen(model_params):
    return (p for p in model_params if not p.requires_grad)


def all_trainable(model_params):
    return all(p.requires_grad for p in model_params)


def all_frozen(model_params):
    return all(not p.requires_grad for p in model_params)


def freeze_all(model_params):
    for param in model_params:
        param.requires_grad = False


# list(get_trainable(model.parameters()))
# list(get_frozen(model.parameters()))
# all_trainable(model.parameters())
# all_frozen(model.parameters())

# The Data - DogsCatsDataset

## Transforms

In [6]:
from torchvision import transforms

IMG_SIZE = 224  #224  #defined by NN model input
_mean = [0.485, 0.456, 0.406]
_std = [0.229, 0.224, 0.225]


train_trans = transforms.Compose([
    transforms.Resize((IMG_SIZE,IMG_SIZE)),  #256  #(IMG_SIZE, IMG_SIZE)  # some images are pretty small
    #transforms.RandomCrop(IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(.3, .3, .3),
    transforms.ToTensor(),
    transforms.Normalize(_mean, _std),
])
val_trans = transforms.Compose([
    transforms.Resize((IMG_SIZE,IMG_SIZE)),  #256  #(IMG_SIZE, IMG_SIZE)
    #transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_mean, _std),
])

## Dataset

In [7]:
# not necessary
#from ppt.utils import DogsCatsDataset

In [8]:
# sample data set
#train_ds = DogsCatsDataset("../data/raw", "sample/train", transform=train_trans)
#val_ds = DogsCatsDataset("../data/raw", "sample/valid", transform=val_trans)
#BATCH_SIZE = 2

# full data set
# use ppt.utils
#train_ds = DogsCatsDataset("../data/raw", "train", transform=train_trans)
#val_ds = DogsCatsDataset("../data/raw", "valid", transform=val_trans)
# use pytorch_version default
train_ds = ImageFolder("../data/raw/DUI/train", transform=train_trans, loader=default_loader)
val_ds = ImageFolder("../data/raw/DUI/valid", transform=train_trans, loader=default_loader)

BATCH_SIZE = 10  #2  #256  #512  #32  #220 for resnet152 on Dell Presison 5520 laptop, 400 for resnet18

n_classes = 2

In [9]:
len(train_ds), len(val_ds)

(13787, 1421)

## DataLoader
Batch loading for datasets with multi-processing and different sample strategies.

In [10]:
from torch.utils.data import DataLoader


train_dl = DataLoader(
    train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
val_dl = DataLoader(
    val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

# The Model
PyTorch offers quite a few [pre-trained networks](https://pytorch.org/docs/stable/torchvision/models.html) for you to use:
- AlexNet
- VGG
- ResNet
- SqueezeNet
- DenseNet
- Inception v3

And there are more available via [pretrained-models.pytorch](https://github.com/Cadene/pretrained-models.pytorch)
- NASNet,
- ResNeXt,
- InceptionV4,
- InceptionResnetV2, 
- Xception, 
- DPN,
- ...

In [11]:
from torchvision import models

#model = models.resnet18(pretrained=True)
#model = models.resnet50(pretrained=True)
#model = models.resnet101(pretrained=True)
model = models.resnet152(pretrained=True)

In [12]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [13]:
for param in model.parameters():
    param.requires_grad = True

#for param in model.parameters():
    #pprint(param)

In [14]:
'''# Freeze all parameters
for param in model.parameters():
    param.requires_grad = False'''

'# Freeze all parameters\nfor param in model.parameters():\n    param.requires_grad = False'

In [15]:
'''freeze_all(model.parameters())
assert all_frozen(model.parameters())'''

'freeze_all(model.parameters())\nassert all_frozen(model.parameters())'

Replace the last layer with a linear layer. New layers have `requires_grad = True`.

In [16]:
'''model.fc = nn.Linear(512, n_classes)  # according to the model, 512 for resnet18, 2048 for resnet50 & resnet101 & resnet152'''

'model.fc = nn.Linear(512, n_classes)  # according to the model, 512 for resnet18, 2048 for resnet50 & resnet101 & resnet152'

In [17]:
'''all_frozen(model.parameters())'''

'all_frozen(model.parameters())'

In [18]:
'''# repetitive
def get_model(n_classes=2):
    model = models.resnet18(pretrained=True)
    freeze_all(model.parameters())
    model.fc = nn.Linear(512, n_classes)
    return model'''

#model = get_model().to(device)

model = model.to(device)

# The Loss

In [19]:
criterion = nn.CrossEntropyLoss()

# The Optimizer

In [20]:
optimizer = torch.optim.Adam(
    get_trainable(model.parameters()),
    # model.fc.parameters(),
    lr=0.001,
    # momentum=0.9,
)

# The Train Loop

In [21]:
N_EPOCHS = 20  #1  #2  #10

In [22]:
for epoch in range(N_EPOCHS):
    
    # start epoch
    start_time = time.time()
    start_datetime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"Epoch {epoch+1}/{N_EPOCHS}")
    print(f'  Start Time: {start_datetime}')
    
    # Train
    model.train()  # IMPORTANT
    
    running_loss, correct = 0.0, 0
    for X, y in train_dl:
        X, y = X.to(device), y.to(device)
        
        optimizer.zero_grad()
        # with torch.set_grad_enabled(True):
        y_ = model(X)
        loss = criterion(y_, y)

        loss.backward()
        optimizer.step()
        
        # Statistics
        print(f"    batch loss: {loss.item():0.3f}")
        _, y_label_ = torch.max(y_, 1)
        correct += (y_label_ == y).sum().item()
        running_loss += loss.item() * X.shape[0]
    
    print(f"  Train Loss: {running_loss / len(train_dl.dataset)}")
    print(f"  Train Acc:  {correct / len(train_dl.dataset)}")
    
    
    # Eval
    model.eval()  # IMPORTANT
    
    running_loss, correct = 0.0, 0
    with torch.no_grad():  # IMPORTANT
        for X, y in val_dl:
            X, y = X.to(device), y.to(device)
                    
            y_ = model(X)
        
            _, y_label_ = torch.max(y_, 1)
            correct += (y_label_ == y).sum().item()
            
            loss = criterion(y_, y)
            running_loss += loss.item() * X.shape[0]
    
    print(f"  Valid Loss: {running_loss / len(val_dl.dataset)}")
    print(f"  Valid Acc:  {correct / len(val_dl.dataset)}")
    
    # end epoch
    end_time = time.time()
    end_datetime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    time_elapsed = end_time - start_time
    datetime_elapsed = str(datetime.timedelta(seconds = time_elapsed))
    print(f'  End Time: {end_datetime}')
    print(f'  Time Elapsed: {datetime_elapsed}')
    print()

Epoch 1/20
  Start Time: 2019-01-22 01:48:24
    batch loss: 8.953
    batch loss: 5.169
    batch loss: 5.055
    batch loss: 3.219
    batch loss: 3.600
    batch loss: 0.844
    batch loss: 1.506
    batch loss: 0.924
    batch loss: 0.802
    batch loss: 0.091
    batch loss: 1.203
    batch loss: 1.823
    batch loss: 0.650
    batch loss: 0.983
    batch loss: 0.308
    batch loss: 0.552
    batch loss: 0.726
    batch loss: 0.725
    batch loss: 0.331
    batch loss: 0.432
    batch loss: 0.829
    batch loss: 0.569
    batch loss: 0.715
    batch loss: 0.607
    batch loss: 0.202
    batch loss: 0.589
    batch loss: 0.164
    batch loss: 0.415
    batch loss: 0.425
    batch loss: 0.177
    batch loss: 0.352
    batch loss: 0.689
    batch loss: 0.298
    batch loss: 1.195
    batch loss: 0.653
    batch loss: 0.237
    batch loss: 0.466
    batch loss: 0.537
    batch loss: 0.349
    batch loss: 0.801
    batch loss: 0.383
    batch loss: 0.197
    batch loss: 0.709
    batch

KeyboardInterrupt: 

** -- above is the result of crop extent data set -- **

# Intermission: training libraries

Writing the training loop is my least favourite thing about PyTorch.

Keras is great here!
```python
model.compile(optimizer, criterion, metrics=["accuracy", "f1"])
model.fit(X, y, epochs=10)
```


### [Ignite](https://github.com/pytorch/ignite)
> Ignite is a high-level library to help with training neural networks in PyTorch.
> - ignite helps you write compact but full-featured training loops in a few lines of code
> - you get a training loop with metrics, early-stopping, model checkpointing and other features without the boilerplate


### [TNT](https://github.com/pytorch/tnt)
> TNT is a library providing powerful dataloading, logging and visualization utlities for Python. It is closely intergrated with PyTorch and is designed to enable rapid iteration with any model or training regimen.
> [...]
> The project was inspired by TorchNet, and legend says that it stood for “TorchNetTwo”


### [Skorch](https://github.com/dnouri/skorch)
> A scikit-learn compatible neural network library that wraps PyTorch.


### "The fun of Reinvention"
Clearly, there must be a better way! Write your own lib (but don't use it) :D

## Demo with Ignite

In [None]:
import ignite
from ignite.metrics import (
    CategoricalAccuracy,
    Loss,
    Precision,
)
from ignite.engine import (
    create_supervised_evaluator,
    create_supervised_trainer,
    Events,
)

In [None]:
# model, loss, optimizer
model = get_model().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    get_trainable(model.parameters()),
    lr=0.001,
    momentum=.9,
)

# trainer and evaluator
trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
evaluator = create_supervised_evaluator(
    model,
    metrics={
        "accuracy": CategoricalAccuracy(),
        "loss": Loss(criterion),
        "precision": Precision(),
    },
    device=device,
)

In [None]:
@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    print(f"Epoch[{engine.state.epoch}] Batch[{engine.state.iteration}] Loss: {engine.state.output:.2f}")


# trainer.run(train_dl, max_epochs=1)

In [None]:
@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    evaluator.run(train_dl)
    metrics = evaluator.state.metrics
    print(f"Training Results   - Epoch: {trainer.state.epoch}  "
          f"accuracy: {metrics['accuracy']:.2f} "
          f"loss: {metrics['loss']:.2f} "
          f"prec: {metrics['precision'].cpu()}")


@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    evaluator.run(val_dl)
    metrics = evaluator.state.metrics
    print(f"Validation Results - Epoch: {trainer.state.epoch}  "
          f"accuracy: {metrics['accuracy']:.2f} "
          f"loss: {metrics['loss']:.2f} "
          f"prec: {metrics['precision'].cpu()}")


trainer.run(train_dl, max_epochs=1)

# Visualization with Tensorboard
- https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard
- https://github.com/lanpa/tensorboard-pytorch

Demo: https://github.com/lanpa/tensorboard-pytorch/blob/master/screenshots/Demo.gif


Start tensorboard:
```
cd notebooks
tensorboard --logdir=tf_log
```

In [None]:
rm -r tf_log/*

In [None]:
ls tf_log/

In [None]:
from tensorboardX import SummaryWriter

summary_writer = SummaryWriter(log_dir=f"tf_log/exp_{random.randint(0, 100)}")

In [None]:
ls tf_log

In [None]:
# write some scalars
for i in range(10):
    summary_writer.add_scalar("training/loss", np.random.rand(), i)
    summary_writer.add_scalar("validation/loss", np.random.rand() + .1, i)

Then visit http://localhost:6006

In [None]:
# Visualize the graph/network
X, _ = next(iter(train_dl))
summary_writer.add_graph(model, X)

## Use tensorboard with ignite

In [None]:
# new SummaryWriter for new experiment
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
summary_writer = SummaryWriter(log_dir=f"tf_log/exp_ignite_{now}")

# Basic setup: model, loss, optimizer
model = get_model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(get_trainable(model.parameters()), lr=0.0001, momentum=.9)

# trainer and evaluator
trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
evaluator = create_supervised_evaluator(
    model,
    metrics={"accuracy": CategoricalAccuracy(), "loss": Loss(criterion)},
    device=device,
)

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    evaluator.run(train_dl)
    metrics = evaluator.state.metrics
    epoch = trainer.state.epoch
    summary_writer.add_scalar("training/accuracy", metrics['accuracy'], epoch)
    summary_writer.add_scalar("training/loss", metrics['loss'], epoch)


@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    evaluator.run(val_dl)
    metrics = evaluator.state.metrics
    epoch = trainer.state.epoch
    summary_writer.add_scalar("validation/accuracy", metrics['accuracy'], epoch)
    summary_writer.add_scalar("validation/loss", metrics['loss'], epoch)
    print(metrics['accuracy'])


trainer.run(train_dl, 15)

## Visdom
https://github.com/facebookresearch/visdom
![](https://camo.githubusercontent.com/d69475a01f9f327fc42931a21df8134d1fbdfc19/68747470733a2f2f6c68332e676f6f676c6575736572636f6e74656e742e636f6d2f2d62714839555843772d42452f574c3255736472726241492f41414141414141416e59632f656d727877436d6e7257345f434c54797955747442305359524a2d693443436951434c63422f73302f53637265656e2b53686f742b323031372d30332d30362b61742b31302e35312e30322b414d2e706e67253232766973646f6d5f626967253232)