# Example training notebook file

In [3]:
# add work directory
import os
import sys
import torch

# you should add root directory
sys.path.append(os.path.dirname("../"))
print(torch.__version__)

1.1.0


In [2]:
!pip install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl
!pip install https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp36-cp36m-linux_x86_64.whl

Collecting torchvision==0.3.0
[?25l  Downloading https://download.pytorch.org/whl/cu100/torchvision-0.3.0-cp36-cp36m-linux_x86_64.whl (2.6MB)
[K     |████████████████████████████████| 2.6MB 7.3MB/s 
Installing collected packages: torchvision
  Found existing installation: torchvision 0.4.1+cu100
    Uninstalling torchvision-0.4.1+cu100:
      Successfully uninstalled torchvision-0.4.1+cu100
Successfully installed torchvision-0.3.0


# Repository Init

In [5]:
# clone repo
!rm -rf repo
!git clone https://github.com/puilp0502/pytorch-hair-segmentation repo
!mv repo/* ..

Cloning into 'repo'...
remote: Enumerating objects: 13, done.[K
remote: Counting objects:   7% (1/13)[Kremote: Counting objects:  15% (2/13)[Kremote: Counting objects:  23% (3/13)[Kremote: Counting objects:  30% (4/13)[Kremote: Counting objects:  38% (5/13)[Kremote: Counting objects:  46% (6/13)[Kremote: Counting objects:  53% (7/13)[Kremote: Counting objects:  61% (8/13)[Kremote: Counting objects:  69% (9/13)[Kremote: Counting objects:  76% (10/13)[Kremote: Counting objects:  84% (11/13)[Kremote: Counting objects:  92% (12/13)[Kremote: Counting objects: 100% (13/13)[Kremote: Counting objects: 100% (13/13), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 413 (delta 4), reused 5 (delta 2), pack-reused 400[K
Receiving objects: 100% (413/413), 45.84 MiB | 4.85 MiB/s, done.
Resolving deltas: 100% (219/219), done.
mv: cannot move 'repo/assets' to '../assets': Directory not empty
mv: cannot move 'repo/data' to '../data': Directory not em

In [6]:
# download data
!sh ../data/figaro.sh
!mv Figaro1k ../data/

navigating to ./data/ ...
../data/figaro.sh: 6: cd: can't cd to ./data/
Now downloading Figaro1k.zip ...
--2019-10-31 07:36:30--  https://www.dropbox.com/s/35momrh68zuhkei/Figaro1k.zip
Resolving www.dropbox.com (www.dropbox.com)... 162.125.65.1, 2620:100:6021:1::a27d:4101
Connecting to www.dropbox.com (www.dropbox.com)|162.125.65.1|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/35momrh68zuhkei/Figaro1k.zip [following]
--2019-10-31 07:36:30--  https://www.dropbox.com/s/raw/35momrh68zuhkei/Figaro1k.zip
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc3533586e442b78ae853077f647.dl.dropboxusercontent.com/cd/0/inline/ArfXu3I4TnEh-ODb05pl5i-NAEQ2vy0gYQYNaoRHEUQ-hGClSl4jf47oEta8fsIfLQltaSs9zvLwK8IZ-nx_3jfBRB6XxJjhaUuRN4azJ0fKqA/file# [following]
--2019-10-31 07:36:30--  https://uc3533586e442b78ae853077f647.dl.dropboxusercontent.com/cd/0/inline/ArfXu3I4TnEh-ODb05pl5i-NAEQ

## Loading Figaro dataset using get_loader

In [0]:
# importing dataloader

from data import get_loader

# you have to predefine transforms to load dataset
# this transforms images and masks while loading
# example transforms

from utils import joint_transforms as jnt_trnsf
import torchvision.transforms as std_trnsf


# transforms that are applied to both images and masks
# includes geometrical changes like flip
# implemented in ./utils/joint_transforms.py
joint_transforms = jnt_trnsf.Compose([
    jnt_trnsf.Resize(256),
    jnt_trnsf.RandomRotate(5),
    jnt_trnsf.CenterCrop(224),
    jnt_trnsf.RandomHorizontallyFlip()
])


# transforms that are applied to only images
# this includes color jittering, normalizing, blurring, etc
# use torchvision.transforms, or implement additional transforms in 'utils'
train_image_transforms = std_trnsf.Compose([
    std_trnsf.ColorJitter(0.05, 0.05, 0.05, 0.05),
    std_trnsf.ToTensor(),
    std_trnsf.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


test_image_transforms = std_trnsf.Compose([
    std_trnsf.ToTensor(),
    std_trnsf.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

# transforms that are applied to only masks
mask_transforms = std_trnsf.Compose([
    std_trnsf.ToTensor()
    ])

# predifine other needed arguments
batch_size = 4
num_workers = 1
data_dir = '../data/Figaro1k/'

In [0]:
train_loader = get_loader(dataset='figaro',
                          data_dir=data_dir,
                          train=True,
                          joint_transforms=joint_transforms,
                          image_transforms=train_image_transforms,
                          mask_transforms=mask_transforms,
                          batch_size=batch_size,
                          shuffle=False,
                          num_workers=num_workers)

test_loader = get_loader(dataset='figaro',
                         data_dir=data_dir,
                         train=False,
                         joint_transforms=joint_transforms,
                         image_transforms=test_image_transforms,
                         mask_transforms=mask_transforms,
                         batch_size=1,
                         shuffle=False,
                         num_workers=num_workers)

In [9]:
# two ways of iterating dataloader

# 1. using for loop

for step, (data, target) in enumerate(train_loader):
    break
step, data.size(), target.size() 


(0, torch.Size([4, 3, 224, 224]), torch.Size([4, 1, 224, 224]))

In [10]:
# 2. using iterator
batch_iterator = iter(train_loader)

for _ in range(10):
    data, target = batch_iterator.next()
data.size(), target.size()

(torch.Size([4, 3, 224, 224]), torch.Size([4, 1, 224, 224]))

## Importing model

In [0]:
# replace model location
!sed -i 's/http:\/\/data\.lip6\.fr\/cadene\/pretrainedmodels\/xception-b5690688\.pth/http:\/\/hakk.kr\/xception-b5690688.pth/' ../networks/deeplab_v3_plus.py 

In [24]:
import importlib
import networks.deeplab_v3_plus
importlib.reload(networks.deeplab_v3_plus)

<module 'networks.deeplab_v3_plus' from '../networks/deeplab_v3_plus.py'>

In [0]:
from networks import get_network

# you can add your own model in get_network fuction in ./networks/__init__.py 
# model = get_network(name='SegNet', num_class = 1)

# or just import directly
from networks.deeplab_v3_plus import DeepLab
model = DeepLab()

## Defining Optimizer & Scheduler & loss & device

In [0]:
# torch.optim
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), 
                             lr = lr, 
                             betas=(0.5, 0.999), # beta1 acts like 'momentum' in SGD
                            )

# torch.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

# torch.nn
loss = torch.nn.BCEWithLogitsLoss()

# flag to use gpu or not
device = 'cuda' if torch.cuda.is_available() else 'cpu'

## Using Pytorch Ignite

In [13]:
!pip install pytorch-ignite




In [33]:
!curl https://raw.githubusercontent.com/puilp0502/pytorch-hair-segmentation/master/utils/metrics.py > ../utils/metrics.py

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  2828  100  2828    0     0  14282      0 --:--:-- --:--:-- --:--:-- 14282


In [34]:
import importlib
import utils.metrics
importlib.reload(utils.metrics)

<module 'utils.metrics' from '../utils/metrics.py'>

In [0]:
# ignite moduels
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Loss

# custom modules
from utils.metrics import MultiThresholdMeasures, Accuracy, IoU, F1score

In [0]:
# trainer and evaluator
trainer = create_supervised_trainer(model, optimizer, loss, device=device)
measure = MultiThresholdMeasures()
evaluator = create_supervised_evaluator(model,
                                        metrics={
                                            '': measure,
                                            'pix-acc': Accuracy(measure),
                                            'iou': IoU(measure),
                                            'loss': Loss(loss),
                                            'f1': F1score(measure),
                                            },
                                        device=device)

In [0]:
# saving training state if you want
from utils import update_state, save_ckpt_file
state = update_state(model.state_dict(), 0, 0, 0, 0, 0)

In [42]:
ckpt_root = './ckpt/'
filename = '{network}_{optimizer}_lr_{lr}_epoch_{epoch}.pth'
ckpt_path = os.path.join(ckpt_root, filename)

@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(trainer):
    num_iter = (trainer.state.iteration - 1) % len(train_loader) + 1
    if num_iter % 20 == 0:
        print("Epoch[{}] Iter[{:03d}] Loss: {:.2f}".format(
            trainer.state.epoch, num_iter, trainer.state.output))

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    # evaluate on training set
    evaluator.run(train_loader)
    metrics = evaluator.state.metrics
    print("Training Results - Epoch: {} Avg-loss: {}\n Pix-acc: {}\n IoU: {}\n F1: {}\n".format(
        trainer.state.epoch, metrics['loss'], str(metrics['pix-acc']), str(metrics['iou']), str(metrics['f1'])))

    # update state
    update_state(weight=model.state_dict(),
                  train_loss=metrics['loss'],
                  val_loss=state['val_loss'],
                  val_pix_acc=state['val_pix_acc'],
                  val_iou=state['val_iou'],
                  val_f1=state['val_f1'])

@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(trainer):
    # evaluate test(validation) set
    evaluator.run(test_loader)
    metrics = evaluator.state.metrics
    print("Validation Results - Epoch: {} Avg-loss: {}\n Pix-acc: {}\n IoU: {}\n F1: {}\n".format(
        trainer.state.epoch, metrics['loss'], str(metrics['pix-acc']), str(metrics['iou']), str(metrics['f1'])))

    # update scheduler
    scheduler.step(metrics['loss'])

    # update and save state
    update_state(weight=model.state_dict(),
                  train_loss=state['train_loss'],
                  val_loss=metrics['loss'],
                  val_pix_acc=metrics['pix-acc'],
                  val_iou=metrics['iou'],
                  val_f1=metrics['f1'])
    path = ckpt_path.format(network='deeplab',
                            optimizer=optimizer,
                            lr=lr,
                            epoch=trainer.state.epoch)
    save_ckpt_file(path, state)

trainer.run(train_loader, max_epochs=100)

Epoch[1] Iter[020] Loss: 0.12
Epoch[1] Iter[020] Loss: 0.12
Epoch[1] Iter[040] Loss: 0.11
Epoch[1] Iter[040] Loss: 0.11
Epoch[1] Iter[060] Loss: 0.12
Epoch[1] Iter[060] Loss: 0.12
Epoch[1] Iter[080] Loss: 0.15
Epoch[1] Iter[080] Loss: 0.15
Epoch[1] Iter[100] Loss: 0.14
Epoch[1] Iter[100] Loss: 0.14
Epoch[1] Iter[120] Loss: 0.12
Epoch[1] Iter[120] Loss: 0.12
Epoch[1] Iter[140] Loss: 0.13
Epoch[1] Iter[140] Loss: 0.13
Epoch[1] Iter[160] Loss: 0.26
Epoch[1] Iter[160] Loss: 0.26
Epoch[1] Iter[180] Loss: 0.33
Epoch[1] Iter[180] Loss: 0.33
Epoch[1] Iter[200] Loss: 0.10
Epoch[1] Iter[200] Loss: 0.10
Training Results - Epoch: 1 Avg-loss: 0.18539965878285114
 Pix-acc: [0.423, 0.903, 0.92, 0.925, 0.926, 0.926, 0.922, 0.916, 0.903, 0.875, 0.577]
 IoU: [0.423, 0.809, 0.833, 0.84, 0.84, 0.835, 0.826, 0.809, 0.777, 0.71, 0.0]
 F1: [0.594, 0.894, 0.909, 0.913, 0.913, 0.91, 0.905, 0.894, 0.875, 0.83, nan]

Validation Results - Epoch: 1 Avg-loss: 0.18581843047979332
 Pix-acc: [0.42, 0.898, 0.916, 0.923

KeyboardInterrupt: ignored

In [45]:
!tar cvzf ckpt.tar.gz ckpt/


ckpt/
ckpt/deeplab_Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.5, 0.999)\n    eps: 1e-08\n    lr: 0.001\n    weight_decay: 0\n)_lr_0.001_epoch_8.pth
ckpt/deeplab_Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.5, 0.999)\n    eps: 1e-08\n    lr: 0.001\n    weight_decay: 0\n)_lr_0.001_epoch_4.pth
ckpt/deeplab_Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.5, 0.999)\n    eps: 1e-08\n    lr: 0.001\n    weight_decay: 0\n)_lr_0.001_epoch_9.pth
ckpt/deeplab_Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.5, 0.999)\n    eps: 1e-08\n    lr: 0.001\n    weight_decay: 0\n)_lr_0.001_epoch_5.pth
ckpt/deeplab_Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.5, 0.999)\n    eps: 1e-08\n    lr: 0.001\n    weight_decay: 0\n)_lr_0.001_epoch_3.pth
ckpt/deeplab_Adam (\nParameter Group 0\n    amsgrad: False\n    betas: (0.5, 0.999)\n    eps: 1e-08\n    lr: 0.001\n    weight_decay: 0\n)_lr_0.001_epoch_6.pth
ckpt/deeplab_Adam (\nParameter Gro

In [52]:
!cat ckpt.tar.gz | nc hakk.kr 8888




## To do this in one-queue

```bash
# run this in root

python3 main.py \
  --networks segnet \
  --scheduler ReduceLROnPlateau \
  --batch_size 4 \
  --epochs 100 \
  --lr 1e-3 \
  --num_workers 4 \
  --optimizer adam \
  --momentum 0.5
```