In [None]:
# default_exp trainer

In [None]:
# hide
# skip
# Fetch the project sources; the next cell installs them from this checkout.
!git clone https://github.com/marcomatteo/steel_segmentation.git

In [None]:
# hide
# skip
!pip install -e steel_segmentation

# Trainer

> Training classes for deep learning models with fastai/PyTorch.

In [None]:
# hide
# Reload edited modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook.
%matplotlib inline

In [None]:
# hide
from nbdev.showdoc import *

In [None]:
# export
from steel_segmentation.metadata import *
from steel_segmentation.masks import *
from steel_segmentation.datasets import *
from steel_segmentation.dataloaders import *
from steel_segmentation.metrics import *
from steel_segmentation.loss import *

from fastcore.foundation import *
from fastai.vision.all import *
import fastai

import os
import pdb
import time
import warnings
import random
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
import segmentation_models_pytorch as smp

In [None]:
# hide
# Small fixed subset of images used to keep the examples in this notebook light.
only_imgs = [
    "0a1cade03.jpg", "bca4ae758.jpg", "988cf521f.jpg", "b6a257b28.jpg",
    "b2ad335bf.jpg", "72aaba8ad.jpg", "f383950e8.jpg",
]

# Restrict every dataframe to the selected images (independent copies, same names).
train, train_all, train_multi = (
    df[df["ImageId"].isin(only_imgs)].copy()
    for df in (train, train_all, train_multi)
)

In [None]:
# exports
# Directory holding the saved model weights: a sibling of the data folder `path`.
models_dir = path.parent.joinpath("models")

On the Paperspace Gradient machine I stored the following models:

In [None]:
# missing
# Show what is stored in the models directory (see the recorded output below).
print_competition_data(models_dir)

../models/fastai-UNET-ResNet34-256-stage5.pth
../models/.ipynb_checkpoints
../models/kaggle-UNET-ResNet34.pth
../models/fastai-UNET-ResNet34-256-stage3.pth
../models/kaggle-FPN-ResNet34.pth


## Fast.ai Classification

In [None]:
# exports
# Metrics reported while training the multi-label classification `Learner`.
class_metrics = [
    accuracy_multi,
    PrecisionMulti(),
    RecallMulti(),
]

An example to train fast.ai models with a classification `Learner`.

In [None]:
# missing
bs = 4

dls = get_classification_dls(bs)
# Pass the architecture function itself (as the segmentation example does):
# fastai looks the function up in its model metadata to pick the cut point,
# the parameter-group splitter and the normalization stats, and a
# `partial(resnet18, ...)` wrapper is not found there. Pretrained weights are
# still requested through `pretrained=True` below.
arch = resnet18
class_learner = cnn_learner(
    dls=dls, arch=arch, metrics=class_metrics, pretrained=True)

## Fast.ai Segmentation

First, we train a classification model to obtain an encoder that knows how to recognize defect pixels.
Then, we build a UNet on top of the trained encoder and train a segmentation model.

In [None]:
# exports
# Metrics reported while training the segmentation `Learner`.
seg_metrics = [
    DiceMulti(),
    KaggleDice(),
]

An example to train fast.ai models with a segmentation `Learner`.

In [None]:
# missing
# Batch size and image size used for the segmentation dataloaders.
bs, szs = 4, (128, 800)

# Alternative: build the dataloaders from a dataframe instead.
#dls = get_segmentation_dls_from_df(train_multi, bs, szs)
dls = get_segmentation_dls(bs, szs)

# UNet built on a pretrained ResNet18 encoder.
segmentation_learner = unet_learner(
    dls, resnet18, metrics=seg_metrics, pretrained=True)

In [None]:
#missing
# Print the layer-by-layer summary of the model (output shapes, parameter counts, trainable flags).
segmentation_learner.summary()

  return np.nanmean(binary_dice_scores)


DynamicUnet (Input shape: 4)
Layer (type)         Output Shape         Param #    Trainable 
                     4 x 64 x 64 x 400   
Conv2d                                    9408       False     
BatchNorm2d                               128        True      
ReLU                                                           
MaxPool2d                                                      
Conv2d                                    36864      False     
BatchNorm2d                               128        True      
ReLU                                                           
Conv2d                                    36864      False     
BatchNorm2d                               128        True      
Conv2d                                    36864      False     
BatchNorm2d                               128        True      
ReLU                                                           
Conv2d                                    36864      False     
BatchNorm2d                      

To load custom weights in the Unet Encoder:

In [None]:
# missing
encoder_path = models_dir / "ResNet18-2_class.pt"
# `map_location="cpu"` lets a checkpoint saved on a GPU be read on any machine;
# `load_state_dict` then copies the values into the encoder's existing parameters.
# `strict=True` makes any missing or unexpected key an error.
segmentation_learner.model[0].load_state_dict(
    torch.load(encoder_path, map_location="cpu"), strict=True)

## Pytorch Trainer

This project does not rely on fastai alone: I also built an alternative, pure PyTorch solution based on this [kernel](https://www.kaggle.com/rishabhiitbhu/unet-starter-kernel-pytorch-lb-0-88). 
In this notebook I will go through each part of the training code from that kernel.


In [None]:
# cuda
# UNet with an ImageNet-pretrained ResNet18 encoder, 4 output classes and
# no final activation (the model returns raw logits).
model = smp.Unet(
    encoder_name="resnet18", encoder_weights="imagenet",
    classes=4, activation=None,
)

In [None]:
# export
class Trainer:
    '''Train and validate a segmentation model with plain PyTorch.

    The trainer owns the loss (`BCEWithLogitsLoss`), the optimizer (Adam),
    the learning-rate scheduler (`ReduceLROnPlateau` on the validation loss)
    and one dataloader per phase built with `get_train_dls`.
    Gradients are accumulated over `accumulation_steps` batches before each
    optimizer step. Per-epoch results are collected in `losses`,
    `dice_scores` and `iou_scores`, each a dict keyed by phase.
    '''

    def __init__(self, model, save_path,
                 num_epochs=20, lr=5e-4,
                 bs=16, num_workers=6):
        """
        Set up the model, optimization objects and dataloaders.

        Args:
            model: the network to train; it is moved to the training device.
            save_path: file the best checkpoint is written to by `start`.
            num_epochs: number of epochs run by `start`.
            lr: initial learning rate of the Adam optimizer.
            bs: training batch size; validation uses half of it (at least 1).
            num_workers: forwarded to `get_train_dls` for both phases.
        """
        self.num_workers = num_workers
        # Validation uses half the training batch size, but never an empty batch.
        self.batch_size = {"train": bs, "val": max(1, bs // 2)}
        # Accumulate gradients up to an effective batch of about 32 samples;
        # at least one batch per step so that bs > 32 does not yield 0.
        self.accumulation_steps = max(1, 32 // self.batch_size['train'])
        self.lr = lr
        self.num_epochs = num_epochs
        self.net = model
        self.save_path = save_path

        self.best_loss = float("inf")
        self.phases = ["train", "val"]
        # Train on the first GPU when available, otherwise fall back to the CPU.
        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda:0" if use_cuda else "cpu")
        if use_cuda:
            # NOTE: process-wide setting, new float tensors are created on the GPU.
            torch.set_default_tensor_type("torch.cuda.FloatTensor")
        self.loss_fn = torch.nn.BCEWithLogitsLoss()
        self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        self.scheduler = ReduceLROnPlateau(self.optimizer, mode="min", patience=3, verbose=True)
        self.net = self.net.to(self.device)
        if use_cuda:
            cudnn.benchmark = True

        # One dataloader per phase; mean/std are the normalization statistics
        # handed to `get_train_dls` (the usual ImageNet channel values).
        self.dataloaders = {
            phase: get_train_dls(
                phase=phase,
                mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225),
                batch_size=self.batch_size[phase],
                num_workers=self.num_workers,
            )
            for phase in self.phases
        }

        # Per-epoch history, one list per phase.
        self.losses = {phase: [] for phase in self.phases}
        self.iou_scores = {phase: [] for phase in self.phases}
        self.dice_scores = {phase: [] for phase in self.phases}

    def forward(self, images, targets):
        """
        Forward pass:
            move the images and masks to the training device,
            calculate predictions,
            calculate loss

        Returns:
            loss and predictions
        """
        images = images.to(self.device)
        masks = targets.to(self.device)
        preds = self.net(images)
        loss = self.loss_fn(preds, masks)
        return loss, preds

    def iterate(self, epoch, phase):
        """
        Iterate through each batch of the training or validation phase.

        In the "train" phase gradients are accumulated and the optimizer
        steps every `accumulation_steps` batches, plus once on the last
        batch so that a trailing incomplete group is not discarded.

        Args:
            epoch: index of the current epoch (used for logging).
            phase: "train" or "val".

        Returns:
            the mean loss per batch over the epoch
        """
        meter = Meter(phase, epoch)
        start = time.strftime("%H:%M:%S")
        print(f"Starting epoch: {epoch} | phase: {phase} | ⏰: {start}")

        is_train = phase == "train"
        self.net.train(is_train)
        dataloader = self.dataloaders[phase]

        running_loss = 0.0
        total_batches = len(dataloader)
        self.optimizer.zero_grad()
        for itr, batch in enumerate(dataloader):
            images, targets = batch
            loss, outputs = self.forward(images, targets)
            # Scale so the accumulated gradient is an average over the group.
            loss = loss / self.accumulation_steps
            if is_train:
                loss.backward()
                group_complete = (itr + 1) % self.accumulation_steps == 0
                last_batch = (itr + 1) == total_batches
                # Also step on the last batch: otherwise the gradients of an
                # incomplete final group would be zeroed without being applied.
                if group_complete or last_batch:
                    self.optimizer.step()
                    self.optimizer.zero_grad()
            running_loss += loss.item()
            outputs = outputs.detach().cpu()
            meter.update(targets, outputs)

        # Undo the accumulation scaling to report the mean loss per batch.
        epoch_loss = (running_loss * self.accumulation_steps) / total_batches
        dice, iou = epoch_log(phase, epoch, epoch_loss, meter, start)

        self.losses[phase].append(epoch_loss)
        self.dice_scores[phase].append(dice)
        self.iou_scores[phase].append(iou)

        torch.cuda.empty_cache()
        return epoch_loss

    def start(self):
        """
        Training loop over all the epochs.

        Each epoch runs a training pass and then a validation pass without
        gradient tracking. The scheduler is stepped with the validation loss,
        and the checkpoint is written to `save_path` whenever the validation
        loss improves on the best value seen so far.
        """
        for epoch in range(self.num_epochs):
            self.iterate(epoch, "train")
            # Checkpoint content, built before the validation pass.
            state = {
                "epoch": epoch,
                "best_loss": self.best_loss,
                "state_dict": self.net.state_dict(),
                "optimizer": self.optimizer.state_dict(),
            }
            with torch.no_grad():
                val_loss = self.iterate(epoch, "val")
                self.scheduler.step(val_loss)
            if val_loss < self.best_loss:
                print("******** New optimal found, saving state ********")
                state["best_loss"] = self.best_loss = val_loss
                torch.save(state, self.save_path)
            print()

In [None]:
# Render the API documentation of the constructor.
show_doc(Trainer.__init__)

<h4 id="Trainer.__init__" class="doc_header"><code>Trainer.__init__</code><a href="__main__.py#L5" class="source_link" style="float:right">[source]</a></h4>

> <code>Trainer.__init__</code>(**`model`**, **`save_path`**, **`num_epochs`**=*`20`*, **`lr`**=*`0.0005`*, **`bs`**=*`16`*, **`num_workers`**=*`6`*)

Initialize self.  See help(type(self)) for accurate signature.

In [None]:
# Render the API documentation of the forward pass.
show_doc(Trainer.forward)

<h4 id="Trainer.forward" class="doc_header"><code>Trainer.forward</code><a href="__main__.py#L42" class="source_link" style="float:right">[source]</a></h4>

> <code>Trainer.forward</code>(**`images`**, **`targets`**)

Forward pass: 
    load to GPU the imgs and masks,
    calculate predictions,
    calculate loss

Returns:
    loss and predictions

In [None]:
# Render the API documentation of the per-epoch loop.
show_doc(Trainer.iterate)

<h4 id="Trainer.iterate" class="doc_header"><code>Trainer.iterate</code><a href="__main__.py#L58" class="source_link" style="float:right">[source]</a></h4>

> <code>Trainer.iterate</code>(**`epoch`**, **`phase`**)

Iterate throught each batch in training or validation phase.

In [None]:
# Render the API documentation of the training entry point.
show_doc(Trainer.start)

<h4 id="Trainer.start" class="doc_header"><code>Trainer.start</code><a href="__main__.py#L98" class="source_link" style="float:right">[source]</a></h4>

> <code>Trainer.start</code>()

Training loop for each epochs.

In [None]:
# cuda
# `save_path` is handed to `torch.save` when the validation loss improves, so it
# must be a file (here inside the models directory), not the data folder `path`.
model_trainer = Trainer(model, models_dir / "kaggle-UNET-ResNet18.pth")

In [None]:
# hide
# Run nbdev's notebook export (the recorded output below lists the converted notebooks).
from nbdev.export import notebook2script
notebook2script()

Converted 01_core.ipynb.
Converted 02_data.ipynb.
Converted 03_models.dataloaders.ipynb.
Converted 04_models.metrics.ipynb.
Converted 06_models.model.ipynb.
Converted 07_models.predict.ipynb.
Converted index.ipynb.
