### Image Segmentation with Unet - [notebook](https://github.com/fastai/fastai/blob/master/courses/dl2/carvana.ipynb), [video](https://youtu.be/nG3tT31nPmQ), [notes](https://medium.com/@hiromi_suenaga/deep-learning-2-part-2-lesson-14-e0d23c7a0add)

**Todos**
* Image augmentation as per Fastai - rotate images
* Use lr find
* Move ISDataBundle into data_lib
* DONE Freeze Resnet first, then unfreeze
* DONE Build Unet
* DONE Show results after predict
* DONE Standard template for every application - App, Arch, DTR, display batch/results, LR Find, Metrics, Progress, Tensor board results, Debugging layers, One Cycle scheduler, Databundle, Plot hyperparameters, Parameter groups
* DONE Try with bs = 64
* DONE Save the first model and load it when trying 512
* DONE Then save the 512 model and load it when trying 1024
* DONE When freezing, unfreeze the Batch Norm layers in the backbone. Only in Resnet, not the head.
* DONE Use one cycle, recorder
* DONE Use discriminative learning rates
* DONE add an arch.summary()







### Import Libraries

In [None]:
# Reload imported modules automatically before each cell runs, so edits to the
# nb_* helper modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# Echo the value of every top-level expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import IPython.core.debugger as db
from pathlib import Path
import pandas as pd
from matplotlib import pyplot as plt
from PIL import Image
from functools import partial
from concurrent.futures import ThreadPoolExecutor
import shutil

import torch
import torch.nn.functional as F
from torch import tensor, nn
from torch.utils.data import DataLoader, Sampler, SequentialSampler, RandomSampler

In [None]:
# Mount Google Drive into the Colab VM
from google.colab import drive
drive.mount('/content/gdrive')
# Both paths are relative, so they are resolved against the current working directory
gd_path = 'gdrive/My Drive/Colab Data/fastai-v3'  #change dir to your project folder
gn_path = 'gdrive/My Drive/Colab Notebooks'  #change dir to your project folder

# Put <gn_path>/exp on the import path - presumably where the nb_* helper modules
# imported in the next cell live (TODO confirm)
import sys
sys.path.insert(1, gn_path + '/exp')

In [None]:
from nb_util import accuracy, dice, accuracy_thresh
from nb_data import DataBundle, CSVItemContainer, DfItemList, ImageFileItemList, ImageItemList, ILPairedDataset, kaggle_data
from nb_training import Trainer, CudaCB, ProgressCallback, MetricsCB, DebugTracker, DebugYhatLossCB
from nb_optimiser import Recorder, HyperParams, adam_opt_func
from nb_arch import FuncLayer, ArchBase
from nb_image import ShowImg

### Define Data File Paths

In [None]:
# All local data lives under <cwd>/carvana
root_path = Path.cwd()
data_path = root_path/'carvana'
data_path.mkdir(exist_ok=True)

# Original Kaggle files and folders, as extracted from the competition archive
full_metadata_file_path = data_path/'metadata.csv'
full_masks_file_path = data_path/'train_masks.csv'
orig_masks_path = data_path/'train_masks'
orig_imgs_path = data_path/'train'

# Resized (128 px) copies of the full data set
full_masks_path = data_path/'train_masks_128'
full_imgs_path = data_path/'train_128'
full_masks_path.mkdir(exist_ok=True)
full_imgs_path.mkdir(exist_ok=True)

# Local scratch subset folders (not created here)
temp_subset_masks_path = data_path/'train_masks_subset'
temp_subset_imgs_path = data_path/'train_subset'

# Small development subset kept on Google Drive. parents=True so that the
# intermediate '<gd_path>/data' folder is created on a fresh Drive instead of
# raising FileNotFoundError.
g_data_path = Path(gd_path) / 'data'
subset_masks_path = g_data_path/'train_masks_subset'
subset_imgs_path = g_data_path/'train_subset'
subset_masks_path.mkdir(parents=True, exist_ok=True)
subset_imgs_path.mkdir(parents=True, exist_ok=True)

# Checkpoint files for each model / image size
basic_model_128_path = data_path/'basic_model_128.pth'
basic_model_512_path = data_path/'basic_model_512.pth'
basic_model_1024_path = data_path/'basic_model_1024.pth'
unet_model_128_path = data_path/'unet_model_128.pth'
unet_model_512_path = data_path/'unet_model_512.pth'
unet_model_1024_path = data_path/'unet_model_1024.pth'

# Show what is currently in the data folder
list(data_path.iterdir())

### Fetch Dataset from Kaggle

In [None]:
# Fetch the Carvana competition data via the nb_data helper - presumably this downloads
# 'carvana-image-masking-challenge.zip' into the working directory (TODO confirm)
kaggle_data('competitions', 'carvana-image-masking-challenge')

In [None]:
def carvana_extract(zip_file, out_path):
  # It is a gigantic 24GB zip file, so delete some of the largest unnecessary files
  # to reduce it to 8.7GB and save some disk space
  !zip -d {zip_file} test_hq.zip
  !zip -d {zip_file} train_hq.zip
  !zipinfo {zip_file}

  !unzip -j {zip_file} metadata.csv.zip train_masks.csv.zip train.zip train_masks.zip -d {out_path}
  !unzip "carvana/*.zip" -d {out_path} >> /dev/null
  !rm {out_path}/*.zip
  !ls -l {out_path}

carvana_extract('carvana-image-masking-challenge.zip', data_path)

### Explore data

In [None]:
# Peek at the metadata csv
meta_df = pd.read_csv(full_metadata_file_path)
meta_df.head()

In [None]:
# Peek at the masks csv (the 'img' column holds the image file names)
masks_df = pd.read_csv(full_masks_file_path)
masks_df.head()

In [None]:
# First ten entries of the original masks folder
ex_mask_files = list(orig_masks_path.iterdir())[:10]
ex_mask_files

In [None]:
# Show one mask, shrunk for display
Image.open(ex_mask_files[0]).resize((300,200))

In [None]:
# Show the ten example masks in a grid, titled with their file names
ex_masks = [Image.open(mask) for mask in ex_mask_files]
ShowImg.show_grid(ex_masks, [mask_file.name for mask_file in ex_mask_files])

In [None]:
# First ten entries of the original images folder
ex_img_files = list(orig_imgs_path.iterdir())[:10]
ex_img_files

In [None]:
# Show one image, shrunk for display
Image.open(ex_img_files[0]).resize((300,200))

In [None]:
# Show the ten example images in a grid, titled with their file names
ex_imgs = [Image.open(img) for img in ex_img_files]
ShowImg.show_grid(ex_imgs, [img_file.name for img_file in ex_img_files])

In [None]:
# Load all 16 views (file suffixes _01 .. _16) of a single car with their masks
CAR_ID = '154ee2b6d27a'
num_angles = 16
car_imgs = [Image.open(orig_imgs_path/f'{CAR_ID}_{i+1:02d}.jpg') for i in range(num_angles)]
car_masks = [Image.open(orig_masks_path/f'{CAR_ID}_{i+1:02d}_mask.gif') for i in range(num_angles)]

# Overlay each mask on its image
ShowImg.show_grid(car_imgs, car_masks, y_method=ShowImg.show_mask)

In [None]:
# Number of rows in one fifth of the 16-image groups, rounded down to whole groups -
# presumably the size of a 20% validation split made of complete cars (TODO confirm)
((len(masks_df)//16)//5)*16

### Build Unet Architecture

In [None]:
import torchvision.models as models

#----------------------------------------------------
# Block used by Trivial model
#----------------------------------------------------
class BasicUpsample(nn.Module):
  # ----------------------------
  # Doubles the spatial size with a stride-2 transposed conv mapping ch_in
  # channels to ch_out, then applies ReLU followed by batch norm
  # ----------------------------
  def __init__(self, ch_in, ch_out):
    super().__init__()
    # Attribute names are kept as-is because they form the state_dict keys
    self.conv = nn.ConvTranspose2d(in_channels=ch_in, out_channels=ch_out, kernel_size=2, stride=2)
    self.bn = nn.BatchNorm2d(num_features=ch_out)
    self.relu = nn.ReLU()

  def forward(self, x):
    upsampled = self.conv(x)
    activated = self.relu(upsampled)
    # Batch norm is applied last, after the activation
    return self.bn(activated)

#----------------------------------------------------
# Create the basic architecture (Resnet encoder + trivial upsampling head)
#----------------------------------------------------
class ArchImageSegmentationBasic(ArchBase):
  # ----------------------------
  # Print the model summary (delegated to ArchBase) for a square 3-channel
  # input of side 'sz'
  # ----------------------------
  def summary(self, sz=128):
    input_shape = (3, sz, sz)
    super().summary(input_sz=input_shape)

  # ----------------------------
  # Pass the 'on' flag to ArchBase.freeze for the Resnet encoder only; the
  # head is never handed to it
  # ----------------------------
  def freeze(self, on=False):
    super().freeze(module=self.encoder, on=on)

  # ----------------------------
  # Module groups used for discriminative LRs: first six encoder children,
  # remaining encoder children, and the head. Each group's parameters go into
  # their own parameter group.
  # ----------------------------
  def module_groups(self):
    early_encoder = self.encoder[:6]
    late_encoder = self.encoder[6:]
    return [early_encoder, late_encoder, self.head]

  # ----------------------------
  # Build the trivial model: pretrained Resnet34 body followed by a stack of
  # upsampling blocks
  # ----------------------------
  def create_model(self):
    backbone = models.resnet34(pretrained=True)
    # Drop the last two Resnet children (Adaptive Pool and FC layers)
    body_layers = list(backbone.children())[:-2]
    self.encoder = nn.Sequential(*body_layers)

    # One 512->256 block followed by three 256->256 blocks, built in that order
    upsample_blocks = [BasicUpsample(512,256)]
    upsample_blocks += [BasicUpsample(256,256) for _ in range(3)]
    self.head = nn.Sequential(
        nn.ReLU(),
        *upsample_blocks,
        nn.ConvTranspose2d(256, 1, 2, stride=2),
        # Drop the single channel dimension: (bs, 1, h, w) -> (bs, h, w)
        FuncLayer(lambda x: x[:,0])
    )
    self.model = nn.Sequential(self.encoder, self.head)

In [None]:

#----------------------------------------------------
# Decoder Block for Unet model. It consists of two input blocks and one 
# output block:
#   1. Cross input from the matching encoder level. This input is processed by a 
#       cross conv block
#   2. Upward input from the decoder level below it. This input is processed by an 
#       upsample conv transpose block
#   3. Output block concatenates the processed inputs from the two blocks above
#       and processed it through a batch norm and relu block.
#----------------------------------------------------
class UnetUpConv(nn.Module):
  # ----------------------------
  # One Unet decoder level.
  #   cross_ch_in: channels of the cross input from the matching encoder level
  #   up_ch_in:    channels of the upward input from the level below
  #   up_ch_out:   channels produced by this level
  # ----------------------------
  def __init__(self, cross_ch_in, up_ch_in, up_ch_out):
    super().__init__()

    # 1x1 conv for the cross input; it supplies half of the output channels
    cross_ch_out = up_ch_out // 2
    self.cross_conv = nn.Conv2d(cross_ch_in, cross_ch_out, 1)

    # Stride-2 conv transpose for the upsample input. It supplies the remaining
    # channels, so the concatenation has exactly up_ch_out channels even when
    # up_ch_out is odd (two floor-halves would be one channel short and break
    # the batch norm below).
    upsample_out = up_ch_out - cross_ch_out
    self.upsample_conv = nn.ConvTranspose2d(up_ch_in, upsample_out, 2, stride=2)

    # Batch norm for the output
    self.bn = nn.BatchNorm2d(up_ch_out)

  # ----------------------------
  # cross_in must have twice the spatial size of up_in so that the two processed
  # inputs can be concatenated along the channel dimension
  # ----------------------------
  def forward(self, cross_in, up_in):
    # Process the two inputs, cross and upsample, via their respective blocks
    cross_out = self.cross_conv(cross_in)
    upsample_out = self.upsample_conv(up_in)

    # Concat on the channel dimension, then ReLU followed by batch norm
    concat_cross_upsample = torch.cat([cross_out, upsample_out], dim=1)
    up_out = self.bn(F.relu(concat_cross_upsample))
    return up_out

#----------------------------------------------------
# Unet model consisting of a downward Encoder layer stack and an upward Decoder 
# layer stack with cross connections between them.
#
# The Encoder consists of four levels (top level_0 to level_3) plus a fifth bottom
# level_4, that reduce the image size going downward. 
#
# The Decoder consists of four levels (lowest level_3 to top level_0) that
# increase the image size going upward.
#
# There are cross connections between the four matching Encoder and Decoder levels.
# So Encoder level_0 has a cross connection to Decoder level_0 and so on.
# 
# Therefore, each Encoder level sends the same output to two places - one going down 
# to the Encoder level below it and another going cross-wise to the matching Decoder
# level.
#
# Similarly, each Decoder level gets two inputs - one coming up from the Decoder
# level below it and one coming cross-wise from the matching Encoder level.
# 
# The bottom Encoder level_4 is different because it has no matching Decoder 
# level. So it sends its output to only one place - it becomes the upcoming input
# for the lowest Decoder level_3
#----------------------------------------------------
class UnetResnet(nn.Module):
  # ----------------------------
  # Build the Unet around the given Resnet.
  #   resnet: module whose first eight children are used as the encoder. The
  #           channel sizes below (64, 64, 128, 256, 512) are hard-coded, so this
  #           presumably expects a resnet18/34-style backbone (TODO confirm before
  #           using another backbone)
  # ----------------------------
  def __init__(self, resnet):
    super().__init__()

    # Resnet Layers to be used for the Encoder levels, which have cross 
    # connections to the Decoder
    resnet_layers = list(resnet.children())
    self.enc0 = nn.Sequential(*resnet_layers[0:3])
    self.enc1 = nn.Sequential(*resnet_layers[3:5])
    self.enc2 = nn.Sequential(*resnet_layers[5:6])
    self.enc3 = nn.Sequential(*resnet_layers[6:7])
    self.enc4 = nn.Sequential(*resnet_layers[7:8])

    # Channels output from each of the encoder's cross levels become
    # input channels for the corresponding decoder level
    n0 = 64
    cross_chs = [n0, n0, n0 * 2, n0 * 4]

    # up_chs[i] is the number of channels sent upward by level i. Entries 0-3 are
    # the decoder levels; entry 4 is the bottom encoder level_4, whose output
    # becomes the upward input for the bottom decoder level_3
    bottom_ch_4 = n0 * 8
    up_chs = [256, 256, 256, 256, bottom_ch_4]

    # Build Decoder levels, bottom first. Decoder level i gets its cross input
    # from encoder level i and its upward input from level i+1, so the upward
    # input width is always up_chs[i + 1].
    self.dec3 = UnetUpConv(cross_chs[3], up_chs[4], up_chs[3])
    self.dec2 = UnetUpConv(cross_chs[2], up_chs[3], up_chs[2])
    # dec1 is fed by dec2, hence up_chs[2]. The previous up_chs[3] only worked
    # because all decoder widths happen to be equal.
    self.dec1 = UnetUpConv(cross_chs[1], up_chs[2], up_chs[1])
    self.dec0 = UnetUpConv(cross_chs[0], up_chs[1], up_chs[0])

    # Output layer after the Decoder: one output channel, spatial size doubled
    self.out = nn.ConvTranspose2d(up_chs[0], 1, 2, stride=2)

  # ----------------------------
  # Run the input down the encoder and back up the decoder. Returns the output
  # of the final layer with its single channel dimension removed.
  # ----------------------------
  def forward(self, inp):
    # Process the input sequentially downward through all the encoder levels, saving away
    # the intermediate outputs of each level
    e0 = self.enc0(inp)
    e1 = self.enc1(e0)
    e2 = self.enc2(e1)
    e3 = self.enc3(e2)

    # Bottom encoder layer
    e4 = self.enc4(e3)
    e4 = F.relu(e4)

    # Process the encoder's outputs sequentially upward through all the decoder levels
    # The bottom encoder layer's output 'e4' becomes the upward input to the bottom decoder
    # level. The previously saved encoder intermediate outputs become the cross inputs
    # to the matching decoder level. 
    d3 = self.dec3(e3, e4)
    d2 = self.dec2(e2, d3)
    d1 = self.dec1(e1, d2)
    d0 = self.dec0(e0, d1)

    # The decoder's output is passed through the final output layer to generate
    # the Unet model's output. The single channel dimension is then dropped, e.g.
    # from [bs, 1, 128, 128] to [bs, 128, 128]
    out = self.out(d0)
    out = out[:, 0]
    return out

#----------------------------------------------------
# Create the Unet architecture
#----------------------------------------------------
class ArchImageSegmentationUnet(ArchBase):
  # ----------------------------
  # Create a Unet model. It is slightly different because it uses a Resnet
  # encoder backbone instead of building it from scratch
  # ----------------------------
  def create_model(self):
    # Pretrained Resnet34; UnetResnet picks the layers it needs as the encoder
    resnet = models.resnet34(pretrained=True)

    # Build the Unet
    self.model = UnetResnet(resnet)

  # ----------------------------
  # Show summary of the model with output sizes of each layer, for a given
  # input size
  # ----------------------------
  def summary(self, sz=128):
    super().summary(input_sz=(3, sz, sz))

  # ----------------------------
  # Freeze the resnet layers of the model. Only the encoder levels are passed
  # to ArchBase.freeze; the decoder and the output layer are not.
  # ----------------------------
  def freeze(self, on=False):
    # Bind the model explicitly. It was previously read from an undefined local,
    # which raised NameError (or silently used an unrelated global 'model').
    model = self.model
    freeze_module = nn.ModuleList([model.enc0, model.enc1, model.enc2, model.enc3, model.enc4])
    super().freeze(module=freeze_module, on=on)

  # ----------------------------
  # Define the module-layer groups to split the model for discriminative LRs. 
  # The parameters from each module group will be put into separate parameter 
  # groups: early encoder levels, late encoder levels, decoder plus output layer.
  # ----------------------------
  def module_groups(self):
    model = self.model
    lr_groups = [nn.ModuleList([model.enc0, model.enc1, model.enc2]), 
                 nn.ModuleList([model.enc3, model.enc4]),
                 nn.ModuleList([model.dec3, model.dec2, model.dec1, model.dec0, model.out])]
    return lr_groups


### Define Image Segmentation Data Bundle

In [None]:
def mask_name_fn(row):
  # Build the mask file name for a row: take the 'img' value, drop its last
  # four characters (the '.jpg' extension) and append '_mask.gif'
  img_stem = row['img'][:-4]
  return f'{img_stem}_mask.gif'

#----------------------------------------------------
# Image Segmentation preparation pipeline
#----------------------------------------------------
class ImageSegmentationDataBundle(DataBundle):
  # ----------------------------
  # Describe every stage of the data pipeline as parameter dicts and hand them
  # to DataBundle.
  #   csv_path:         csv file listing the images in its 'img' column
  #   img_folder_path:  folder holding the image files
  #   mask_folder_path: folder holding the mask files
  #   resize_sz:        size passed to the 'resize' transform
  #   bs:               batch size for both data loaders
  # ----------------------------
  def __init__(self, csv_path, img_folder_path, mask_folder_path, resize_sz=128, bs=6):
    print ('--------- Image Segmentation DataBundle init', csv_path, img_folder_path, mask_folder_path)

    # Load rows from the csv and split them sequentially 80/20
    load_cfg = {
        'source': CSVItemContainer,
        'target_cls': DfItemList,
        'csv_path': csv_path,
    }
    split_cfg = {
        'split_procedure': 'split_sequential',
        'train_ratio': 0.8,
        'valid_ratio': 0.2,
    }

    # 'x' is the image file from the 'img' column, 'y' is the mask file whose
    # name is derived by mask_name_fn
    x_extract_cfg = {
        'extract_procedure': 'extract_col',
        'target_cls': ImageFileItemList,
        'col': 'img',
        'folder_path': img_folder_path,
    }
    y_extract_cfg = {
        'extract_procedure': 'extract_custom',
        'target_cls': ImageFileItemList,
        'folder_path': mask_folder_path,
        'custom_fn': mask_name_fn,
    }

    # Both sides are converted from files to images
    x_convert_cfg = [
        {'target_cls': ImageItemList, 'convert_procedure': 'FileToImage', 'pair_type': 'mask'},
    ]
    y_convert_cfg = [
        {'target_cls': ImageItemList, 'convert_procedure': 'FileToImage'},
    ]

    # Transform steps, in order
    x_xform_cfg = [
        {'xform_procedure': 'make_rgb'},
        {'xform_procedure': 'resize', 'size': resize_sz},
        {'xform_procedure': 'flip_rotate'},
        #{'xform_procedure': 'perspective_warp', 'crop_size': 100},
        #{'xform_procedure': 'aug', 'aug_name': 'Horizontal Flip'},
        #{'xform_procedure': 'aug', 'aug_name': 'Rotate'},
        {'xform_procedure': 'aug', 'aug_name': 'Random Brightness Contrast'},
        {'xform_procedure': 'to_byte_tensor'},
        {'xform_procedure': 'to_float_tensor'},
        {'xform_procedure': 'normalise'},
    ]

    dataset_cfg = {'target_ds': ILPairedDataset}
    # First entry is for training, second for valid/test
    train_dl_cfg = {'bs': bs, 'sampler_fn': RandomSampler}
    eval_dl_cfg = {'bs': bs, 'sampler_fn': SequentialSampler}
    loader_cfg = (train_dl_cfg, eval_dl_cfg)

    # Set on the instance before the base class initialiser runs
    self.post_proc_params = [
        # Normalise with mean/std as specified by pre-trained Resnet
        {'proc_procedure': 'set_mean_std', 'mean': [0.485, 0.456, 0.406], 'std':[0.229, 0.224, 0.225]},
    ]
    self.display_params = {
        'layout_procedure': 'display_images',
        'figsize': (20, 5),
        'xyz_procedures': ('image', 'mask', 'label'),
    }

    super().__init__(
        load_cfg, split_cfg, x_extract_cfg, y_extract_cfg, x_convert_cfg, y_convert_cfg,
        xform_x_params=x_xform_cfg, ds_params=dataset_cfg, dl_params=loader_cfg)

### Define Image Segmentation application class 

In [None]:
#----------------------------------------------------
# Image Segmentation Application
#----------------------------------------------------
class AppImageSegmentation():
  # Application object tying together data preparation, architecture creation,
  # training and prediction.
  #   _arch: architecture object, set by create_arch() / create_basic_arch()
  #   db:    data bundle, set by load_data()

  def __init__(self):
    self._arch = None
    self.db = None

  # ----------------------------
  # Prepare a subset of the images for rapid testing during development.
  # Writes the first 'num_subset' csv rows to subset_imgs_path (under the same
  # file name as masks_file_path) and copies the matching image and mask files.
  # Returns the subset dataframe.
  # ----------------------------
  def _subset_data(self, masks_file_path, imgs_path, masks_path, subset_imgs_path, subset_masks_path, num_subset=80):
    masks_df = pd.read_csv(masks_file_path)
    subset_df = masks_df.iloc[:num_subset]
    subset_df.to_csv(subset_imgs_path / masks_file_path.name, index=False)

    # Walk the rows actually selected, so a csv with fewer than num_subset rows
    # does not raise IndexError
    for img_file_name in subset_df['img']:
      mask_file_name = f'{img_file_name[:-4]}_mask.gif'
      shutil.copy(imgs_path / img_file_name, subset_imgs_path)
      shutil.copy(masks_path / mask_file_name, subset_masks_path)

    return subset_df

  # ----------------------------
  # Open an image file, resize it to (sz, sz) and save it under the same file
  # name in dest_path
  # ----------------------------
  @staticmethod
  def _resize_file(fn, sz, dest_path):
    # Context manager closes the source file handle; this runs for thousands of files
    with Image.open(fn) as img:
      img.resize((sz, sz)).save(dest_path/fn.name)

  # ----------------------------
  # Reduce every mask and image to a smaller square size (sz x sz), using a
  # pool of 8 threads. Masks are processed first, then images.
  # ----------------------------
  def _resize_data(self, orig_imgs_path, orig_masks_path, out_imgs_path, out_masks_path, sz=128):
    jobs = ((orig_masks_path, out_masks_path), (orig_imgs_path, out_imgs_path))
    for src_path, dest_path in jobs:
      src_files = list(src_path.iterdir())
      resize_fn = partial(self._resize_file, sz=sz, dest_path=dest_path)
      with ThreadPoolExecutor(8) as e:
        # Consume the results: map() only re-raises a worker's exception when its
        # result is retrieved, so without this failures would pass unnoticed
        list(e.map(resize_fn, src_files))

  # ----------------------------
  # Pre-process data by resizing images to a manageable size and creating a subset
  # of the data set. Each step is skipped when its output folder already holds
  # at least 10 entries.
  # ----------------------------
  def pre_process_data(self, full_masks_file_path, orig_imgs_path, orig_masks_path, out_imgs_path, out_masks_path, subset_imgs_path, subset_masks_path):
    num_out_imgs = len(list(out_imgs_path.iterdir()))
    if (orig_masks_path.is_dir() and (num_out_imgs < 10)):
      self._resize_data(orig_imgs_path, orig_masks_path, out_imgs_path, out_masks_path)
    
    num_subset_imgs = len(list(subset_imgs_path.iterdir()))
    if (num_subset_imgs < 10):
      self._subset_data(full_masks_file_path, out_imgs_path, out_masks_path, subset_imgs_path, subset_masks_path)

  # ----------------------------
  # Load the data using the Image Segmentation Data Bundle. Extra keyword
  # arguments (resize_sz, bs) are forwarded to the bundle.
  # ----------------------------
  def load_data(self, csv_path, img_folder_path, mask_folder_path, **kwargs):
    self.db = ImageSegmentationDataBundle(csv_path, img_folder_path, mask_folder_path, **kwargs)
    self.db.do()

  # ----------------------------
  # Create the Unet architecture
  # ----------------------------
  def create_arch(self):
    self._arch = ArchImageSegmentationUnet()
    self._arch.create_model()
    return self._arch

  # ----------------------------
  # Create a basic architecture with a simple upsampling decoder
  # ----------------------------
  def create_basic_arch(self):
    self._arch = ArchImageSegmentationBasic()
    self._arch.create_model()
    return self._arch

  # ----------------------------
  # Train the current architecture on the loaded data and return the Trainer.
  #   freeze_on:  flag passed to the architecture's freeze()
  #   split_lr:   list of learning rates, defaults to [4e-2]
  #   split:      flag passed to HyperParams.set together with the module groups
  #   one_cycle:  flag passed to HyperParams.set
  #   num_epochs: number of epochs to fit
  # ----------------------------
  def run_train(self, freeze_on=False, split_lr=None, split=False, one_cycle=False, num_epochs=1):
    assert(isinstance(freeze_on, bool))
    assert(isinstance(split, bool))
    assert(isinstance(one_cycle, bool))

    train_dl = self.db.train_dl
    valid_dl = self.db.valid_dl

    # Loss function. The models output raw values; this loss applies the sigmoid itself.
    loss_func = nn.BCEWithLogitsLoss()

    # Model
    arch = self._arch
    model = arch.model
    lr_groups = arch.module_groups()
    arch.freeze(freeze_on)

    split_lr = split_lr if (split_lr is not None) else [4e-2]
    opt, hyper_cbs = HyperParams.set(model, lr_groups, split_lr, split, one_cycle, opt_func=adam_opt_func)

    # Debug Tracker
    dtr = DebugTracker(max_count=20, disp=(True, True))
    debug_cbs = [dtr, DebugYhatLossCB(fwd=False)]

    # Metrics. The Recorder is kept at index 1 of the callback list because the
    # notebook cells read it back as loop.cbs[1].
    metrics_dict = {"acc": accuracy_thresh, "dice": dice}
    callbs=[CudaCB(device = torch.device('cuda',0)), Recorder(), ProgressCallback(), MetricsCB(metrics_dict)]
    callbs += hyper_cbs + debug_cbs

    loop = Trainer(train_dl, valid_dl, model, opt, loss_func, callbs, dtr=dtr)
    loop.fit(num_epochs=num_epochs)
    return loop

  # ----------------------------
  # Make some prediction inferences with the trained model on the validation
  # data. Returns three lists of per-item CPU tensors: inputs, raw model
  # outputs and targets. The model is left in eval mode.
  # ----------------------------
  def run_predict(self):
    valid_dl = self.db.valid_dl
    model = self._arch.model
    model.eval()
    # Run on whatever device the model's parameters live on
    device = next(model.parameters()).device

    inps, outs, targs = [], [], []
    with torch.no_grad():
      for xb, yb in valid_dl:
        xb = xb.to(device)
        yhat = model(xb)

        # Split the batch into individual items
        for x, y, p in zip(xb, yb, yhat):
          inps.append(x.cpu())
          outs.append(p.cpu())
          targs.append(y.cpu())
    return inps, outs, targs

### Pre-process Kaggle data - Resize to smaller images and create a subset of the data

In [None]:
# Resize the full data set and build the small development subset. Each step is
# skipped by pre_process_data when its output folder is already populated.
ispp_app = AppImageSegmentation()
ispp_app.pre_process_data(full_masks_file_path, orig_imgs_path, orig_masks_path, full_imgs_path, full_masks_path, subset_imgs_path, subset_masks_path)

In [None]:
# Spot-check one car: its resized images and its original masks
!ls {full_imgs_path}/154ee2b6d27a*.jpg
!ls {orig_masks_path}/154ee2b6d27a*

In [None]:
# The subset csv is stored in the subset images folder under the masks csv file name
subset_df = pd.read_csv(subset_imgs_path / full_masks_file_path.name)
subset_df
# Paths are quoted because the Drive path contains spaces
!ls '{subset_imgs_path}' '{subset_masks_path}'

### Load data

In [None]:
# Choose between the full (resized) data set and the small development subset
load_full = True
if (load_full):
  load_masks_file_path = full_masks_file_path
  load_masks_path = full_masks_path
  load_imgs_path = full_imgs_path
  load_bs = 64
else:
  load_masks_file_path = subset_imgs_path/'train_masks.csv'
  load_masks_path = subset_masks_path
  load_imgs_path = subset_imgs_path
  load_bs = 6

In [None]:
# Build the data bundle with the default resize_sz (128)
is_app = AppImageSegmentation()
is_app.load_data(load_masks_file_path, load_imgs_path, load_masks_path, bs=load_bs)

In [None]:
is_app.db.display_batch()

### Basic Model - Test run

In [None]:
# Smoke test: one epoch with the run_train defaults (no freeze, no split, no one cycle)
is_app.create_basic_arch()
loop = is_app.run_train(num_epochs=1)

In [None]:
# Index 1 is the Recorder in the callback list built by run_train - assumes the
# Trainer keeps callbacks in the order given (TODO confirm)
recorder=loop.cbs[1]
recorder.plot_lr(), recorder.plot_loss()

In [None]:
# Debug tracker results as dataframes (the fourth returned value is ignored)
is_run_df, is_batch_df, is_layer_df, _ = loop.dtr.pd_results()
is_run_df
is_batch_df
is_layer_df

In [None]:
# Predict on the validation data and display inputs, targets and outputs
inps, outs, targs = is_app.run_predict()
is_app.db.display_results(inps, targs, outs)

### Basic Model - Freeze Encoder, Train, Unfreeze and Retrain

In [None]:
# Stage 1: fresh basic model, trained with freeze_on=True and a single learning rate
is_app.create_basic_arch()
loop = is_app.run_train(freeze_on=True, split_lr=[4e-2], one_cycle=True, num_epochs=5)

In [None]:
# Stage 2: continue with freeze_on=False and one learning rate per module group
# (lr/100, lr/10, lr), then save the 128 px weights
lr=1e-2
loop = is_app.run_train(freeze_on=False, split_lr=[lr/100,lr/10,lr], split=True, one_cycle=True, num_epochs=20)
is_app._arch.save_weights(basic_model_128_path)
# Index 1 is the Recorder in the callback list built by run_train
recorder=loop.cbs[1]
recorder.plot_lr()

In [None]:
inps, outs, targs = is_app.run_predict()
is_app.db.display_results(inps, targs, outs)

### Basic Model - Resize to larger images - (512, 512) and (1024, 1024)

In [None]:
# Progressive resizing, step 1: reload the data at 512 px with a smaller batch size
# and start from the weights saved at 128 px.
# NOTE(review): when load_full is True, load_imgs_path is the 128 px copy (train_128),
# so resize_sz=512 presumably enlarges the downsized files rather than using the
# originals - confirm this is intended
is_app = AppImageSegmentation()
is_app.load_data(load_masks_file_path, load_imgs_path, load_masks_path, resize_sz=512, bs=16)

is_app.create_basic_arch()
is_app._arch.load_weights(basic_model_128_path)

# Same two-stage schedule as before: freeze_on=True first, then freeze_on=False
# with per-group learning rates
loop = is_app.run_train(freeze_on=True, split_lr=[4e-2], one_cycle=True, num_epochs=5)
lr=1e-2
loop = is_app.run_train(freeze_on=False, split_lr=[lr/100,lr/10,lr], split=True, one_cycle=True, num_epochs=8)

is_app._arch.save_weights(basic_model_512_path)

In [None]:
# Progressive resizing, step 2: 1024 px, starting from the 512 px weights
is_app = AppImageSegmentation()
is_app.load_data(load_masks_file_path, load_imgs_path, load_masks_path, resize_sz=1024, bs=4)

is_app.create_basic_arch()
is_app._arch.load_weights(basic_model_512_path)

loop = is_app.run_train(freeze_on=True, split_lr=[4e-2], one_cycle=True, num_epochs=2)
lr=5e-3
loop = is_app.run_train(freeze_on=False, split_lr=[lr/100,lr/10,lr], split=True, one_cycle=True, num_epochs=4)

is_app._arch.save_weights(basic_model_1024_path)

### Unet Model - Test run

In [None]:
# Smoke test of the Unet: one epoch with the run_train defaults.
# NOTE(review): this reuses whatever 'is_app' (and therefore data size / batch size)
# the previously executed cell left behind - confirm that is the intended data
is_app.create_arch()
loop = is_app.run_train(num_epochs=1)

In [None]:
# Layer-by-layer summary for a 3 x 128 x 128 input
is_app._arch.summary(128)

In [None]:
# Index 1 is the Recorder in the callback list built by run_train - assumes the
# Trainer keeps callbacks in the order given (TODO confirm)
recorder=loop.cbs[1]
recorder.plot_lr(), recorder.plot_loss()

In [None]:
# Debug tracker results as dataframes (the fourth returned value is ignored)
is_run_df, is_batch_df, is_layer_df, _ = loop.dtr.pd_results()
is_run_df
is_batch_df
is_layer_df

In [None]:
# Predict on the validation data and display inputs, targets and outputs
inps, outs, targs = is_app.run_predict()
is_app.db.display_results(inps, targs, outs)

### Unet Model - Freeze Encoder, Train, Unfreeze and Retrain

In [None]:
# Stage 1: fresh Unet, trained with freeze_on=True and a single learning rate
is_app.create_arch()
loop = is_app.run_train(freeze_on=True, split_lr=[4e-2], one_cycle=True, num_epochs=8)

In [None]:
# Stage 2: continue with freeze_on=False and one learning rate per module group
# (lr/100, lr/10, lr), then save the 128 px weights
lr=1e-2
loop = is_app.run_train(freeze_on=False, split_lr=[lr/100,lr/10,lr], split=True, one_cycle=True, num_epochs=20)
is_app._arch.save_weights(unet_model_128_path)
# Index 1 is the Recorder in the callback list built by run_train
recorder=loop.cbs[1]
recorder.plot_lr()

In [None]:
inps, outs, targs = is_app.run_predict()
is_app.db.display_results(inps, targs, outs)

### Unet Model - Resize to larger images - (512, 512) and (1024, 1024)

In [None]:
# Progressive resizing for the Unet: reload the data at 512 px with a smaller
# batch size and start from the weights saved at 128 px
is_app = AppImageSegmentation()
is_app.load_data(load_masks_file_path, load_imgs_path, load_masks_path, resize_sz=512, bs=16)

is_app.create_arch()
is_app._arch.load_weights(unet_model_128_path)

# Two-stage schedule: freeze_on=True first, then freeze_on=False with per-group
# learning rates
loop = is_app.run_train(freeze_on=True, split_lr=[4e-2], one_cycle=True, num_epochs=5)
lr=1e-2
loop = is_app.run_train(freeze_on=False, split_lr=[lr/100,lr/10,lr], split=True, one_cycle=True, num_epochs=8)

is_app._arch.save_weights(unet_model_512_path)

In [None]:
# Progressive resizing for the Unet, last step: 1024 px, starting from the 512 px weights
is_app = AppImageSegmentation()
is_app.load_data(load_masks_file_path, load_imgs_path, load_masks_path, resize_sz=1024, bs=4)

is_app.create_arch()
is_app._arch.load_weights(unet_model_512_path)

# Two-stage schedule: freeze_on=True first, then freeze_on=False with per-group
# learning rates
loop = is_app.run_train(freeze_on=True, split_lr=[4e-2], one_cycle=True, num_epochs=2)
lr=4e-3
loop = is_app.run_train(freeze_on=False, split_lr=[lr/200,lr/30,lr], split=True, one_cycle=True, num_epochs=4)

# Save to the Unet checkpoint. Writing to basic_model_1024_path here would
# overwrite the basic model's 1024 px weights with Unet weights.
is_app._arch.save_weights(unet_model_1024_path)

### Setup Tensorboard

In [None]:
# Remove the log folder of a previous run (the command reports an error if it does not exist)
!rm -r tbtry/Run-0

In [None]:
# Load the TensorBoard notebook extension and point it at the 'tbtry' log folder
%load_ext tensorboard
%tensorboard --logdir tbtry

### Obsolete

In [None]:
# Run this cell and select the kaggle.json file downloaded
# from the Kaggle account settings page.
from google.colab import files
files.upload()

In [None]:
# Let's make sure the kaggle.json file is present.
!ls -lha kaggle.json

In [None]:
# Next, install the Kaggle API client after forcing an upgrade
!pip uninstall -y kaggle
!pip install --upgrade pip
!pip install kaggle==1.5.6
!kaggle -v

# Reason for doing a force-upgrade. The underlying problem: Colab installs both py2 and py3 
# packages, and (for historical reasons) the py2 packages are installed second. kaggle is a 
# wrapper installed by the kaggle python package; since we do py2 second, the py2 wrapper 
# is in /usr/local/bin, and happens to be an older version.

In [None]:
# The Kaggle API client expects this file to be in ~/.kaggle,
# so move it there.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# This permissions change avoids a warning on Kaggle tool startup.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# List available datasets.
!kaggle competitions list

In [None]:
# First, you have to login to Kaggle, go to that competition's page, navigate to 
# the Rules tab and accept the terms and conditions. Unless you do that, you will get
# a 403-Forbidden error when you run the command below

# Copy the carvana data set locally.
!kaggle competitions download -c carvana-image-masking-challenge

In [None]:
# It is a gigantic 24GB zip file, so delete some of the largest unnecessary files
# to reduce it to 8.7GB and save some disk space
!zip -d carvana-image-masking-challenge.zip test_hq.zip
!zip -d carvana-image-masking-challenge.zip train_hq.zip
!zipinfo carvana-image-masking-challenge.zip

In [None]:
!unzip -j carvana-image-masking-challenge.zip metadata.csv.zip train_masks.csv.zip train.zip train_masks.zip -d {data_path}
!unzip "carvana/*.zip" -d {data_path} >> /dev/null
!rm {data_path}/*.zip
!ls -l {data_path}

In [None]:
# Open an image file, resize it to 128 x 128 and save it under the same file
# name in dest_path
def resize_files(fn, dest_path):
  # Context manager closes the source file handle; this runs for thousands of files
  with Image.open(fn) as img:
    img.resize((128,128)).save(dest_path/fn.name)

# Resize all masks, then all images, with a pool of 8 threads. The map() results
# are consumed because a worker's exception is only re-raised when its result is
# retrieved; otherwise failed files would pass unnoticed.
orig_mask_files = list(orig_masks_path.iterdir())
resize_masks = partial(resize_files, dest_path=full_masks_path)
with ThreadPoolExecutor(8) as e: list(e.map(resize_masks, orig_mask_files))

orig_img_files = list(orig_imgs_path.iterdir())
resize_imgs = partial(resize_files, dest_path=full_imgs_path)
with ThreadPoolExecutor(8) as e: list(e.map(resize_imgs, orig_img_files))

In [None]:
# ----------------------------
# Prepare a subset of the images for rapid testing during development.
# Writes the first 'num_subset' csv rows to subset_imgs_path (under the same
# file name as masks_file_path) and copies the matching image and mask files.
# Returns the subset dataframe.
# ----------------------------
def subset_images(masks_file_path, imgs_path, masks_path, subset_imgs_path, subset_masks_path, num_subset=80):
  masks_df = pd.read_csv(masks_file_path)
  subset_df = masks_df.iloc[:num_subset]
  subset_df.to_csv(subset_imgs_path / masks_file_path.name, index=False)

  # Walk the rows actually selected, so a csv with fewer than num_subset rows
  # does not raise IndexError
  for img_file_name in subset_df['img']:
    mask_file_name = f'{img_file_name[:-4]}_mask.gif'
    shutil.copy(imgs_path / img_file_name, subset_imgs_path)
    shutil.copy(masks_path / mask_file_name, subset_masks_path)

  return subset_df

# The scratch subset folders are only defined as paths, never created, so make
# sure they exist before the csv and the files are written into them
temp_subset_imgs_path.mkdir(exist_ok=True)
temp_subset_masks_path.mkdir(exist_ok=True)
subset_df = subset_images(full_masks_file_path, full_imgs_path, full_masks_path, temp_subset_imgs_path, temp_subset_masks_path, num_subset=5)
#subset_df = subset_images(full_masks_file_path, full_imgs_path, full_masks_path, subset_imgs_path, subset_masks_path, num_subset=80)

In [None]:
class ShowData():
  # Helper for plotting a grid of images, each with either a text label or a
  # mask overlay

  # Can display both image tensors and PIL image objects
  def show_image(self, img, ax):
    ax.axis('off')
    if (isinstance(img, torch.Tensor)):
      # Move the first dimension last for imshow - assumes a (channels, h, w)
      # tensor (TODO confirm)
      img = img.permute(1,2,0)
    ax.imshow(img)

  # Use the label as the title of the axes
  def show_label(self, label, ax):
    ax.set_title(f'{label}')

  # Draw a PIL mask semi-transparently on top of whatever the axes already shows
  def show_mask(self, mask, ax):
    mask = mask.convert('RGBA')
    ax.imshow(mask, alpha=0.7, cmap="Reds")

  # Can display both image tensors and PIL image objects
  #   x_imgs:   images to show
  #   y_labels: one label (img_type='label') or mask (img_type='mask') per image
  #   num_cols: number of grid columns; rows are added as needed
  #   figsize:  figure size, defaults to 3 x 3 per grid cell
  def show_images(self, x_imgs, y_labels, img_type='label', num_cols=10, figsize=None, **kwargs):
    # Local import: 'math' is not among the notebook's top-level imports
    import math

    assert(len(x_imgs) == len(y_labels))

    num_imgs = len(x_imgs)
    if (num_imgs == 0):
      # Nothing to draw; subplots() cannot build a grid with zero rows
      return

    num_rows = int (math.ceil (num_imgs / num_cols))
    if (figsize is None):
      figsize=(num_cols * 3, num_rows * 3)
    # squeeze=False always returns a 2D array of axes, so '.flat' also works
    # for a 1 x 1 grid
    fig,axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    for img, label, ax in zip (x_imgs, y_labels, axes.flat):
      self.show_image(img, ax)
      if (img_type == 'label'):
        self.show_label(label, ax)
      elif (img_type == 'mask'):
        self.show_mask(label, ax)

sd=ShowData()

In [None]:

#----------------------------------------------------
# Image Segmentation preparation pipeline
#----------------------------------------------------
class OldImageSegmentationDataBundle(DataBundle):
  """Data-preparation pipeline for image segmentation, configured from a CSV of image names.

  Every stage is described by a parameter dict (or a list/tuple of dicts) that is handed
  to `DataBundle.__init__`:

    load      - read all rows of `csv_path` (CSVItemContainer -> DfItemList).
    split     - sequential 80% / 20% train/validation split. Per the author's notes the
                CSV is sorted and grouped by car, 16 images per car taken from different
                angles; a sequential split keeps all images of one car on the same side,
                so validation does not see cars the model was trained on (which would
                give artificially good validation results).
    extract x - image file names from the 'img' column, located in `img_folder_path`.
    extract y - custom extraction through `mask_name_fn`, located in `mask_folder_path`
                (per the author's notes, mask names are derived from the 'img' column).
    convert   - image files to images, for both x and y.
    xform x   - make_rgb, resize to 128, to_byte_tensor, to_float_tensor.
    loaders   - batch size 6; RandomSampler for training, SequentialSampler for valid/test.
  """

  def __init__(self, csv_path, img_folder_path, mask_folder_path):
    print ('--------- Image Segmentation DataBundle init', csv_path, img_folder_path, mask_folder_path)

    load_cfg = dict(source=CSVItemContainer, target_cls=DfItemList, csv_path=csv_path)
    split_cfg = dict(split_procedure='split_sequential', train_ratio=0.8, valid_ratio=0.2)

    x_extract_cfg = dict(
      extract_procedure='extract_col',
      target_cls=ImageFileItemList,
      col='img',
      folder_path=img_folder_path,
    )
    y_extract_cfg = dict(
      extract_procedure='extract_custom',
      target_cls=ImageFileItemList,
      folder_path=mask_folder_path,
      custom_fn=mask_name_fn,
    )

    # x and y each get their own conversion list (no shared dict objects).
    x_convert_cfg = [dict(target_cls=ImageItemList, convert_procedure='FileToImage')]
    y_convert_cfg = [dict(target_cls=ImageItemList, convert_procedure='FileToImage')]

    # A custom step would look like: dict(xform_procedure='Custom', custom_fn=func)
    x_xform_cfg = [
      dict(xform_procedure='make_rgb'),
      dict(xform_procedure='resize', size=128),
      dict(xform_procedure='to_byte_tensor'),
      dict(xform_procedure='to_float_tensor'),
    ]
    # TODO: the masks still need to be transformed the same way as the images.

    train_dl_cfg = dict(bs=6, sampler_fn=RandomSampler, collate_fn=collate)
    valid_dl_cfg = dict(bs=6, sampler_fn=SequentialSampler, collate_fn=collate)
    loaders_cfg = (train_dl_cfg, valid_dl_cfg)

    # Set before the base-class initializer runs, as in the original ordering.
    self.show_params = dict(
      layout_procedure='show_images',
      figsize=(20, 5),
      xyz_procedures=('image', 'mask', 'label'),
    )

    super().__init__(
      load_cfg, split_cfg, x_extract_cfg, y_extract_cfg, x_convert_cfg, y_convert_cfg,
      xform_x_params=x_xform_cfg, dl_params=loaders_cfg,
    )

In [None]:
# All pre-trained models expect input images normalized in the same way, i.e. mini-batches of
# 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least 224.
# The images have to be loaded into a range of [0, 1] and then normalized using
# mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225].

class MyFuncLayer(nn.Module):
    """Module that wraps a callable so it can be used as a layer."""

    def __init__(self, func):
        super().__init__()
        # Callable applied to the input on every forward pass.
        self.func = func

    def forward(self, x):
        """Return `func(x)`."""
        return self.func(x)

In [None]:
# Call freeze() on the app's architecture.
# NOTE(review): per the notebook's Todos this presumably freezes the ResNet backbone — confirm in the arch's freeze().
is_app._arch.freeze()

In [None]:
# Show the first five inputs together with the first five outputs, using ShowImg.show_mask as the y_method.
ShowImg.show_grid(inps[:5], outs[:5], y_method=ShowImg.show_mask)

In [None]:
# Side-by-side look at the same slice ([80, :60]) of the first output and the first target.
# NOTE(review): presumably row 80, first 60 columns of an (H, W) mask — confirm the layout.
outs[0][80, :60], targs[0][80, :60]

In [None]:
def smask(img):
    """Show `img` on a new single-axes figure with the axis decorations hidden."""
    _fig, axis = plt.subplots(nrows=1, ncols=1)
    axis.axis('off')
    # No channel permute is applied here: the input goes to imshow as-is.
    axis.imshow(img)

# First output thresholded at 0, followed by the first target.
smask(outs[0] > 0)
smask(targs[0])

In [None]:
def tryit():
  """Train `is_app` for 5 epochs; the value returned by run_train is not kept."""
  is_app.run_train(num_epochs=5)

tryit()

In [None]:
# Seed PyTorch's random number generator with a fixed value before building the app.
torch.manual_seed(0)
# Build a fresh app, then in order: load data (batch size 64), load embedding weights,
# create the architecture and train for 8 epochs.
# NOTE(review): load_emb with embedding-weight paths looks carried over from another
# application — confirm it applies to AppImageSegmentation.
tt_app = AppImageSegmentation()
tt_app.load_data(data_file_path, bs=64)
tt_app.load_emb(emb_x_wgts_path, emb_y_wgts_path)
tt_app.create_arch()
loop = tt_app.run_train(num_epochs=8)

**Inspect Model**

In [None]:
# Summarize the app's architecture.
# NOTE(review): in the visible part of the notebook `summary` is imported from torchsummary
# only in the following cell, where it is called with an input_size — confirm this call
# works as written (cell order / signature).
summary(is_app._arch)

In [None]:
from torchsummary import summary
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Summarize a stand-alone pretrained ResNet-34 for 3x128x128 inputs.
# NOTE(review): `models` is not imported in the visible part of the file — presumably
# torchvision.models; confirm.
my_res = models.resnet34(pretrained=True).to(device)
summary(my_res, input_size=(3, 128, 128))

In [None]:
# Move the app's own model to `device` and summarize it, this time for 3x224x224 inputs.
# Relies on `device` and `summary` having been defined by an earlier cell.
my_model = is_app._arch.model.to(device)
summary(my_model, input_size=(3, 224, 224))