# Cancer Instance Segmentation from Tissue

**YAI 2021 Fall Project - Medical Project Team**

* **[Dongha Kim](https://github.com/kdha0727)**

* **[Donggeon Bae](https://github.com/AttiBae)**

* **[Junho Lee](https://github.com/leejunho0421)**

# Runtime Preparation

## Mount Data Drive

In [None]:
!git clone https://github.com/kdha0727/cancer-instance-segmentation-from-tissue.git
import os
import sys
sys.path.insert(0, os.path.join(os.getcwd(), 'cancer-instance-segmentation-from-tissue'))
try:
    from google.colab import drive  # NOQA
except ImportError:
    raise RuntimeError("This notebook must be run on colab runtime!")
else:
    drive.mount('/content/drive')  # NOQA
    %cd "/content/drive/Shareddrives/YAI 2021 가을학기 의료 프로젝트팀"

## View Runtime Information

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0 or gpu_info.find('not found') >= 0:
    import os
    if 'TPU_NAME' in os.environ:
        mode = 'xla'; print('TPU Runtime')
    else:
        mode = 'cpu'; print('Not connected to a GPU')
else:
    mode = 'cuda'; print(gpu_info)

In [None]:
from psutil import virtual_memory

# Report the total RAM of the runtime; 20 GB is the threshold used for "high-RAM".
ram_gb = virtual_memory().total / 1e9
high_ram_prefix = "Not" if ram_gb < 20 else "You are"
print(
    f'Your runtime has {ram_gb:.1f} gigabytes of available RAM\n'
    f'{high_ram_prefix} using a high-RAM runtime!'
)

In [None]:
import platform
import sys

# Print the OS and Python versions; newlines are stripped from sys.version so
# the report stays on one line.
os_version = platform.platform()
python_version = sys.version.replace("\n", "")
print(f"OS version: \t\t{os_version}\nPython version:\t\t{python_version}")

## Prepare device and library

In [None]:
# Prepare device
# Selects the torch `device` for the runtime `mode` set by the GPU/TPU probe:
# 'xla' -> TPU via torch_xla, 'cuda' -> GPU, anything else -> CPU.

if mode == 'xla':
    try:
        import torch_xla
    except ImportError:
        # torch_xla is missing: download the PyTorch/XLA setup script, run it,
        # then delete the script and any *.whl files in the working directory.
        # NOTE(review): this runs a script fetched from the `master` branch at
        # execution time — consider pinning a specific revision.
        !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
        !python pytorch-xla-env-setup.py
        !rm -rf pytorch-xla-env-setup.py *.whl
        import torch_xla
    # torch is imported only after the optional install step above has run.
    import torch
    import torch_xla.core.xla_model as xm
    # Acquire the default XLA device (the model is moved to it in a later cell)
    device = xm.xla_device()

elif mode == 'cuda':
    import torch
    device = torch.device("cuda")
    # loader_kwargs = dict(pin_memory=True)

else:
    # Fallback for every other mode value: run on CPU.
    import torch
    device = torch.device("cpu")

print(f"Torch version:\t\t{torch.__version__}\nTorch device:\t\t{device}")

In [None]:
# After all installation, import all libraries used.

!pip install torchinfo
!pip install pyclean
!pyclean .

import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import ChainDataset, RandomSampler, DataLoader
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
import torchinfo


# Dataset Preparation

**Dataset Information**

This directory contains training patches of size 256x256 and their masks; it is one of the folds. In total, there are more than 7 thousand training patches across the three folds.

The files within each fold directories are:

* `images.npy` - image patches of 256x256

* `masks.npy` - an array of 6-channel instance-wise masks (0: Neoplastic cells, 1: Inflammatory, 2: Connective/Soft tissue cells, 3: Dead Cells, 4: Epithelial, 5: Background)

* `types.npy` - tissue type that a given patch was extracted from.

## Make Dataset Class

In [None]:
from datasets.numpy_lazy import LazyNumpyDataset

## Instantiate Dataset

In [None]:
# Copy the `data` directory from the current working directory to /content/data,
# the location the dataset paths in the next cell are built from.
# NOTE(review): if /content/data already exists (e.g. the cell is re-run),
# `cp -r` will nest the copy as /content/data/data — confirm this is acceptable.
!cp --verbose -r data /content/data

In [None]:
# Length handed to LazyNumpyDataset for each chunk index
# (presumably the number of samples stored in `<index>.npy` — TODO confirm).
batch_length = [886, 885, 885, 841, 841, 841, 908, 907, 907]

# Chunk indices assigned to each split.
train_subset = [0, 2, 3, 4, 6, 8]
val_subset = [1, 5]
test_subset = [7]

# "{0}" is the array kind ("images" / "masks"), "{1}" the chunk index.
path_format = os.path.join("/content/data", "processed", "{0}", "{1}.npy")


def _chain_chunks(chunk_indices, **dataset_kwargs):
    """Build one LazyNumpyDataset per chunk index and chain them together.

    Extra keyword arguments are forwarded unchanged to every LazyNumpyDataset.
    """
    chunks = []
    for idx in chunk_indices:
        chunks.append(LazyNumpyDataset(
            image_path=path_format.format("images", idx),
            mask_path=path_format.format("masks", idx),
            length=batch_length[idx],
            **dataset_kwargs,
        ))
    return ChainDataset(chunks)


# Only the training split is given a sampler class.
train_dataset = _chain_chunks(train_subset, sampler_class=RandomSampler)  # issue
val_dataset = _chain_chunks(val_subset)
test_dataset = _chain_chunks(test_subset)

# Network Preparation

## Segmentation Network

**DeepLabV3 + Resnet101**: Baseline Model

* **Paper**: [Arxiv 1706.05587](https://arxiv.org/abs/1706.05587)

* **Implementation**: [Pytorch Vision](https://pytorch.org/hub/pytorch_vision_deeplabv3_resnet101/)

**U-Net**

* **Paper**: [Arxiv 1505.04597](https://arxiv.org/abs/1505.04597)

* **Implementation**: [models/unet.py](models/unet.py)

**Inception U-Net**

* **Paper**: [ACM 10.1145/3376922](https://dl.acm.org/doi/abs/10.1145/3376922)

* **Implementation**: [models/unet.py](models/unet.py)

**RefineNet**

* **Paper**: [Arxiv 1611.06612](https://arxiv.org/abs/1611.06612)

* **Implementation**: [models/refinenet.py](models/refinenet.py)


In [None]:
from torchvision.models.segmentation.deeplabv3 import DeepLabHead
from torchvision.models.segmentation.fcn import FCNHead
from torchvision.models.segmentation import deeplabv3_resnet101

from models.unet import UNet, InceptionUNet
from models.refinenet import refinenet50, refinenet101, refinenet152, rf_lw50, rf_lw101, rf_lw152

In [None]:
# Baseline: DeepLabV3 + ResNet101

# Pretrained model: load torchvision's pretrained network, then replace both
# heads with new ones producing 6 output channels.
net = deeplabv3_resnet101(pretrained=True, progress=False)
net.classifier = DeepLabHead(2048, 6)
# net.aux_classifier = nn.Sequential()
net.aux_classifier = FCNHead(1024, 6)

# Non-pretrained alternative:
# net = deeplabv3_resnet101(pretrained=False, num_classes=6)

# Freeze every backbone parameter whose second name component is not listed
# here; parameters outside the backbone are left trainable.
trainable_backbone_layers = ['layer4']
for param_name, param in net.named_parameters():
    if not param_name.startswith('backbone'):
        continue
    if param_name.split('.')[1] in trainable_backbone_layers:
        continue
    param.requires_grad = False

net.to(device)
multi_gpu = torch.cuda.device_count() > 1
if multi_gpu:
    # Replicate across all visible GPUs.
    net = torch.nn.DataParallel(net)
    net.to(device)

torchinfo.summary(net, (1, 3, 256, 256))

## Loss Network

* **Binary Cross Entropy**

* **Dice Coefficient**

* **Intersection over Union Score**

* More Multi-Label Segmentation Losses: https://jeune-research.tistory.com/entry/Loss-Functions-for-Image-Segmentation-Region-Based-Losses

* See also: https://smp.readthedocs.io/en/latest/losses.html

In [None]:
from models.loss import BCEDiceIoUWithLogitsLoss2d, BCEDiceIoULoss2d

# Training

## Set Hyper Parameters

In [None]:
from utils.lr_scheduler import CosineAnnealingWarmUpRestarts

# Training length and DataLoader settings.
# The datasets are lazy-evaluated iterables, so no sampler or shuffle option is set.
num_epoch = 100
batch_size = 35
num_workers = 1

# Loss, optimizer and learning-rate schedule. The classes are instantiated in
# the training cell with the matching *_config keyword arguments.
loss_function = BCEDiceIoUWithLogitsLoss2d()
optimizer_class = torch.optim.Adam
optimizer_config = dict(lr=1e-6)
scheduler_class = CosineAnnealingWarmUpRestarts
scheduler_config = dict(T_0=10, T_mult=2, eta_max=1e-3, T_up=3, gamma=0.5)

## Train and Evaluate

In [None]:
# Training and validation loaders share the same batching options.
train_val_loader_options = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    drop_last=False,
)
train_loader = DataLoader(train_dataset, **train_val_loader_options)
val_loader = DataLoader(val_dataset, **train_val_loader_options)

# Instantiate the optimizer and scheduler from the configured classes.
optimizer = optimizer_class(net.parameters(), **optimizer_config)
lr_scheduler = scheduler_class(optimizer, **scheduler_config)


def load_state_dict(d):
    """Restore the network, optimizer and LR scheduler from a checkpoint mapping.

    `d` must provide the keys 'model', 'optimizer' and 'lr_scheduler'; each
    value is passed to the matching object's own ``load_state_dict``.
    """
    targets = (
        ('model', net),
        ('optimizer', optimizer),
        ('lr_scheduler', lr_scheduler),
    )
    for key, component in targets:
        component.load_state_dict(d[key])


def state_dict():
    """Collect the network, optimizer and LR scheduler states into one mapping.

    Returns an OrderedDict with the keys 'model', 'optimizer' and
    'lr_scheduler', in that order.
    """
    from collections import OrderedDict
    sources = (
        ('model', net),
        ('optimizer', optimizer),
        ('lr_scheduler', lr_scheduler),
    )
    return OrderedDict((key, component.state_dict()) for key, component in sources)


In [None]:
import uuid
from utils.training import train_one_epoch

# Each run writes to its own directory: checkpoint/<model class name>-<random UUID>.
checkpoint_dir = f'checkpoint/{net.__class__.__name__}-{uuid.uuid4()}'
# Create the run directory itself, not just its 'checkpoint' parent: torch.save
# does not create missing directories, so the first save would otherwise fail.
os.makedirs(checkpoint_dir, exist_ok=True)

for ep in range(num_epoch):
    train_one_epoch(net, loss_function, optimizer, lr_scheduler, train_loader, val_loader, device, ep, warmup_start=False)
    # Persist model / optimizer / scheduler state after every epoch.
    torch.save(state_dict(), os.path.join(checkpoint_dir, f'epoch{ep}.pt'))

## Test

In [None]:
from utils.evaluation import all_together, draw_confusion_matrix, show

# Put the network in evaluation mode before running on the test split.
net.eval()

num_workers = 4
# NOTE(review): the datasets are chained iterable datasets; with several workers
# each worker iterates its own copy unless LazyNumpyDataset shards by worker —
# confirm that samples are not duplicated.
test_loader_options = dict(num_workers=num_workers, drop_last=False)
# Large batches for metric computation, single samples for the `show` cell.
test_calc_loader = DataLoader(test_dataset, batch_size=64, **test_loader_options)
test_show_loader = DataLoader(test_dataset, batch_size=1, **test_loader_options)

bce, dice, iou, correct, cm = all_together(
    net, test_calc_loader, device=device, verbose=True
)

In [None]:
# Class names in channel order (index 5 is the background channel).
label_names = [
    "Neoplastic cells",
    "Inflammatory",
    "Connective/Soft tissue cells",
    "Dead Cells",
    "Epithelial",
    "Background"
]

# The matrix is cropped to its first five rows/columns (dropping "Background"),
# so the axis labels are cropped to the same five names; previously all six
# names were passed alongside a 5x5 matrix.
# NOTE(review): assumes draw_confusion_matrix expects one label per row/column
# — confirm against utils.evaluation.
foreground_labels = label_names[:5]

draw_confusion_matrix(
    cm[:5, :5], foreground_labels, foreground_labels,
    figsize=(10, 8), title="Pixel-Wise Confusion Matrix"
)

In [None]:
# Display results for the network using the batch-size-1 test loader.
# NOTE(review): the third argument (10) is presumably the number of samples to
# display — confirm against utils.evaluation.show.
show(net, test_show_loader, 10)