<center><h1>Sartorius - Cell Instance Segmentation.</h1></center>

![img](https://storage.googleapis.com/kaggle-competitions/kaggle/30201/logos/header.png)

### **Semantic Segmentation: each pixel of an image is linked to a class label.**

![img](https://raw.githubusercontent.com/WaterKnight1998/SemTorch/develop/readme_images/semantic_segmentation.png)

### **Instance Segmentation: similar to semantic segmentation, but it goes a step further: for each pixel, it identifies the object instance the pixel belongs to.**

![img](https://raw.githubusercontent.com/WaterKnight1998/SemTorch/develop/readme_images/instance_segmentation.png)

## Please don't forget to upvote if you like the work.

### if you fork the notebook please try to upvote it too.
**It keeps me motivated**



In [None]:
from fastai.vision.all import *
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import seaborn as sns
import matplotlib.image as immg
from joblib import Parallel, delayed
import PIL,cv2,gc,os,sys,torch

In [None]:
# Create torch hub's checkpoint cache directory and copy the ResNet weight files
# from the attached input datasets into it under the file names used below.
# NOTE(review): presumably this lets the pretrained backbones load without a
# download — confirm the target file names match what torchvision looks up.
Path('/root/.cache/torch/hub/checkpoints/').mkdir(exist_ok=True, parents=True)
!cp '../input/resnet34/resnet34.pth' '/root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth' 
!cp "../input/resnet50/resnet50.pth" '/root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth'
!cp '../input/resnet18/resnet18.pth' '/root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth' 

## The data has already been split into 2x2 tiles to enlarge the dataset and speed up training

https://www.kaggle.com/robertlangdonvinci/sartorius-cell-segmentation-data-gen/notebook

### Since the data is split into 2x2 tiles, some tiles contain no masks; we will filter them out.

In [None]:
path = Path('../input/sartoriuscellinstancesegmentationmaskpng')

In [None]:
def label_func(fn):
    """Return the path (as a string) of the mask PNG belonging to image file `fn`.

    The mask file name is the image's stem followed by `_mask.png`, located in
    the TrainMask2x2 folder of the Kaggle input dataset.
    """
    mask_name = f"{fn.stem}_mask.png"
    return f"/kaggle/input/sartoriuscellinstancesegmentationmaskpng/TrainMask2x2/{mask_name}"

In [None]:
img_files = get_image_files(path/'TrainImage2x2')

In [None]:
# Keep only the tiles whose mask contains more than one distinct pixel value,
# i.e. drop tiles whose mask is a single uniform value.
img_files_clean = [] 
for f in tqdm(img_files):
    loc = label_func(f)
    # Despite its name, `img` holds the mask's sorted unique values, not pixels.
    img = np.unique(np.array(Image.open(loc)))
    if len(img)!=1:
        img_files_clean.append(f)

In [None]:
len(img_files),len(img_files_clean)

In [None]:
img_files = img_files_clean 

In [None]:
n = np.random.randint(0,100)
img = PIL.Image.open(img_files[n])
mask = PIL.Image.open(label_func(img_files[n]))

In [None]:
plt.figure(1,figsize=(18,8))
plt.subplot(121)
plt.imshow(img)
plt.title('raw image')
plt.subplot(122)
plt.imshow(img)
plt.imshow(mask,alpha=0.5);
plt.title('image + mask');

In [None]:
img_path = Path('../input/sartoriuscellinstancesegmentationmaskpng/TrainImage2x2')

In [None]:
def get_classes(fnames):
    """Collect the distinct pixel values found across the masks of `fnames`.

    Args:
        fnames: image file paths; each one is mapped to its mask file through
            `label_func`.

    Returns:
        1-D numpy array with every value that occurs in at least one mask,
        sorted ascending so the result is deterministic.
    """
    class_codes = set()
    for fname in tqdm(fnames):
        # Close each mask file as soon as its values have been read.
        with Image.open(label_func(fname)) as mask_img:
            class_codes.update(np.unique(np.asarray(mask_img)))
    return np.array(sorted(class_codes))

In [None]:
codes = get_classes(img_files);codes

In [None]:
mask.shape

In [None]:
img_files_big = get_image_files('../input/sartorius-cell-instance-segmentation/train')

## Creating A dataloader

In [None]:
def label_func2(fn):
    """Load the 2x2-tile mask of image `fn` as a numpy array clipped to [0, 1].

    The mask is looked up by the image's stem in the TrainMask2x2 folder.
    """
    stem = Path(fn).stem
    mask = Image.open(f"../input/sartoriuscellinstancesegmentationmaskpng/TrainMask2x2/{stem}_mask.png")
    return np.clip(np.array(mask), 0, 1)

In [None]:
def label_func_big(fn):
    """Load the full-size mask of image `fn` as a numpy array clipped to [0, 1].

    The mask is looked up by the image's stem in the cell-train-mask-big dataset.
    """
    stem = Path(fn).stem
    mask = Image.open(f"../input/cell-train-mask-big/{stem}.png")
    return np.clip(np.array(mask), 0, 1)

In [None]:
# Dataloaders over the cleaned 2x2 tiles: masks are produced by `label_func2`
# (values clipped to 0/1), so the task is a two-code segmentation with codes [0, 1].
dls = SegmentationDataLoaders.from_label_func(img_path, bs=12, 
                                                    fnames = img_files,
                                                    label_func = label_func2, 
                                                    codes = [0,1])

In [None]:
dls.show_batch(max_n=8,figsize=(20,8))

In [None]:
len(dls.train_ds),len(dls.valid_ds)

In [None]:
# Lookup from each mask code to its position in `codes`.
name2id = {code: idx for idx, code in enumerate(codes)}
# Target value that is excluded from the accuracy computation.
void_code = -1

def cell_mask_accuracy(input, target):
    """Fraction of non-void target pixels whose arg-max prediction is correct.

    `input` holds per-class scores along dim 1; `target` holds the class index
    per pixel (a singleton dim 1, if present, is squeezed away).
    """
    target = target.squeeze(1)
    keep = target != void_code
    predicted = input.argmax(dim=1)
    hits = predicted[keep] == target[keep]
    return hits.float().mean()

In [None]:
acc = cell_mask_accuracy

## IoU metrics

In [None]:
# https://forums.fast.ai/t/multi-class-semantic-segmentation-metrics-and-accuracy/74665/4
# Return Jaccard index, or Intersection over Union (IoU) value
def IoU(preds:Tensor, targs:Tensor, eps:float=1e-8):
    """Compute the soft Jaccard index (mean per-class IoU) for a batch.

    Probabilities (sigmoid for a single output channel, softmax otherwise) are
    compared against the one-hot encoded targets; intersection and union are
    accumulated per class over the batch and both spatial axes, and the
    per-class ratios are averaged.

    Args:
        preds: a tensor of shape [B, C, H, W]. Corresponds to
            the raw output or logits of the model. (prediction)
        targs: an integer tensor of class indices of shape [B, H, W]
            or [B, 1, H, W].
        eps: added to the denominator for numerical stability.
    Returns:
        iou: the average class intersection over union value
             for multi-class image segmentation (a scalar tensor).
    """
    num_classes = preds.shape[1]

    # Only drop dim 1 when it is the singleton channel axis of a [B, 1, H, W]
    # target; a [B, H, W] target must keep its H axis even when H == 1.
    labels = targs.squeeze(1) if targs.ndimension() == preds.ndimension() else targs

    # Single class segmentation?
    if num_classes == 1:
        # Build the identity on the target's device so the lookup also works on GPU.
        true_1_hot = torch.eye(num_classes + 1, device=labels.device)[labels]
        true_1_hot = true_1_hot.permute(0, 3, 1, 2).float()
        background = true_1_hot[:, 0:1, :, :]
        foreground = true_1_hot[:, 1:2, :, :]
        # Channel order [foreground, background] matches `probas` below.
        true_1_hot = torch.cat([foreground, background], dim=1)
        pos_prob = torch.sigmoid(preds)
        neg_prob = 1 - pos_prob
        probas = torch.cat([pos_prob, neg_prob], dim=1)

    # Multi-class segmentation
    else:
        # Convert target to one-hot encoding: [B,H,W] -> [B,H,W,C]
        true_1_hot = torch.eye(num_classes, device=labels.device)[labels]

        # Permute [B,H,W,C] to [B,C,H,W]
        true_1_hot = true_1_hot.permute(0, 3, 1, 2).float()

        # Take softmax along class dimension; all class probs add to 1 (per pixel)
        probas = F.softmax(preds, dim=1)

    true_1_hot = true_1_hot.type(preds.type())

    # Reduce over the batch and every spatial axis of the [B,C,H,W] tensors.
    # The axes are taken from `probas`, not `targs`: a [B,H,W] target has one
    # dimension fewer, which would leave the H axis unreduced.
    dims = (0,) + tuple(range(2, probas.ndimension()))
    intersection = torch.sum(probas * true_1_hot, dims) # [class0,class1,class2,...]
    cardinality = torch.sum(probas + true_1_hot, dims)  # [class0,class1,class2,...]
    union = cardinality - intersection
    iou = (intersection / (union + eps)).mean()   # find mean of class IoU values
    return iou

## Creating a UNet Learner

**This module builds a dynamic U-Net from any backbone pretrained on ImageNet, automatically inferring the intermediate sizes.**

***

![dynamicUnet](https://fastai1.fast.ai/imgs/u-net-architecture.png)

***

**This is the original U-Net. The difference here is that the left part is a pretrained model.**

https://fastai1.fast.ai/vision.models.unet.html

In [None]:
# Build a U-Net learner on a ResNet-34 backbone; models are saved under
# /kaggle/working/ and pixel accuracy, Dice and IoU are reported as metrics.
# `.to_fp16()` switches the learner to fastai's mixed-precision training.
learn = unet_learner(dls, resnet34,  model_dir='/kaggle/working/',metrics=[acc,Dice(),IoU]).to_fp16()

* Launch a mock training to find a good learning rate

In [None]:
learn.lr_find()

In [None]:
gc.collect()

* Start your training

In [None]:
# Keep the checkpoint with the highest validation IoU.
cb1 = SaveModelCallback(monitor='IoU',fname='best_model',comp=np.greater) # Callbacks
# fastai v2 DIVIDES the learning rate by `factor`, so a value below 1 would
# raise the LR on a plateau; factor=5 gives the intended x0.2 reduction.
cb2 = ReduceLROnPlateau(monitor='IoU', patience=1,factor=5)
learn.fit_one_cycle(2, 1e-3,cbs = [cb1,cb2])

In [None]:
# Dataloaders over the full-size training images (`img_files_big`), with masks
# loaded by `label_func_big`; a smaller batch size is used than for the tiles.
# NOTE(review): `img_path` still points at the 2x2 tile folder while the files
# come from the competition's train folder — confirm the base path is intended.
dls_big = SegmentationDataLoaders.from_label_func(img_path, bs=4, 
                                              fnames = img_files_big,
                                              label_func = label_func_big, 
                                              codes = [0,1])

In [None]:
learn.dls = dls_big

In [None]:
# Keep the checkpoint with the highest validation IoU on the full-size images.
cb1 = SaveModelCallback(monitor='IoU',fname='best_model_big',comp=np.greater) # Callbacks
# fastai v2 DIVIDES the learning rate by `factor`, so a value below 1 would
# raise the LR on a plateau; factor=5 gives the intended x0.2 reduction.
cb2 = ReduceLROnPlateau(monitor='IoU', patience=1,factor=5)
learn.fit_one_cycle(1, 1e-5, cbs = [cb1,cb2])

In [None]:
learn.load('/kaggle/working/best_model_big');

In [None]:
# Exporting is best-effort: a failure must not stop the notebook, but it is
# reported instead of being silently discarded.
try:
    learn.export('/kaggle/working/export.pkl')
except Exception as err:
    print(f"learn.export failed: {err!r}")

In [None]:
learn.show_results(max_n = 8, figsize = (10,16) )

## our top 3 losses

In [None]:
interp = SegmentationInterpretation.from_learner(learn)
interp.plot_top_losses(k=3)

## Loading Submission files and predicting results

In [None]:
submission = pd.read_csv('../input/sartorius-cell-instance-segmentation/sample_submission.csv')
submission.head()

In [None]:
test_data_path = submission['id'].apply(lambda x:f'../input/sartorius-cell-instance-segmentation/test/{x}.png').tolist()

In [None]:
# Build a test dataloader from the test image paths using the learner's current
# dataloaders, then keep only the first element returned by `get_preds`
# (the predictions; `argmax` over axis 0 is taken per item in the next cell).
tst_dl = learn.dls.test_dl(test_data_path)
preds = learn.get_preds(dl = tst_dl)[0]

In [None]:
prediction_masks = [x.argmax(axis=0) for x in preds]

## A look at test predictions

In [None]:
im_num = 2
ts_img = PIL.Image.open(test_data_path[im_num])
ts_mask = prediction_masks[im_num]

In [None]:
plt.figure(1,figsize=(18,8))
plt.subplot(121)
plt.imshow(ts_img)
plt.title('Test Image')
plt.subplot(122)
plt.imshow(ts_img)
plt.imshow(ts_mask,alpha=0.5);
plt.title('Test Image + Predicted Mask');

## Converting predicted semantic masks to instance masks and then to run length encodings

**Since fastai only provides semantic segmentation, we will use a workaround to convert its output into instance segmentation masks**

**We will use the connected components algorithm to convert the semantic mask into an instance mask**

**Connected Component Labeling (CCL)** is a basic algorithm in image processing and an essential step in nearly every application dealing with object detection. It groups together pixels belonging to the same connected component

![ccl](https://homepages.inf.ed.ac.uk/rbf/HIPR2/labelb.gif)

In [None]:
def CCL(img_arr):
    """Label the connected components of a mask and return them as hue codes.

    Args:
        img_arr: single-channel 8-bit image passed straight to
            `cv2.connectedComponents`.

    Returns:
        2-D uint8 array. Background (label 0) stays 0; every other component
        label is scaled linearly into 0-179 (the hue range in OpenCV), with
        the largest label mapped to 179. An image without any foreground
        component yields an all-zero array.
    """
    # Applying cv2.connectedComponents()
    _, labels = cv2.connectedComponents(img_arr)
    max_label = np.max(labels)
    if max_label == 0:
        # Only background found: avoid the 0/0 division below.
        return np.zeros(labels.shape, dtype=np.uint8)
    # Map component labels to hue val, 0-179 is the hue range in OpenCV.
    # NOTE(review): with more than 179 components several labels truncate to
    # the same hue and downstream code will treat them as one instance;
    # returning `labels` directly would avoid that but changes the dtype.
    return np.uint8(179*labels/max_label)

In [None]:
# Take the predicted mask of the example image, append a trailing channel axis
# and cast it to uint8 before running connected-component labelling on it.
is_mask = np.expand_dims(prediction_masks[im_num].numpy(),axis=-1).astype(np.uint8)
is_img = CCL(is_mask)

In [None]:
# Three panels: the raw test image, the component map returned by CCL, and
# that component map overlaid on the test image.
plt.figure(1,figsize=(18,8))
plt.subplot(131)
plt.imshow(ts_img)
plt.title('Test Image')
plt.subplot(132)
plt.imshow(is_img)
plt.title('Instance Converted mask')
plt.subplot(133)
plt.imshow(ts_img)
# Overlay the instance-labelled map (not the binary semantic mask) so the
# panel shows what its title announces.
plt.imshow(is_img,alpha=0.5);
plt.title('Test Image upon Instance Converted Mask');

**See how CCL algorithm has colored each mask with a different color**

In [None]:
# From https://www.kaggle.com/stainsby/fast-tested-rle
def rle_decode(mask_rle, shape=(520, 704)):
    '''
    Decode a run-length encoded string into a binary mask.

    mask_rle: run-length as string formatted "start length start length ...",
              where starts are 1-based offsets into the flattened mask
    shape: (height,width) of array to return
    Returns numpy uint8 array, 1 - mask, 0 - background
    '''
    tokens = np.asarray(mask_rle.split(), dtype=int)
    # Even positions are starts (shifted to 0-based), odd positions are lengths.
    run_starts = tokens[0::2] - 1
    run_ends = run_starts + tokens[1::2]
    flat = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = 1
    # Row-major reshape matches the direction the runs were encoded in.
    return flat.reshape(shape)

def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formatted "start length start length ..."
    with 1-based starts into the flattened array
    '''
    # Pad with a zero on both ends so runs touching the borders are closed.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-based positions where the value changes: run starts alternate with run ends.
    runs = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every end position into the length of its run.
    runs[1::2] -= runs[::2]
    return ' '.join(map(str, runs))

In [None]:
def convert_seg_ins(img_f):
    """Split a labelled image into one binary mask per non-zero label.

    Works for any image size (the shape is taken from the input) and does not
    require a background label to be present.

    Args:
        img_f: 2-D array of component labels; 0 marks background.

    Returns:
        float64 array of shape [n_labels, H, W], ordered by ascending label
        value, holding 1.0 where the pixel carries that label and 0.0
        elsewhere. When there is no non-zero label the result is an empty
        array of shape (0,).
    """
    labels = np.asarray(img_f)
    instance_ids = [g for g in np.unique(labels) if g != 0]
    all_masks = [(labels == g).astype(np.float64) for g in instance_ids]
    return np.array(all_masks)

### Writing masks to rle

In [None]:
sub_ids = submission['id'].values

In [None]:
# Turn every predicted semantic mask into per-instance RLE rows.
MIN_INSTANCE_AREA = 50  # instances must cover MORE than this many pixels to be kept
res = []
for i, pred in enumerate(tqdm(prediction_masks)):
    chk_mask = np.expand_dims(pred.numpy(),axis=-1).astype(np.uint8)
    lbl_img = CCL(chk_mask)
    pred_masks = convert_seg_ins(lbl_img)
    for mask in pred_masks:
        # Instance area in pixels. Summing the 0/1 mask also works when the
        # mask has a single unique value, where indexing the second entry of
        # np.unique's counts would raise IndexError.
        ts = mask.sum()
        #removing blocks with very small areas
        if ts>MIN_INSTANCE_AREA:
            res.append([sub_ids[i],rle_encode(mask)])

In [None]:
sub_df = pd.DataFrame(res,columns=['id', 'predicted'])

In [None]:
sub_df.head()

In [None]:
sub_df.to_csv('submission.csv',index=False)

## Please don't forget to upvote if you like the work.

In [None]:
sub_df['id'].value_counts()