# Detectron: ensemble inference with NMS

## Acknowledgements

Based on these excellent notebooks [Positive score with Detectron 2/3 - Training](https://www.kaggle.com/slawekbiel/positive-score-with-detectron-2-3-training) and [Positive score with Detectron 3/3 - Inference](https://www.kaggle.com/slawekbiel/positive-score-with-detectron-3-3-inference). Please upvote them.

The [Weighted boxes fusion](https://github.com/ZFTurbo/Weighted-Boxes-Fusion) library (`ensemble_boxes`) is also used.

## Install and import libraries

In [None]:
# Environment switch: when truthy the offline pip installs below are run and
# CUDA device '0' is selected; otherwise CUDA device '1' is selected.
KAGGLE = True

In [None]:
# Directory holding the pre-downloaded packages used for the offline install.
IPATH = '../input/detectron-05/whls'
# The installs only run in the Kaggle environment. Every command passes
# --no-index, so pip resolves packages from the local paths given here
# instead of a package index.
if KAGGLE:
    !pip install {IPATH}/pycocotools-2.0.2/dist/pycocotools-2.0.2.tar --no-index --find-links ../input/detectron-05/whls 
    !pip install {IPATH}/fvcore-0.1.5.post20211019/fvcore-0.1.5.post20211019 --no-index --find-links ../input/detectron-05/whls 
    !pip install {IPATH}/antlr4-python3-runtime-4.8/antlr4-python3-runtime-4.8 --no-index --find-links ../input/detectron-05/whls 
    !pip install {IPATH}/detectron2-0.5/detectron2 --no-index --find-links ../input/detectron-05/whls 
    !pip install ../input/ensemble-boxes-104/ensemble_boxes-1.0.4/ -f ./ --no-index

In [None]:
import os
import cv2
import json
import time
import numpy as np
import pandas as pd
import torch
import detectron2
from tqdm.auto import tqdm
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data.datasets import register_coco_instances
from detectron2.evaluation import inference_on_dataset
from detectron2.evaluation.evaluator import DatasetEvaluator
from detectron2.data import DatasetCatalog, build_detection_test_loader
import pycocotools.mask as mask_util
from PIL import Image
import matplotlib.pyplot as plt
from fastcore.all import *
from ensemble_boxes import *
# Expose a single GPU to the process: index 0 on Kaggle, index 1 elsewhere.
os.environ['CUDA_VISIBLE_DEVICES'] = '1' if not KAGGLE else '0'

# Pick the torch device once and report which one is in use.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('GPU is available' if DEVICE.type == 'cuda' else 'CPU is used')
print('detectron ver:', detectron2.__version__)

## Config load

In [None]:
# Model-zoo config files for the architectures the checkpoints were trained with.
config1 = 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'
config2 = 'Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml'
config3 = 'COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml'

# Directory that contains every checkpoint listed in `best_models`.
mdl_path1 = "../input/best-transfer-100k"
#mdl_path2="../input/best-100k-firstrun"

# One entry per ensemble member:
#   'file'   - checkpoint file name inside `mdl_path1`
#   'ths'    - score thresholds, indexed by the predicted class id
#   'config' - model-zoo config used to build the network
# The score entries are informational only; nothing below reads them.
best_models = (
    {'file': 'model_x101_pseudo.pth', 'LB score': 0.314, 'CV Score': 0.298,
     'ths': [.19, .39, .67], 'config': config3},
    {'file': 'model_cascade_pseudo.pth', 'LB score': 0.311, 'CV Score': 0.292,
     'ths': [.19, .39, .73], 'config': config2},
    # {'file': 'model_mask_r50.pth','LB score': 0.307, 'CV Score': 0.3079, 'ths':[.15, .35, .58],'config':config1},
)


DATA_PATH = "../input/sartorius-cell-instance-segmentation"
MODELS = []        # DefaultPredictor instances, same order as `best_models`
BEST_MODELS = []   # checkpoint file names, same order as MODELS
THSS = []          # per-model score thresholds, same order as MODELS
ID_TEST = 0        # index into the test file list used by the demo cells
SUBM_PATH = f'{DATA_PATH}/test'
SINGLE_MODE = False
NMS = True         # run NMS on the pooled ensemble detections
MIN_PIXELS = [75, 150, 75]  # minimum mask area, indexed by predicted class id
IOU_TH = .4        # IoU threshold handed to NMS

for model in best_models:
    model_name = model["file"]
    model_ths = model["ths"]
    config = model['config']
    BEST_MODELS.append(model_name)
    THSS.append(model_ths)
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config))
    cfg.INPUT.MASK_FORMAT = 'bitmask'
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3
    cfg.MODEL.WEIGHTS = f'{mdl_path1}/{model_name}'
    # Run on the device detected at import time, so the notebook also works
    # on a machine without CUDA instead of failing while building the model.
    cfg.MODEL.DEVICE = DEVICE.type
    cfg.TEST.DETECTIONS_PER_IMAGE = 1000

    # NOTE(review): detectron2 keeps its flip test-time-augmentation switch
    # under TEST.AUG; confirm that TEST.FLIP has any effect on DefaultPredictor.
    cfg.TEST.FLIP = True
    cfg.TEST.PRECISE_BN.NUM_ITER = 200

    MODELS.append(DefaultPredictor(cfg))
print(f'all loaded:\nthresholds: {THSS}\nmodels: {BEST_MODELS}')

## Utils

In [None]:
def rle_decode(mask_rle, shape=(520, 704)):
    '''
    Decode a run-length string into a binary mask.

    mask_rle: whitespace separated "start length" pairs, starts are 1-based
    shape: (height, width) of the array to return
    Returns a uint8 numpy array of `shape`, 1 - mask, 0 - background
    '''
    tokens = np.asarray(mask_rle.split(), dtype=int)
    # Even positions hold the 1-based starts, odd positions the run lengths.
    first = tokens[0::2] - 1
    last = first + tokens[1::2]
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(first, last):
        flat[begin:stop] = 1
    # The runs address the flattened image, so reshape back at the end.
    return flat.reshape(shape)

def rle_encode(img):
    '''
    Encode a binary mask as a run-length string.

    img: numpy array, 1 - mask, 0 - background
    Returns space separated "start length" pairs with 1-based starts
    '''
    # Pad with background on both sides so runs touching the borders
    # still produce a rising and a falling edge.
    padded = np.concatenate([[0], img.flatten(), [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Edges alternate start, end; turn every end into a run length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

def pred_masks(file_name, path, model, ths, min_pixels):
    '''
    Predict non-overlapping instance masks for one image with a single model.

    file_name: image file name inside `path`
    path: directory that contains the image
    model: callable predictor returning a dict with an 'instances' entry
    ths: score thresholds indexed by class id
    min_pixels: minimum mask area indexed by class id
    Returns a list of run-length encoded masks (empty when nothing is detected)
    Raises FileNotFoundError when the image cannot be read
    '''
    img = cv2.imread(f'{path}/{file_name}')
    if img is None:
        raise FileNotFoundError(f'cannot read image: {path}/{file_name}')
    instances = model(img)['instances']
    pred_classes = instances.pred_classes.cpu().numpy().tolist()
    if not pred_classes:
        # No detections at all: there is no majority class to pick.
        return []
    # The image-level class is the most frequent instance class; it selects
    # both the score threshold and the minimum mask area.
    pred_class = max(set(pred_classes), key=pred_classes.count)
    take = instances.scores >= ths[pred_class]
    kept_masks = instances.pred_masks[take].cpu().numpy()
    result = []
    used = np.zeros(img.shape[:2], dtype=int)
    for mask in kept_masks:
        # Drop pixels already claimed by an earlier mask so that the
        # emitted masks never overlap.
        mask = mask * (1 - used)
        if mask.sum() >= min_pixels[pred_class]:
            used += mask
            result.append(rle_encode(mask))
    return result

def ensemble_preds(file_name, path, models, ths):
    '''
    Pool the thresholded detections of several models for one image.

    file_name: image file name inside `path`
    path: directory that contains the image
    models: sequence of callable predictors
    ths: per-model score thresholds, ths[i][class_id] belongs to models[i]
    Returns (classes, scores, bboxes, masks) tuples sorted by descending
    score; four empty tuples when no detection passes its threshold
    Raises FileNotFoundError when the image cannot be read
    '''
    img = cv2.imread(f'{path}/{file_name}')
    if img is None:
        raise FileNotFoundError(f'cannot read image: {path}/{file_name}')
    classes = []
    scores = []
    bboxes = []
    masks = []
    for i, model in enumerate(models):
        instances = model(img)['instances']
        pred_classes = instances.pred_classes.cpu().numpy().tolist()
        if not pred_classes:
            # This model found nothing; the others may still contribute.
            continue
        # Each model votes its own majority class, which selects the
        # score threshold applied to that model's detections.
        pred_class = max(set(pred_classes), key=pred_classes.count)
        take = instances.scores >= ths[i][pred_class]
        classes.extend(instances.pred_classes[take].cpu().numpy().tolist())
        scores.extend(instances.scores[take].cpu().numpy().tolist())
        bboxes.extend(instances.pred_boxes[take].tensor.cpu().numpy().tolist())
        masks.extend(instances.pred_masks[take].cpu().numpy())
    assert len(classes) == len(masks) , 'ensemble lenght mismatch'
    if not scores:
        return (), (), (), ()
    # Sort indices instead of the zipped tuples: a tuple sort would fall
    # through to comparing the numpy masks on full ties, which raises.
    order = sorted(
        range(len(scores)),
        key=lambda k: (scores[k], classes[k], bboxes[k]),
        reverse=True)
    return (
        tuple(classes[k] for k in order),
        tuple(scores[k] for k in order),
        tuple(bboxes[k] for k in order),
        tuple(masks[k] for k in order),
    )

def nms_predictions(classes, scores, bboxes, masks, 
                    iou_th=.5, shape=(520, 704)):
    '''
    Run box NMS over pooled detections and keep the matching masks.

    classes, scores, bboxes, masks: parallel sequences, boxes in pixel
        coordinates as (x1, y1, x2, y2)
    iou_th: IoU threshold handed to `nms`
    shape: (height, width) used to normalise the boxes
    Returns (classes, scores, masks) tuples sorted by descending score;
    three empty tuples when there is nothing to process
    Raises ValueError when a surviving score cannot be matched to an input
    '''
    if len(scores) == 0:
        return (), (), ()
    he, wd = shape[0], shape[1]
    boxes_list = [[x[0] / wd, x[1] / he, x[2] / wd, x[3] / he]
                  for x in bboxes]
    scores_list = list(scores)
    labels_list = list(classes)
    nms_bboxes, nms_scores, nms_classes = nms(
        boxes=[boxes_list], 
        scores=[scores_list], 
        labels=[labels_list], 
        weights=None,
        iou_thr=iou_th
    )

    # `nms` does not report which inputs survived, so map every survivor
    # back to an input index. Candidates are grouped by score and each
    # index is used at most once, so detections that share a score no
    # longer all receive the first matching mask.
    by_score = {}
    for idx, s in enumerate(scores_list):
        by_score.setdefault(float(s), []).append(idx)

    nms_masks = []
    for box, s in zip(nms_bboxes, nms_scores):
        candidates = by_score.get(float(s))
        if not candidates:
            raise ValueError(f'NMS returned a score with no source detection: {s}')
        # Among equal scores prefer the input whose box is closest to the
        # box that survived.
        pick = min(
            candidates,
            key=lambda j: np.abs(np.asarray(boxes_list[j]) - box).sum())
        candidates.remove(pick)
        nms_masks.append(masks[pick])
    if not nms_masks:
        return (), (), ()

    # Order by score (then class) through indices so that ties never end
    # up comparing the numpy masks themselves.
    order = sorted(
        range(len(nms_masks)),
        key=lambda k: (nms_scores[k], nms_classes[k]),
        reverse=True)
    return (
        tuple(nms_classes[k] for k in order),
        tuple(nms_scores[k] for k in order),
        tuple(nms_masks[k] for k in order),
    )

def ensemble_pred_masks(masks, classes, min_pixels, shape=(520, 704)):
    '''
    Turn score-ordered masks into non-overlapping run-length encodings.

    masks: sequence of binary masks of `shape`, processed in the given order
    classes: list/tuple of class ids, the most frequent one selects the
        minimum area
    min_pixels: minimum mask area indexed by class id
    shape: (height, width) of the masks
    Returns a list of run-length encoded masks (empty for empty input)
    '''
    if len(masks) == 0 or len(classes) == 0:
        # Nothing detected: there is no majority class to look up.
        return []
    result = []
    pred_class = max(set(classes), key=classes.count)
    used = np.zeros(shape, dtype=int) 
    for mask in masks:
        # Earlier masks win contested pixels; later masks keep only what
        # is still free and are dropped if too little remains.
        mask = mask * (1 - used)
        if mask.sum() >= min_pixels[pred_class]:
            used += mask
            result.append(rle_encode(mask))
    return result

## Demo inference

In [None]:
# os.listdir returns entries in arbitrary order; sort so that ID_TEST always
# selects the same image and the submission rows come out in a stable order.
test_names = sorted(os.listdir(SUBM_PATH))
print('test images:', len(test_names))

In [None]:
# Single-model reference: first ensemble member with its own thresholds,
# run on the demo image.
encoded_masks_single = pred_masks(
    file_name=test_names[ID_TEST], path=SUBM_PATH,
    model=MODELS[0], ths=THSS[0], min_pixels=MIN_PIXELS)

In [None]:
# Ensemble on the demo image: pool the detections of every model,
# optionally suppress duplicates with NMS, then encode the survivors.
classes, scores, bboxes, masks = ensemble_preds(
    test_names[ID_TEST], SUBM_PATH, MODELS, THSS)
if NMS:
    classes, scores, masks = nms_predictions(
        classes, scores, bboxes, masks, iou_th=IOU_TH)
encoded_masks = ensemble_pred_masks(masks, classes, min_pixels=MIN_PIXELS)

In [None]:
# Disabled visual comparison of the single-model and ensemble masks: the code
# is wrapped in a string literal, so nothing in it is executed.
'''_, axs = plt.subplots(2, 2, figsize=(14, 8))
axs[0][0].imshow(cv2.imread(f'{SUBM_PATH}/{test_names[ID_TEST]}'))
axs[0][0].axis('off')
axs[0][0].set_title(test_names[ID_TEST])
for en_mask in encoded_masks_single:
    dec_mask = rle_decode(en_mask)
    axs[0][1].imshow(np.ma.masked_where(dec_mask == 0, dec_mask))
    axs[0][1].axis('off')
    axs[0][1].set_title('single model')
axs[1][0].imshow(cv2.imread(f'{SUBM_PATH}/{test_names[ID_TEST]}'))
axs[1][0].axis('off')
axs[1][0].set_title(test_names[ID_TEST])
for en_mask in encoded_masks:
    dec_mask = rle_decode(en_mask)
    axs[1][1].imshow(np.ma.masked_where(dec_mask == 0, dec_mask))
    axs[1][1].axis('off')
    axs[1][1].set_title('ensemble models')
plt.show()
'''

## Inference

In [None]:
def bbox_to_key(bbox):
    '''
    Build a dictionary key from a bounding box.

    bbox: sequence of box coordinates
    Returns the string form of the coordinates rounded to 6 decimals
    '''
    rounded = np.round(bbox, decimals=6)
    return str(rounded)

def TTA(file, predictor_list, aug_list=None):
    '''
    Run test-time augmentation: every predictor on every augmented view.

    file: path of the image to process
    predictor_list: predictors to use, one or several for ensembling
    aug_list: augmentations to apply; None (the default) or a None entry
        means "no augmentation". Augmentations must be "bidirectional"
        (applying one twice gives back the original) and must support
        image, bboxes and masks, because the same transform is used to
        map the predictions back onto the original image.
    Returns (boxes, box_scores, masks, masks_lkup, predicted_class) where
        the first four are lists with one entry per (predictor, augmentation)
        pair: normalised boxes, scores, uint8 masks, and a dict from
        bbox key to index. predicted_class is the most frequent class id
        after the -1 shift applied below, or None without any detection.
    Raises FileNotFoundError when the image cannot be read
    '''
    if aug_list is None:
        aug_list = [None]
    boxes = []
    box_scores = []
    masks = []
    masks_lkup = []
    pclass = []
    im = cv2.imread(file)
    if im is None:
        raise FileNotFoundError(f'cannot read image: {file}')
    for predict in predictor_list:
        for aug in aug_list:
            # perform augmentation
            ima = im if aug is None else aug(image=im)['image']
            # make prediction
            pred = predict(ima)
            instances = pred['instances']
            h, w = instances.image_size[0], instances.image_size[1]
            # class ids are collected shifted by -1
            pclass.append(instances.pred_classes.cpu().numpy() - 1)
            # get box predictions, and normalize to 0-1 range
            pred_boxes = [A.normalize_bbox(box, h, w)
                          for box in instances.pred_boxes.tensor.cpu().numpy()]
            # transform back to original
            if aug is not None:
                pred_boxes = aug(image=ima, bboxes=pred_boxes)['bboxes']
            # get mask prediction (the multiplication turns booleans into ints)
            pred_masks = instances.pred_masks.cpu().numpy() * 1
            # transform back to original
            if aug is not None:
                pred_masks = aug(image=ima, masks=pred_masks)['masks']
            # lookup table for bbox to mask index reference
            pred_dict = {bbox_to_key(box): i for i, box in enumerate(pred_boxes)}
            # append results to list
            boxes.append(np.array(pred_boxes))
            box_scores.append(np.array(instances.scores.detach().cpu().numpy()))
            masks.append(np.array(pred_masks, dtype=np.uint8))
            masks_lkup.append(pred_dict)

            # release per-view objects before the next prediction
            del pred, instances, pred_boxes, pred_masks, ima, pred_dict

    del im
    gc.collect()
    # Most frequent class over all views. np.unique returns sorted values
    # and argmax the first maximum, so ties resolve to the smallest class;
    # this avoids indexing into scipy's mode result, whose shape is not
    # the same across SciPy versions.
    all_classes = np.concatenate(pclass) if pclass else np.array([], dtype=int)
    if all_classes.size == 0:
        predicted_class = None
    else:
        values, counts = np.unique(all_classes, return_counts=True)
        predicted_class = values[np.argmax(counts)]
    return boxes, box_scores, masks, masks_lkup, predicted_class

In [None]:
# Predictor that plt_pred uses for its flip visualisation: the first
# model loaded into MODELS.
predictor20 = MODELS[0]
from detectron2.utils.visualizer import Visualizer

In [None]:
# Panel titles, in subplot order.
TITLES = ['Original', 'Horizontal flip', 'Vertical flip', 'Rotation 180']


def plt_pred(file):
    '''
    Plot the predictions of `predictor20` on an image and its flipped views.

    file: path of the image to read
    Draws a 2x2 grid: the original image, then cv2.flip with codes 1, 0
    and -1, each overlaid with the instances predicted for that view.
    '''
    fig = plt.figure(figsize=(20,15))
    im = cv2.imread(file)
    # None keeps the image as is; the other entries are cv2.flip codes.
    for slot, flip_code in enumerate((None, 1, 0, -1), start=1):
        view = im if flip_code is None else cv2.flip(im, flip_code)
        fig.add_subplot(2, 2, slot)
        plt.tight_layout()
        outputs = predictor20(view)
        # The Visualizer receives the channels reversed, and its rendered
        # image is reversed once more before being shown.
        v = Visualizer(view[:, :, ::-1])
        out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        plt.title(TITLES[slot - 1])
        plt.imshow(out.get_image()[:, :, ::-1])


FILE = '../input/sartorius-cell-instance-segmentation/test/7ae19de7bc2a.png'
plt_pred(FILE)

In [None]:
import albumentations as A
import gc
from scipy import stats

In [None]:
# Views used for test-time augmentation; None stands for the unmodified image.
AUGMENTATIONS = [
    None,
    A.HorizontalFlip(p=1.0),
    A.VerticalFlip(p=1.0),
    A.Rotate(limit=(180,180), p=1.0),
]

# Reference image plus the TTA output of the first model on it, used by the
# visualisation cells that follow.
img_org = cv2.imread(FILE)
boxes, box_scores, masks, masks_lkup, pred_class = TTA(
    FILE, predictor_list=[MODELS[0]], aug_list=AUGMENTATIONS)

def show_boxes(im, boxes_list, h, w, color=(31, 119, 180), orig=False):
    '''
    Draw normalised boxes on a copy of an image.

    im: image array; it is left untouched
    boxes_list: sequence of boxes with normalised coordinates
    h: factor applied to the x coordinates
    w: factor applied to the y coordinates
        (so callers pass the image width as `h` and the height as `w`)
    color: rectangle colour
    orig: when True the coordinates are read from positions 4..7 of each
        box instead of 0..3
    Returns a new image with the rectangles drawn
    '''
    thickness = 2
    idx = 4 if orig else 0
    # Draw on a copy: cv2.rectangle writes in place, which would otherwise
    # accumulate boxes on the caller's image across successive calls.
    canvas = im.copy()
    for box in boxes_list:
        x1 = int(h * box[idx])
        y1 = int(w * box[idx+1])
        x2 = int(h * box[idx+2])
        y2 = int(w * box[idx+3])
        cv2.rectangle(canvas, (x1, y1), (x2, y2), color, thickness)
    return canvas

# One panel per augmentation, showing the boxes predicted for that view
# after they were mapped back onto the original image.
fig = plt.figure(figsize=(20, 15))
columns = 2
# Ceiling division, so a partially filled last row is still counted.
rows = -(-len(AUGMENTATIONS) // columns)
for i in range(1, len(AUGMENTATIONS) + 1):
    fig.add_subplot(rows, columns, i)
    plt.tight_layout()
    # Hand over a fresh copy for every panel: the rectangles must not pile
    # up across panels or leak into img_org, which later cells reuse.
    img = show_boxes(img_org.copy(), boxes[i-1],
                     img_org.shape[1], img_org.shape[0])
    plt.title(TITLES[i-1])
    plt.imshow(img)

plt.show()

In [None]:
def show_masks(im, masks_list, h, w):
    '''
    Keep only the image pixels covered by at least one mask.

    im: image array with three channels
    masks_list: sequence of masks with `w` rows and `h` columns
    h: number of mask columns
    w: number of mask rows
    Returns `im` multiplied by the union of all masks
    '''
    union = np.zeros((w, h), dtype=np.uint8)
    for single in masks_list:
        union = np.logical_or(union, single)
    # Replicate the union over the three channels before masking the image.
    stacked = np.dstack([union, union, union])
    return im * stacked

# One panel per augmentation, showing the image restricted to the masks
# predicted for that view.
fig = plt.figure(figsize=(20, 15))
columns = 2
rows = (len(AUGMENTATIONS)//columns) + (len(AUGMENTATIONS) % 2)
for i in range(1, len(AUGMENTATIONS) + 1):
    img = img_org
    # Build the masked image first, then place it in its grid cell.
    overlay = show_masks(img, masks[i-1], img.shape[1], img.shape[0])
    fig.add_subplot(rows, columns, i)
    plt.tight_layout()
    plt.imshow(overlay)
    plt.title(TITLES[i-1])
plt.show()

In [None]:
# Build the submission rows: one (image id, run-length mask) pair per instance.
subm_ids, subm_masks = [], []
for test_name in tqdm(test_names):
    FILE = test_name
    boxes, box_scores, masks, masks_lkup, pred_class = TTA(
        SUBM_PATH + '/' + FILE, [MODELS[0]], AUGMENTATIONS)  # , MODELS[1]
    # Everything before the first dot; also safe for names without a dot.
    image_id = test_name.split('.')[0]
    if pred_class is None:
        continue
    # TTA reports class ids shifted by -1; undo that so the thresholds and
    # minimum areas are looked up with the model's own class id.
    cls_idx = int(pred_class) + 1
    score_th = THSS[0][cls_idx]

    # `masks` holds one (N, H, W) stack per augmentation. Flatten it into
    # individual instances; encoding a whole stack at once would yield a
    # meaningless run-length string.
    det_scores, det_boxes, det_masks = [], [], []
    for aug_boxes, aug_scores, aug_masks in zip(boxes, box_scores, masks):
        for box, score, mask in zip(aug_boxes, aug_scores, aug_masks):
            if score < score_th:
                continue
            he, wd = mask.shape[:2]
            det_scores.append(float(score))
            # back to pixel coordinates, as expected by nms_predictions
            det_boxes.append([box[0] * wd, box[1] * he, box[2] * wd, box[3] * he])
            det_masks.append(mask)
    if not det_masks:
        continue

    # Highest score first, so confident instances win contested pixels.
    order = sorted(range(len(det_scores)),
                   key=det_scores.__getitem__, reverse=True)
    det_scores = [det_scores[k] for k in order]
    det_boxes = [det_boxes[k] for k in order]
    det_masks = [det_masks[k] for k in order]
    mask_shape = det_masks[0].shape[:2]

    if NMS:
        # The augmented views detect the same cells several times;
        # suppress those duplicates before encoding.
        _, det_scores, det_masks = nms_predictions(
            [cls_idx] * len(det_masks), det_scores, det_boxes, det_masks,
            iou_th=IOU_TH, shape=mask_shape)
    if len(det_masks) == 0:
        continue

    encoded_masks = ensemble_pred_masks(
        det_masks, [cls_idx] * len(det_masks),
        min_pixels=MIN_PIXELS, shape=mask_shape)
    for enc_mask in encoded_masks:
        subm_masks.append(enc_mask)
        subm_ids.append(image_id)

In [None]:
# Write one row per predicted instance, then read the file back and show
# its first rows as a sanity check.
pd.DataFrame(
    list(zip(subm_ids, subm_masks)),
    columns=['id', 'predicted'],
).to_csv('submission.csv', index=False)
pd.read_csv('submission.csv').head()
