# HubMAP 2023 - MaskRCNN

Kaggle submission notebook that relies on four data sources:

1. competition dataset

2. faster-rcnn (5 files)

3. pycocotools (pip package)

4. uploaded model(s)

Predicts masks for the test images with a Mask R-CNN model that was trained elsewhere.

You need to specify the number of dilation iterations and the mask threshold.

Only raw `.pth` model weights are supported.

# parameters

In [1]:
# Path to the trained Mask R-CNN weights; it is handed to torch.load and the
# result to load_state_dict when the detector is built.
# NOTE(review): "." is a directory, so it passes the existence check below but
# is presumably not loadable as weights - point this at the actual .pth file.
MODEL_PATH = "."
# Hidden-layer size passed to the mask predictor head.
# NOTE(review): presumably has to match the value used when the model was trained - confirm.
MODEL_HIDDEN = 256
import os
# Fail early if the configured path does not exist.
assert os.path.exists(MODEL_PATH)

In [2]:
# Number of dilation passes applied to every predicted mask (0 leaves masks untouched).
DILATE_TIMES = 0
# Threshold used to binarize the model's per-pixel mask output.
MASK_THRE = 0.5
# Seed used for the torch and `random` RNGs.
RANDOM_SEED = 824

In [3]:
# Directory that holds the test images; the *.tif files in it are globbed later.
TEST_DIR = "/kaggle/input/hubmap-hacking-the-human-vasculature/test"
TEST_DIR

'/kaggle/input/hubmap-hacking-the-human-vasculature/test'

# setup and installs

In [4]:
!pwd

/kaggle/working


In [5]:
import os
import glob
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import json
import cv2
import matplotlib.image as mpimg
import shutil
from tqdm import tqdm

In [6]:
import torch
import torchvision
import random

# Seed every RNG the notebook has in scope (torch, stdlib random, NumPy) so
# that repeated runs are comparable.
torch.manual_seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [7]:
!pip install -q --no-index --find-links=/kaggle/input/offline-pycocotools pycocotools --no-build-isolation -q

# encoding

ref: https://www.kaggle.com/code/itsuki9180/hubmap-inference/notebook

In [8]:
import base64
import numpy as np
from pycocotools import _mask as coco_mask
import typing as t
import zlib

def encode_binary_mask(mask: np.ndarray) -> bytes:
    """Convert a binary mask into the OID challenge encoding.

    The mask is RLE-encoded through the COCO mask API, zlib-compressed and
    finally base64-encoded.

    Args:
        mask: boolean array that is 2-D once singleton axes are squeezed out.

    Returns:
        The base64 encoding as ``bytes``; decode it if text is required.

    Raises:
        ValueError: if ``mask`` is not of dtype bool, or is not 2-D after
            squeezing.
    """

    # check input mask --
    if mask.dtype != bool:
        raise ValueError(
            "encode_binary_mask expects a binary mask, received dtype == %s" %
            mask.dtype)

    mask = np.squeeze(mask)
    if mask.ndim != 2:
        # The shape is wrapped in a 1-tuple on purpose: `"%s" % mask.shape`
        # treats each dimension as a separate format argument and raises
        # TypeError for any shape that does not have exactly one entry.
        raise ValueError(
            "encode_binary_mask expects a 2d mask, received shape == %s" %
            (mask.shape,))

    # convert input mask to expected COCO API input --
    mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
    mask_to_encode = mask_to_encode.astype(np.uint8)
    mask_to_encode = np.asfortranarray(mask_to_encode)

    # RLE encode mask --
    encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

    # compress and base64 encoding --
    binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
    base64_str = base64.b64encode(binary_str)
    return base64_str

# maskrcnn interface

In [9]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.resnet import ResNet50_Weights
import torchvision

def get_model_instance_segmentation(num_classes, defweights=True, hidden=256):
    """Build a Mask R-CNN (ResNet-50 FPN v2) with freshly created prediction heads.

    Args:
        num_classes: number of output classes for both the box and mask heads.
        defweights: when true, start from the "DEFAULT" detection weights with
            an ImageNet (IMAGENET1K_V2) backbone; otherwise load no weights.
        hidden: hidden-layer size of the new mask predictor.

    Returns:
        The model with its box predictor and mask predictor replaced.
    """
    if defweights:
        weight_kwargs = dict(weights="DEFAULT", weights_backbone=ResNet50_Weights.IMAGENET1K_V2)
    else:
        weight_kwargs = dict(weights=None, weights_backbone=None)
    net = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(**weight_kwargs)

    heads = net.roi_heads

    # Box head: same input width as the stock classifier, new class count.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: same input channels as the stock one, configurable hidden size.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, hidden, num_classes)

    return net

def my_collate(batch):
    """Regroup a batch of samples field by field.

    ``[(a1, b1), (a2, b2)]`` becomes ``((a1, a2), (b1, b2))``.
    """
    per_field = zip(*batch)
    return tuple(per_field)

import albumentations as A
from albumentations.pytorch import ToTensorV2

def get_album_trans(train=True):
    """Return the albumentations pipeline used at inference time.

    The pipeline normalizes with fixed per-channel mean/std and converts the
    image to a tensor. Training pipelines are not provided in this notebook.

    Raises:
        Exception: if ``train`` is true.
    """
    if train:
        raise Exception("No training here")

    steps = [
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
    return A.Compose(steps)



In [10]:
import matplotlib.image as mpimg
import torch

class PtMaskRcnnDetector():
    """Inference wrapper around a Mask R-CNN whose weights come from a .pth state dict."""

    def __init__(self, weight_path, num_classes=3, device="cuda:0", hidden=256):
        """Build the model, move it to ``device`` and load the weights.

        Args:
            weight_path: file passed to ``torch.load``; the result is given to
                ``load_state_dict``.
            num_classes: number of classes of the trained model (3 or 4).
            device: torch device string the model and inputs are placed on.
            hidden: hidden-layer size of the mask predictor head.
        """
        assert num_classes == 3 or num_classes == 4
        self.device = device
        self.transforms = get_album_trans(train=False)
        self.model = get_model_instance_segmentation(num_classes=num_classes, defweights=False, hidden=hidden).to(self.device)
        self.model.eval()
        # NOTE(review): torch.load unpickles the file - only load weights from a trusted source.
        print(self.model.load_state_dict(torch.load(weight_path, map_location = self.device)))
        print("PtMaskRcnnDetector: successfully loaded weights")

    def _processimage(self, impath):
        """Read the image at ``impath``; only tif and png (any letter case) are accepted.

        Raises:
            Exception: for any other file extension.
        """
        extension = impath.split(".")[-1].lower()
        if extension == "tif":
            return mpimg.imread(impath)
        elif extension == "png":
            # NOTE(review): cv2.imread returns channels in BGR order while the
            # tif branch goes through matplotlib (presumably RGB) - confirm
            # which order the trained model expects.
            return cv2.imread(impath)
        else:
            raise Exception("wrong image format, only tif or png are allowed!")

    def _img2tensor(self, img):
        """Convert an H x W x C image array into a tensor on ``self.device``."""
        if self.transforms is not None:
            transformed = self.transforms(image = img)
            torchim =  transformed["image"]
        else:
            # Fallback without albumentations: channels first, scaled by 1/255.
            torchim = torch.tensor(np.transpose(img, (2,0,1))).to(torch.float32)/255.
        return torchim.to(self.device)

    class PredOutput():
        """Plain container exposing ``masks`` and ``scores`` as attributes."""

        def __init__(self, masks, scores) -> None:
            self.masks = masks
            self.scores = scores

    def predict_masks_scores(self, imgpath, thre=0.5):
        """Predict the label-1 instance masks of one image.

        Args:
            imgpath: path of a tif or png image.
            thre: mask values strictly above this threshold become True.

        Returns:
            A ``PredOutput`` whose ``masks`` is an array of boolean masks and
            whose ``scores`` is the array of matching detection scores. Only
            detections with label 1 (blood vessel) are kept.
        """
        masks = list()
        scores = list()

        torchim = self._img2tensor(self._processimage(impath=imgpath)).unsqueeze(0)
        # Inference only: disabling autograd avoids building a graph and
        # keeping activations alive for a backward pass that never happens.
        with torch.no_grad():
            pred = self.model(torchim)[0]

        for m in range(len(pred['masks'])):
            # not blood vessel
            if (pred['labels'][m] != 1):
                continue

            mask = pred['masks'][m].detach().permute(1,2,0).cpu().numpy()
            mask = (mask > thre).squeeze(2)
            score = pred['scores'][m].detach().cpu().numpy()

            masks.append(mask)
            scores.append(score)

        return self.PredOutput(masks=np.array(masks), scores=np.array(scores))
    
# # example usage
# MODEL_PATH = "*.pth"
# ptmrcnn_detector = PtMaskRcnnDetector(weight_path=MODEL_PATH, num_classes=3, device="cuda:0")
# imgpath = "*.tif" # OR "*.png"
# new_result = ptmrcnn_detector.predict_masks_scores(imgpath=imgpath, thre=0.5)
# new_result.masks.shape, new_result.scores

# inference helpers

In [11]:
def getgtmasks(target):
    """Collect the binarized masks of ``target`` whose label is 1 or 2.

    ``target`` is indexed by the keys "masks" and "labels"; each kept mask is
    thresholded at the module-level ``MASK_THRE`` and returned as a boolean
    array. Entries with any other label are dropped.
    """
    selected = []
    for idx in range(len(target["masks"])):
        label = target["labels"][idx]
        if label == 1 or label == 2:
            binary = np.where(target["masks"][idx] > MASK_THRE, 1, 0).astype(bool)
            selected.append(binary)
    return selected

def form_pred_string(pred_masks, scores):
    """Build the submission string for one image.

    Every (mask, score) pair contributes ``"0 <score> <encoded mask>"``; the
    pieces are joined by single spaces. No pairs yields an empty string.
    """
    entries = [
        f"0 {score} {encode_binary_mask(mask).decode('utf-8')}"
        for mask, score in zip(pred_masks, scores)
    ]
    return " ".join(entries)

def dilate_mask(boolmask, times=2):
    """Dilate a boolean mask ``times`` times with a 3x3 cross-shaped kernel.

    Returns a new boolean array; with ``times`` of 0 it is simply a copy of
    the input converted through uint8.
    """
    cross_kernel = np.array([[0,1,0], [1,1,1], [0,1,0]]).astype(np.uint8)
    dilated = boolmask.astype(np.uint8)
    for _ in range(times):
        dilated = cv2.dilate(dilated, kernel=cross_kernel)
    return dilated.astype(bool)

def dilate_with_kernel(boolmask, kernel = None):
    """Dilate a boolean mask once with a caller-supplied kernel.

    ``kernel`` is passed straight through to ``cv2.dilate``; the result is
    returned as a boolean array.
    """
    as_uint8 = boolmask.astype(np.uint8)
    dilated = cv2.dilate(as_uint8, kernel=kernel)
    return dilated.astype(bool)

# compute

In [12]:
# Sorted so the row order of the submission does not depend on filesystem order.
imglob = sorted(glob.glob(os.path.join(TEST_DIR, "*.tif")))
# Strip only the final extension: the inference loop rebuilds the path as
# f"{id}.tif", so an id cut at the first dot would no longer resolve to the file.
idlist = [os.path.splitext(os.path.basename(x))[0] for x in imglob]

# turn to multiple instances if given sample has 1 only
if len(idlist) == 1:
    idlist = idlist * 10
len(idlist), idlist[:5]

(10,
 ['72e40acccadf',
  '72e40acccadf',
  '72e40acccadf',
  '72e40acccadf',
  '72e40acccadf'])

In [13]:
# Use the first GPU when one is available; otherwise fall back to the CPU
# instead of failing while moving the model to a missing device.
detector_device = "cuda:0" if torch.cuda.is_available() else "cpu"
ptmrcnn_detector = PtMaskRcnnDetector(weight_path=MODEL_PATH, num_classes=3, device=detector_device, hidden=MODEL_HIDDEN)
print("model done")

<All keys matched successfully>
PtMaskRcnnDetector: successfully loaded weights
model done


In [14]:
# Echo the run configuration so it is recorded next to the results.
print(MODEL_PATH)
print(MODEL_HIDDEN)
print(DILATE_TIMES)
print(MASK_THRE)

# Parallel lists, one entry per processed image; turned into the submission table afterwards.
ids = []
heights = []
widths = []
prediction_strings = []

for image_id in tqdm(idlist):
    impath = os.path.join(TEST_DIR, f"{image_id}.tif")
    # Only height and width go into the submission; the channel count is not needed.
    h, w = mpimg.imread(impath).shape[:2]

    tres = ptmrcnn_detector.predict_masks_scores(imgpath=impath, thre=MASK_THRE)
    # Build a new list instead of overwriting the detector's returned array in place.
    pred_masks = [dilate_mask(mask, times=DILATE_TIMES) for mask in tres.masks]

    pred_string = form_pred_string(pred_masks, tres.scores)

    ids.append(image_id)
    heights.append(h)
    widths.append(w)
    prediction_strings.append(pred_string)
print("predictions done")
    

/kaggle/input/hubmap-maskrcnn-models/album-agg-reduce_epoch8.pth
256
0
0.5


100%|██████████| 10/10 [00:07<00:00,  1.35it/s]

predictions done





# turn to csv

In [15]:
# Assemble the submission table in one step, index it by image id and write it out.
submission = pd.DataFrame(
    {
        "id": ids,
        "height": heights,
        "width": widths,
        "prediction_string": prediction_strings,
    }
).set_index("id")
submission.to_csv("submission.csv")

In [16]:
# Sanity check: read the file back through the same relative path it was
# written to, so the check does not depend on the working directory being
# /kaggle/working.
read_sub = pd.read_csv("submission.csv")
read_sub

Unnamed: 0,id,height,width,prediction_string
0,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
1,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
2,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
3,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
4,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
5,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
6,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
7,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
8,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
9,72e40acccadf,512,512,0 0.9681476354598999 eNqLT8o0s0myN/U28TXyM/I3h...
