In [1]:
import os, json, random
from pathlib import Path
from typing import List, Dict, Tuple

import torch
import torchvision
import detectron2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from skimage.io import imread, imsave

from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2.data import (MetadataCatalog, 
                             DatasetCatalog,
                             DatasetMapper,
                             build_detection_test_loader, 
                             build_detection_train_loader)
from detectron2.structures import BoxMode

import utils

## Setting up training, validation and test groups
**TODO:** use all of the MoNuSeg data here, not only the test data.

In [2]:
# Patient table; its 'training' column ('yes' / 'no') decides which split a
# patient belongs to.
monuseg_patinfo = pd.read_csv(Path.cwd().parent / 'data/monuseg/patient_information.csv')

# Official train / test partition, as patient-id Series.
monuseg_train_ids = monuseg_patinfo.loc[monuseg_patinfo['training'] == 'yes', 'patient_id']
monuseg_test_ids = monuseg_patinfo.loc[monuseg_patinfo['training'] == 'no', 'patient_id']

# Carve a validation subset out of the training patients.
monuseg_train_ids, monuseg_val_ids = utils.val_split(monuseg_train_ids)

In [3]:
def get_nuclei_dicts(patient_ids : List[str]) -> List[Dict]:
    """Build one detectron2-style dataset record per patient image.

    Args:
        patient_ids: Iterable of patient ids. Each id names an image
            ``../data/monuseg/images/<pid>.png`` and is passed to
            ``utils.get_annotation`` to obtain that image's contours.

    Returns:
        A list of dicts with ``file_name``, ``image_id``, ``height``,
        ``width`` and ``annotations``. Every annotation carries the raw
        contour (``xy``), an XYXY box, a single flattened polygon and
        ``category_id`` 0.
    """
    im_dir = '../data/monuseg/images'
    width, height = (1000, 1000)  # every record is declared as 1000 x 1000

    dataset_dicts = []
    for pid in patient_ids:
        objs = []
        for a in utils.get_annotation(pid):
            if len(a) < 6:
                # Skip only this contour. A `break` here would silently drop
                # every remaining nucleus of the image as well.
                continue
            px, py = zip(*a)
            # Flattened [x0, y0, x1, y1, ...] polygon, shifted by 2.5 px.
            # NOTE(review): the box and "xy" below are NOT shifted - confirm
            # that the offset is meant to apply to the polygon only.
            poly = [coord for x, y in zip(px, py) for coord in (x + 2.5, y + 2.5)]

            objs.append({
                "xy" : list(zip(px, py)),
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
            })

        dataset_dicts.append({
            "file_name": im_dir + f"/{pid}.png",
            "image_id": pid,
            "height": height,
            "width": width,
            "annotations": objs,
        })
    return dataset_dicts
            

In [4]:
# https://github.com/facebookresearch/detectron2/issues/1763#issuecomment-659236931
import copy
from PIL import Image
from detectron2.data import detection_utils
import detectron2.data.transforms as T 
# Side lengths, in pixels, used by `mapper` for its random and fixed crops.
WIDTH = 256
HEIGHT = 256

def to_greyscale(img: "Image.Image") -> "Image.Image":
    """Return a greyscale copy of ``img`` that still has three channels.

    Args:
        img: A PIL image.

    Returns:
        A new PIL image in mode ``RGB`` whose three channels all hold the
        luminance of ``img``.
    """
    # `convert` returns a new image and leaves `img` untouched, so the result
    # has to be returned. Going through 'L' and back to 'RGB' keeps the
    # channel count at three, which `mapper` relies on when it transposes the
    # array to channel-first for the model.
    # NOTE(review): `mapper` reads images as BGR, so PIL's luminance weights
    # are applied with red and blue swapped - confirm this is acceptable.
    return img.convert('L').convert('RGB')

def mapper(dataset_dict):
    """Convert one dataset record into an augmented training sample.

    The image named by ``file_name`` is read in BGR and pushed through a
    random crop, two random flips, a random rotation, a fixed crop and the
    ``to_greyscale`` colour transform. The same geometric transforms are then
    applied to every non-crowd annotation.

    Args:
        dataset_dict: A record with at least ``file_name`` and
            ``annotations``. It is deep-copied, so the caller's dict is left
            untouched.

    Returns:
        The copied record with ``image`` (channel-first float32 tensor),
        the transformed ``annotations`` and ``instances`` added.
    """
    sample = copy.deepcopy(dataset_dict)  # everything below mutates the copy
    raw_image = detection_utils.read_image(sample["file_name"], format="BGR")

    augmentations = [
        T.RandomCrop(crop_size = (WIDTH*2, HEIGHT*2), crop_type='absolute_range'),
        T.RandomFlip(prob=0.50, horizontal=True, vertical=False),
        T.RandomFlip(prob=0.50, horizontal=False, vertical=True),
        T.RandomRotation(angle=(0,360), expand=True, center=None, sample_style="range", interp=None),
        # Fixed WIDTH x HEIGHT window starting at (WIDTH/2, HEIGHT/2).
        T.CropTransform(int(WIDTH*0.5), int(HEIGHT*0.5), WIDTH, HEIGHT),
        T.PILColorTransform(to_greyscale),
    ]
    image, transforms = T.apply_transform_gens(augmentations, raw_image)
    image_hw = image.shape[:2]

    # HWC array -> CHW float32 tensor.
    sample["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = []
    for obj in sample.pop("annotations"):
        if obj.get("iscrowd", 0) != 0:
            continue
        annos.append(
            detection_utils.transform_instance_annotations(obj, transforms, image_hw)
        )
    sample["annotations"] = annos

    instances = detection_utils.annotations_to_instances(annos, image_hw)
    sample["instances"] = detection_utils.filter_empty_instances(instances)
    return sample

class Trainer(DefaultTrainer):
    """``DefaultTrainer`` whose training loader uses the augmenting ``mapper``."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Return the training loader, with each record passed through ``mapper``."""
        return build_detection_train_loader(cfg, mapper=mapper)

    @classmethod
    def build_test_loader(cls, cfg, dataset_name):
        """Return a test loader for ``dataset_name`` using a non-training ``DatasetMapper``."""
        # Second argument is `is_train`.
        eval_mapper = DatasetMapper(cfg, False)
        return build_detection_test_loader(cfg, dataset_name, mapper=eval_mapper)

# cfg is your standard get_cfg() object of detectron2

In [5]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def draw_data_dict(data_dict):
    """Plot the image stored in a data dict together with its annotations.

    Args:
        data_dict: A dict with
            ``image``: the image as an array or tensor, either channel-last
            (H, W, C) or channel-first (C, H, W);
            ``annotations``: a list of dicts, each with ``bbox`` as
            (xmin, ymin, xmax, ymax) and ``xy`` as a sequence of polygon
            vertices.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    fig, ax = plt.subplots(1, figsize=(5, 5))

    # Only the in-memory image is drawn; the file on disk is not read.
    im = data_dict["image"]
    if hasattr(im, "detach"):
        # torch tensor -> numpy array on the host
        im = im.detach().cpu().numpy()
    im = np.asarray(im)

    # imshow needs channel-last data, so move a leading channel axis to the end.
    if im.ndim == 3 and im.shape[0] in (1, 3, 4) and im.shape[2] not in (1, 3, 4):
        im = im.transpose(1, 2, 0)
    if im.ndim == 3 and im.shape[2] == 1:
        im = im[:, :, 0]
    # Float images are interpreted on a 0..1 scale by imshow; data on a
    # 0..255 scale would be clipped to white, so convert it to uint8.
    if np.issubdtype(im.dtype, np.floating) and im.size and im.max() > 1.0:
        im = np.clip(im, 0, 255).astype(np.uint8)
    # NOTE(review): if the image was read as BGR, colours are shown with red
    # and blue swapped - confirm whether a channel flip is wanted here.

    ax.imshow(im)
    for obj in data_dict["annotations"]:
        # Bounding box
        xmin, ymin, xmax, ymax = obj["bbox"]
        bbox = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                 linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(bbox)

        # Mask
        # NOTE(review): "xy" is drawn as stored; confirm it is in the same
        # coordinate frame as the (possibly augmented) image.
        mask = patches.Polygon(obj["xy"], alpha = 0.5)
        ax.add_patch(mask)
    plt.show()


In [6]:
# A catalog entry must return the plain list of dataset dicts. The per-sample
# `mapper` takes a single record and is applied by the data loader, so it must
# not be called here on the whole list.
DatasetCatalog.register("nuclei_train", lambda: get_nuclei_dicts(monuseg_train_ids))
MetadataCatalog.get("nuclei_train").set(thing_classes=["nuclei"])

DatasetCatalog.register("nuclei_val", lambda: get_nuclei_dicts(monuseg_val_ids))
MetadataCatalog.get("nuclei_val").set(thing_classes=["nuclei"])

nuclei_metadata = MetadataCatalog.get("nuclei_train")

In [7]:
from detectron2.engine import DefaultTrainer
from torch.utils.tensorboard import SummaryWriter
import wandb

# Start the W&B run first so that TensorBoard syncing is already active when
# a writer is created.
wandb.init(sync_tensorboard=True)
writer = SummaryWriter()

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("nuclei_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 500    # short run; increase for real training
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # default: 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # single class: nuclei

cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 10000  # originally 1000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 10000  # originally 1000

# Settings taken from https://github.com/gangadhar-p/NucleiDetectron,
# written with detectron2's config keys. The original Detectron names
# (RPN_ANCHOR_START_SIZE, RPN_ASPECT_RATIOS, ROI_XFORM_RESOLUTION,
# DETECTIONS_PER_IM) are not read by detectron2 and would have no effect.
# One anchor size per FPN level, starting at 8 px and doubling per level.
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8], [16], [32], [64], [128]]
# Five aspect ratios instead of three changes the shape of the RPN prediction
# layers, so those layers cannot reuse the pretrained COCO weights.
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.2, 0.5, 1.0, 2.0, 5.0]]
cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
cfg.TEST.DETECTIONS_PER_IMAGE = 500

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable
[34m[1mwandb[0m: Wandb version 0.10.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


KeyboardInterrupt: 

In [None]:
# Records that the visualisation below samples from (training patients).
dataset_dicts = get_nuclei_dicts(monuseg_train_ids)

# Switch the shared cfg to inference: load the checkpoint written at the end
# of training, keep detections scoring at least 0.5, and allow up to 10000
# detections per image.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.TEST.DETECTIONS_PER_IMAGE = 10000
predictor = DefaultPredictor(cfg)

In [None]:
def draw_coco(coco_dict : Dict):
    """Draw an image with its ground-truth boxes and polygons.

    Args:
        coco_dict: A record with ``file_name`` (image path) and
            ``annotations``; each annotation provides ``bbox`` as
            (xmin, ymin, xmax, ymax) and ``xy`` as polygon vertices.

    Returns:
        The matplotlib axes that were drawn on, so more artists can be added.
    """
    _, ax = plt.subplots(1, figsize=(10,10))
    ax.imshow(imread(coco_dict["file_name"]))

    annotations = coco_dict["annotations"]
    print("Number of nuclei:", len(annotations))
    for ann in annotations:
        # Red outline for the bounding box.
        left, top, right, bottom = ann["bbox"]
        ax.add_patch(
            patches.Rectangle((left, top), right - left, bottom - top,
                              linewidth=1, edgecolor='r', facecolor='none')
        )
        # Semi-transparent fill for the nucleus outline.
        ax.add_patch(patches.Polygon(ann["xy"], alpha = 0.3))
    return ax

def draw_prediction(prediction, ax):
    """Overlay predicted bounding boxes, in white, on existing axes.

    Args:
        prediction: Model output dict whose ``"instances"`` entry exposes
            ``pred_boxes`` with one (xmin, ymin, xmax, ymax) row per box.
        ax: Matplotlib axes to draw on.
    """
    # The predictor returns tensors on the model's device. Move them to the
    # CPU and turn them into plain floats before handing them to matplotlib,
    # which cannot consume GPU tensors.
    instances = prediction["instances"].to("cpu")
    boxes = instances.pred_boxes
    print("Number of predicted nuclei:", len(boxes))
    for xmin, ymin, xmax, ymax in boxes.tensor.tolist():
        bbox = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                 linewidth=1, edgecolor='w', facecolor='none')
        ax.add_patch(bbox)
    
    
# Show ground truth (red boxes, filled polygons) and predictions (white boxes)
# for two randomly chosen records.
for d in random.sample(dataset_dicts, 2):
    # DefaultPredictor expects a BGR (H, W, C) array, which is also how
    # `mapper` reads the training images. skimage's `imread` returns RGB
    # (or RGBA), so read the file the same way training does.
    im = detection_utils.read_image(d["file_name"], format="BGR")
    ax = draw_coco(d)
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    draw_prediction(outputs, ax)
    plt.show()

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
# COCO-style evaluation of the trainer's current model.
# NOTE(review): this scores the "nuclei_train" split even though the loader
# is called `val_loader` - confirm whether a held-out split was intended.
evaluator = COCOEvaluator("nuclei_train", cfg, False, output_dir="./output/")
val_loader = trainer.build_test_loader(cfg, "nuclei_train")
eval_results = inference_on_dataset(trainer.model, val_loader, evaluator)
print(eval_results)