In [None]:
!pip install -q 'git+https://github.com/facebookresearch/detectron2.git'
!pip install imagecodecs
!pip install iterative-stratification
!pip install ensemble-boxes

In [None]:
from pathlib import Path
from skimage import io as skio
import numpy as np
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit

# Dataset locations and the fraction of training folders held out for validation.
ROOT = Path('/path/to/hw3-data-release')
TRAIN_DIR = ROOT / 'train'
TEST_DIR = ROOT / 'test_release'
VAL_RATIO = 0.15


def _class_presence(sample_dir):
    """Return a 4-element 0/1 list marking which class masks in `sample_dir` are non-empty."""
    present = [0, 0, 0, 0]
    for mask_file in sample_dir.glob('class*.tif'):
        # File stems look like "class<k>"; k is 1-based, the list index is 0-based.
        class_idx = int(mask_file.stem.replace('class', '')) - 1
        if np.any(skio.imread(mask_file)):
            present[class_idx] = 1
    return present


# One multi-label row per training folder; used only to stratify the split.
img_dirs = sorted(p for p in TRAIN_DIR.iterdir() if p.is_dir())
Y = np.array([_class_presence(folder) for folder in img_dirs])

msss = MultilabelStratifiedShuffleSplit(
    n_splits=1,
    test_size=VAL_RATIO,
    random_state=2025,
)
# The features are irrelevant for stratification, so a dummy array is passed.
split_iter = msss.split(np.zeros(len(Y)), Y)
train_idx, val_idx = next(split_iter)

train_names = set(img_dirs[i].name for i in train_idx)
val_names = set(img_dirs[i].name for i in val_idx)

print(f"Train folders: {len(train_names)} | Val folders: {len(val_names)}")

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from pycocotools import mask as mask_utils

def make_cells_dicts(img_dir, allowed_names, is_test=False):
    """Build the list of dataset dicts for the images found under `img_dir`.

    Args:
        img_dir: ``pathlib.Path`` of the directory to scan.
        allowed_names: collection of entry names to keep. With
            ``is_test=False`` only sub-directories whose name is in this
            collection are used. With ``is_test=True`` an empty collection
            means "keep everything".
        is_test: when True no masks are read and ``"annotations"`` stays empty.
            Test entries may be either plain ``*.tif`` image files or
            sub-directories containing ``image.tif``.

    Returns:
        A list of dicts with keys ``file_name``, ``image_id``, ``height``,
        ``width`` and ``annotations``. Each annotation holds an XYWH box, a
        0-based ``category_id`` and an RLE ``segmentation`` whose ``counts``
        is a ``str``.
    """
    records = []
    for idx, entry in enumerate(sorted(img_dir.iterdir())):
        if is_test:
            # An empty filter must not discard the whole test set.
            if allowed_names and entry.name not in allowed_names:
                continue
            if entry.is_dir():
                img_path = entry / 'image.tif'
                if not img_path.is_file():
                    continue
            elif entry.suffix.lower() == '.tif':
                img_path = entry
            else:
                continue
        else:
            if (not entry.is_dir()) or (entry.name not in allowed_names):
                continue
            img_path = entry / 'image.tif'

        h, w = skio.imread(img_path).shape[:2]
        annos = []
        if not is_test:
            # Sorted so the annotation order does not depend on the filesystem.
            for mask_path in sorted(entry.glob('class*.tif')):
                # "class<k>.tif" -> 0-based category id k - 1.
                cls = int(mask_path.stem.replace('class', '')) - 1
                arr = skio.imread(mask_path)
                inst_ids = np.unique(arr)
                # Drop the background label explicitly: slicing off the first
                # unique value would discard a real instance whenever the mask
                # contains no zero pixel.
                for inst in inst_ids[inst_ids != 0]:
                    m = (arr == inst).astype('uint8')
                    rle = mask_utils.encode(np.asfortranarray(m))
                    # bytes -> str so the record is JSON serialisable.
                    rle["counts"] = rle["counts"].decode('utf-8')
                    ys, xs = np.where(m)
                    xmin, ymin = xs.min(), ys.min()
                    annos.append({
                        "bbox": [int(xmin), int(ymin),
                                 int(xs.max() - xmin + 1),
                                 int(ys.max() - ymin + 1)],
                        "bbox_mode": BoxMode.XYWH_ABS,
                        "category_id": cls,
                        "segmentation": rle,
                        "iscrowd": 0
                    })
        records.append({
            "file_name": str(img_path),
            "image_id": idx,
            "height": h,
            "width": w,
            "annotations": annos
        })
    return records

def _register_cells(name, loader):
    """Register `loader` under `name`, replacing any previous registration.

    Removing a stale entry first keeps this cell re-runnable; registering an
    already-registered name would otherwise fail.
    """
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    DatasetCatalog.register(name, loader)
    MetadataCatalog.get(name).set(thing_classes=['c1', 'c2', 'c3', 'c4'])


# The lambdas look up train_names / val_names when the dataset is loaded,
# not when it is registered.
_register_cells(
    "cells_train",
    lambda: make_cells_dicts(TRAIN_DIR, train_names, is_test=False)
)
_register_cells(
    "cells_val",
    lambda: make_cells_dicts(TRAIN_DIR, val_names, is_test=False)
)
_register_cells(
    "cells_test",
    lambda: make_cells_dicts(TEST_DIR, set(), is_test=True)
)

print("✅ cells_train / cells_val / cells_test 都註冊完了！")

In [None]:
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer
from detectron2.utils.logger import setup_logger
from detectron2.evaluation import COCOEvaluator
import warnings
import os

# Silence the FutureWarning whose message mentions torch.cuda.amp.autocast.
warnings.filterwarnings(
    "ignore",
    message=".*torch.cuda.amp.autocast.*",
    category=FutureWarning
)

# Start from the model-zoo Mask R-CNN X-101-32x8d FPN (3x schedule) config and
# override it for the 4-class cell dataset below.
setup_logger()
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"))

# --- Device, mixed precision and gradient clipping ---
cfg.MODEL.DEVICE = "cuda"
cfg.SOLVER.AMP.ENABLED = True
cfg.SOLVER.CLIP_GRADIENTS.ENABLED = True
cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"
cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0

# --- Datasets: train on the train split, evaluate on the val split ---
cfg.DATASETS.TRAIN = ("cells_train",)
cfg.DATASETS.TEST = ("cells_val",)
cfg.DATALOADER.NUM_WORKERS = 2

# --- Anchors / RPN: one anchor size per entry (5 entries), three aspect ratios ---
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8], [16], [32], [64], [128]]
cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]
cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 15000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 3000

# --- ROI heads: 4 classes, matching thing_classes c1..c4 ---
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.BACKBONE.FREEZE_AT = 2
cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 28
cfg.MODEL.ROI_MASK_HEAD.NUM_CONV = 4
cfg.MODEL.ROI_MASK_HEAD.CONV_DIM = 256

# --- Training-time input size and augmentation ---
cfg.INPUT.MIN_SIZE_TRAIN = (800,)
cfg.INPUT.MAX_SIZE_TRAIN = 1333
cfg.INPUT.RANDOM_FLIP = "horizontal"
# cfg.INPUT.RANDOM_FLIP = "vertical"

# --- Sampler selection (repeat-factor sampler with its threshold) ---
cfg.DATALOADER.SAMPLER_TRAIN = "RepeatFactorTrainingSampler"
cfg.DATALOADER.REPEAT_THRESHOLD = 0.05

# --- Optimisation schedule: 6000 iterations, 500 warm-up iterations,
# --- LR multiplied by GAMMA at iterations 4500 and 5500 ---
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 2e-3
cfg.SOLVER.WEIGHT_DECAY = 1e-4
cfg.SOLVER.MAX_ITER = 6000
cfg.SOLVER.WARMUP_ITERS = 500
cfg.SOLVER.WARMUP_FACTOR = 1. / 1000
cfg.SOLVER.STEPS = (4500, 5500)
cfg.SOLVER.GAMMA = 0.1
cfg.SOLVER.CHECKPOINT_PERIOD = 500

# --- Test-time augmentation settings ---
# NOTE(review): nothing in this notebook wraps the model in a TTA wrapper, so
# these TEST.AUG.* values may have no effect on evaluation or on the
# DefaultPredictor calls further down — confirm before relying on them.
cfg.TEST.AUG.ENABLED = True
cfg.TEST.AUG.MIN_SIZES = (800,)
cfg.TEST.AUG.MAX_SIZE = 1333
cfg.TEST.AUG.FLIP = True

# --- Reproducibility, output location, initial weights, evaluation cadence ---
cfg.SEED = 2025
cfg.OUTPUT_DIR = "/path/to/output"
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml")
cfg.TEST.EVAL_PERIOD = 500
# The dataset dicts built earlier store masks as RLE, hence "bitmask".
cfg.INPUT.MASK_FORMAT = "bitmask"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

class CocoTrainer(DefaultTrainer):
    """DefaultTrainer variant whose evaluator is a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Create the COCO evaluator for `dataset_name`.

        Args:
            cfg: the experiment config; only ``cfg.OUTPUT_DIR`` is read here.
            dataset_name: name of a registered dataset to evaluate on.
            output_folder: directory for evaluation artefacts; defaults to
                ``<cfg.OUTPUT_DIR>/inference``.

        Returns:
            A non-distributed ``COCOEvaluator`` writing into `output_folder`.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        os.makedirs(output_folder, exist_ok=True)
        # Use explicit keywords: passing the config object as the second
        # positional argument is the deprecated calling convention.
        return COCOEvaluator(
            dataset_name, distributed=False, output_dir=output_folder)

# Build the trainer from cfg and start a fresh run (resume=False), then train.
# NOTE(review): with resume=False the initial weights are presumably taken from
# cfg.MODEL.WEIGHTS rather than from a checkpoint in cfg.OUTPUT_DIR — confirm
# against the DefaultTrainer documentation.
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Training log written as one JSON object per line.
METRICS_PATH = "/path/to/output/metrics.json"
# Columns to draw, in legend order.
PLOT_COLUMNS = [
    "total_loss",
    "loss_mask",
    "segm/AP",
    "segm/AP50",
    "segm/AP-c1",
    "segm/AP-c2",
    "segm/AP-c3",
    "segm/AP-c4",
]

df = pd.read_json(METRICS_PATH, lines=True)

fig, ax = plt.subplots()
for col in PLOT_COLUMNS:
    # A metric that was never logged (e.g. no evaluation has run yet) has no
    # column at all; skip it instead of failing with a KeyError.
    if col not in df.columns:
        print(f"skip '{col}': not present in {METRICS_PATH}")
        continue
    # Each metric is only logged on some rows; keep the rows where it exists.
    logged = df.dropna(subset=[col])
    ax.plot(logged["iteration"], logged[col], label=col)

ax.set_xlabel("iteration")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
import cv2
import json
import numpy as np
from tqdm.auto import tqdm
from detectron2.engine import DefaultPredictor

# Single-model inference over every test image, written out as a list of
# {image_id, category_id, segmentation (RLE), score} records.
# Alternative checkpoint: "/path/to/0322_19_model_0002999.pth"
cfg.MODEL.WEIGHTS = "/path/to/0320_18_model_0002999.pth"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.001
cfg.TEST.DETECTIONS_PER_IMAGE = 1000
cfg.MODEL.DEVICE = "cuda"
predictor = DefaultPredictor(cfg)

# Maps a test image file name to the id expected in the submission file.
with open("/path/to/test_image_name_to_ids.json") as f:
    name2id = {x["file_name"]: x["id"] for x in json.load(f)}

submissions = []
for img_file in tqdm(sorted(TEST_DIR.glob("*.tif")), desc="Inference"):
    im = cv2.imread(str(img_file))
    # cv2.imread signals failure by returning None instead of raising.
    if im is None:
        raise FileNotFoundError(f"cv2 could not read {img_file}")
    assert im.dtype == np.uint8, f"{img_file} not uint8"

    insts = predictor(im)["instances"].to("cpu")
    # pred_masks is boolean; the RLE encoder needs uint8 (the ensemble cell
    # performs the same cast), so convert once per image.
    masks = insts.pred_masks.numpy().astype(np.uint8)
    for mask, label, score in zip(masks,
                                  insts.pred_classes.tolist(),
                                  insts.scores.tolist()):
        rle = mask_utils.encode(np.asfortranarray(mask))
        # bytes -> str so the record is JSON serialisable.
        rle["counts"] = rle["counts"].decode("utf-8")
        submissions.append({
            "image_id": name2id[img_file.name],
            # Model classes are 0-based; submission categories are 1-based.
            "category_id": int(label) + 1,
            "segmentation": rle,
            "score": float(score)
        })

with open("test-results.json", "w") as f:
    json.dump(submissions, f)
print("✅ submission  saved: test-results.json")

In [None]:
import cv2
import json
import numpy as np
import torch
from tqdm.auto import tqdm
from detectron2.engine import DefaultPredictor
from detectron2.structures import Boxes, pairwise_iou
from ensemble_boxes import weighted_boxes_fusion

def build_pred(weight_path):
    """Create a DefaultPredictor that loads the checkpoint at `weight_path`.

    The predictor is built from a copy of the module-level ``cfg`` so that
    building several predictors does not overwrite the shared config.

    Args:
        weight_path: path of the model checkpoint to load.

    Returns:
        A ``DefaultPredictor`` on CUDA with a 0.001 score threshold and up to
        1000 detections per image.
    """
    pred_cfg = cfg.clone()
    pred_cfg.MODEL.WEIGHTS = weight_path
    pred_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.001
    pred_cfg.TEST.DETECTIONS_PER_IMAGE = 1000
    pred_cfg.MODEL.DEVICE = "cuda"
    return DefaultPredictor(pred_cfg)

def wbf_one_image(outs, H, W, iou_thr=0.55, skip_thr=0.0):
    """Fuse the box predictions of several models for one image.

    Args:
        outs: iterable of per-model prediction objects exposing
            ``pred_boxes.tensor``, ``scores`` and ``pred_classes``.
        H, W: image height and width in pixels, used to normalise boxes.
        iou_thr: IoU threshold forwarded to ``weighted_boxes_fusion``.
        skip_thr: forwarded as ``skip_box_thr``.

    Returns:
        ``(boxes, scores, labels)`` as returned by ``weighted_boxes_fusion``,
        with the boxes scaled back to pixel coordinates.
    """
    # Boxes are (x1, y1, x2, y2): x is divided by W and y by H.
    scale = np.array([W, H, W, H])

    norm_boxes = [inst.pred_boxes.tensor.numpy() / scale for inst in outs]
    all_scores = [inst.scores.numpy() for inst in outs]
    all_labels = [inst.pred_classes.numpy() for inst in outs]

    fused_boxes, fused_scores, fused_labels = weighted_boxes_fusion(
        norm_boxes, all_scores, all_labels,
        iou_thr=iou_thr, skip_box_thr=skip_thr
    )
    return fused_boxes * scale, fused_scores, fused_labels

# Two-model ensemble: boxes, scores and labels come from weighted box fusion;
# each fused box borrows the mask of the best-overlapping source instance.
pred_a = build_pred("/path/to/0320_18_model_0002999.pth")
pred_b = build_pred("/path/to/0322_19_model_0002999.pth")

# Maps a test image file name to the id expected in the submission file.
with open("/path/to/test_image_name_to_ids.json") as f:
    name2id = {x["file_name"]: x["id"] for x in json.load(f)}

submissions = []
for img_file in tqdm(sorted(TEST_DIR.glob("*.tif")), desc="WBF Inference"):
    im = cv2.imread(str(img_file))
    # cv2.imread signals failure by returning None instead of raising.
    if im is None:
        raise FileNotFoundError(f"cv2 could not read {img_file}")
    H, W = im.shape[:2]

    inst1 = pred_a(im)["instances"].to("cpu")
    inst2 = pred_b(im)["instances"].to("cpu")

    # Only models that predicted something can donate a mask.
    sources = [inst for inst in (inst1, inst2) if len(inst) > 0]
    if not sources:
        continue

    boxes, scores, labels = wbf_one_image([inst1, inst2], H, W, 0.6, 0.0)
    if len(boxes) == 0:
        continue

    fused_boxes = Boxes(torch.as_tensor(boxes, dtype=torch.float32))
    fused_labels = torch.as_tensor(labels).long()

    # For every source model, find per fused box the best-overlapping instance
    # of the SAME class, computed once per image rather than once per box.
    best_per_source = []
    for inst in sources:
        iou = pairwise_iou(fused_boxes, inst.pred_boxes)
        same_class = fused_labels[:, None] == inst.pred_classes[None, :]
        # Other-class instances get IoU -1 so they can never be selected.
        iou = torch.where(same_class, iou, torch.full_like(iou, -1.0))
        best_per_source.append(iou.max(dim=1))

    image_id = name2id[img_file.name]
    for k, (sc, lb) in enumerate(zip(scores, labels)):
        best_mask = None
        best_iou = -1
        for inst, (src_iou, src_idx) in zip(sources, best_per_source):
            cand_iou = float(src_iou[k])
            # Strict '>' keeps the first model on ties.
            if cand_iou > best_iou:
                best_iou = cand_iou
                best_mask = inst.pred_masks[int(src_idx[k])].numpy()

        # No same-class source instance: nothing sensible to attach.
        if best_mask is None:
            continue

        rle = mask_utils.encode(np.asfortranarray(best_mask.astype(np.uint8)))
        # bytes -> str so the record is JSON serialisable.
        rle["counts"] = rle["counts"].decode("utf-8")
        submissions.append({
            "image_id": image_id,
            # Model classes are 0-based; submission categories are 1-based.
            "category_id": int(lb) + 1,
            "segmentation": rle,
            "score": float(sc),
        })

with open("test-results.json", "w") as f:
    json.dump(submissions, f)
print("✅ WBF submission saved!")