In [None]:
from google.colab import drive
drive.mount("/content/drive")

In [None]:
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
%pip install git+https://github.com/cocodataset/panopticapi.git
!git clone https://github.com/bytedance/fc-clip.git

In [None]:
%cd fc-clip
%pip install -r requirements.txt

In [None]:
%cd fcclip/modeling/pixel_decoder/ops

In [None]:
!sh make.sh

In [None]:
%cd ../../../..

In [None]:
%pip install open-clip-torch==2.24.0

In [8]:
import os
import torch
import detectron2
from detectron2.engine import DefaultPredictor
from detectron2.data.datasets import load_coco_json
from detectron2.evaluation import inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.projects.deeplab import add_deeplab_config
from fcclip import (
    COCOInstanceNewBaselineDatasetMapper,
    InstanceSegEvaluator,
    MaskFormerInstanceDatasetMapper,
    add_maskformer2_config,
    add_fcclip_config
)

In [9]:
data_dir_path = "/content/drive/MyDrive/instseg/data/"

In [10]:
def register_coco_dataset(name, json_file, image_root):
    detectron2.data.DatasetCatalog.register(
        name,
        lambda: load_coco_json(json_file, image_root, name),
    )
    detectron2.data.MetadataCatalog.get(name).set(
        thing_classes=[
            "Can",
            "Other",
            "Bottle",
            "Bottle cap",
            "Cup",
            "Lid",
            "Plastic bag + wrapper",
            "Pop tab",
            "Straw",
            "Cigarette",
        ]
    )

In [11]:
def configure_model():
    cfg = detectron2.config.get_cfg()
    add_deeplab_config(cfg)
    add_maskformer2_config(cfg)
    add_fcclip_config(cfg)
    cfg.merge_from_file("configs/coco/panoptic-segmentation/fcclip/fcclip_convnext_large_eval_coco.yaml")
    cfg.MODEL.WEIGHTS = (
        "/content/drive/MyDrive/instseg/fcclip_cocopan_r50.pth"
    )
    cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON = True
    cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON = False
    cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON = False
    cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 10  # Set the number of classes to match TACO10
    cfg.MODEL.FC_CLIP.TEXT_SEQ_LEN = 128  # Text sequence length for CLIP
    cfg.DATASETS.TEST = ("taco10_test",)
    cfg.MODEL.DEVICE = "cuda"
    cfg.freeze()
    return cfg

In [12]:
register_coco_dataset(
        name="taco10_test",
        json_file=f"{data_dir_path}mapped_annotations_0_test.json",
        image_root=f"{data_dir_path}images/",
)

In [13]:
cfg = configure_model()

In [None]:
predictor = DefaultPredictor(cfg)

In [15]:
detectron2.data.MetadataCatalog.get("taco10_test").set(
    json_file=f"{data_dir_path}mapped_annotations_0_test.json"
)

namespace(name='taco10_test',
          thing_classes=['Can',
                         'Other',
                         'Bottle',
                         'Bottle cap',
                         'Cup',
                         'Lid',
                         'Plastic bag + wrapper',
                         'Pop tab',
                         'Straw',
                         'Cigarette'],
          json_file='/content/drive/MyDrive/instseg/data/mapped_annotations_0_test.json')

In [16]:
evaluator = InstanceSegEvaluator("taco10_test",output_dir="./output")

In [None]:
test_loader = build_detection_test_loader(cfg, dataset_name="taco10_test", mapper=COCOInstanceNewBaselineDatasetMapper(cfg, is_train=True))

In [18]:
evaluation_results = inference_on_dataset(predictor.model, test_loader, evaluator)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.

In [19]:
print(evaluation_results)

OrderedDict([('bbox', {'AP': 0.0, 'AP50': 0.0, 'AP75': 0.0, 'APs': 0.0, 'APm': 0.0, 'APl': 0.0, 'AP-Can': 0.0, 'AP-Other': 0.0, 'AP-Bottle': 0.0, 'AP-Bottle cap': 0.0, 'AP-Cup': 0.0, 'AP-Lid': 0.0, 'AP-Plastic bag + wrapper': 0.0, 'AP-Pop tab': 0.0, 'AP-Straw': 0.0, 'AP-Cigarette': 0.0}), ('segm', {'AP': 0.0, 'AP50': 0.0, 'AP75': 0.0, 'APs': 0.0, 'APm': 0.0, 'APl': 0.0, 'AP-Can': 0.0, 'AP-Other': 0.0, 'AP-Bottle': 0.0, 'AP-Bottle cap': 0.0, 'AP-Cup': 0.0, 'AP-Lid': 0.0, 'AP-Plastic bag + wrapper': 0.0, 'AP-Pop tab': 0.0, 'AP-Straw': 0.0, 'AP-Cigarette': 0.0})])
