In [None]:
import torch

# First two components of the installed PyTorch version, e.g. "1.10".
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])

# CUDA tag in the form used by the detectron2 wheel index, e.g. "cu113" or "cpu".
# Only some PyTorch builds carry a "+<tag>" local version suffix; without one,
# split("+")[-1] would return the whole version string and yield a broken wheel URL,
# so fall back to the CUDA version PyTorch reports it was built with.
if "+" in torch.__version__:
    CUDA_VERSION = torch.__version__.split("+")[-1]
elif torch.version.cuda is None:
    CUDA_VERSION = "cpu"
else:
    CUDA_VERSION = "cu" + torch.version.cuda.replace(".", "")
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

# Release unused cached GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

### Setup detectron2

In [None]:
# Reference - https://github.com/facebookresearch/detectron2

# Install detectron2 that matches the above pytorch version
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html
#!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html

In [None]:
import detectron2
from detectron2.utils.logger import setup_logger
# Set up detectron2's logger using the helper's default arguments.
setup_logger()
# Set up a second logger named "mask2former"
# (presumably so messages from the Mask2Former package are shown as well — confirm).
setup_logger(name="mask2former")

### Setup Mask2Former

In [None]:
# Reference - https://github.com/facebookresearch/Mask2Former

%cd /pt/segmentation

import os
m2f_dir_exists = os.path.exists(os.path.join(os.getcwd(), 'Mask2Former'))
if not m2f_dir_exists:
    print('Cloning https://github.com/facebookresearch/Mask2Former.git')
    !git clone https://github.com/facebookresearch/Mask2Former.git
%cd /pt/segmentation/Mask2Former

!pip install -U opencv-python
!pip install git+https://github.com/cocodataset/panopticapi.git
!pip install -r requirements.txt
%cd mask2former/modeling/pixel_decoder/ops
!python setup.py build install
%cd ../../../../

In [None]:
# Compile the MultiScaleDeformableAttention CUDA op by running the make.sh script
# located in the ops directory. Restart the kernel afterwards
# (presumably so the freshly built extension can be imported — confirm).
%cd /pt/segmentation/Mask2Former/mask2former/modeling/pixel_decoder/ops/
!sh /pt/segmentation/Mask2Former/mask2former/modeling/pixel_decoder/ops/make.sh

In [None]:
# Work from the repository root: the model config is later referenced by a
# relative path ("configs/coco/...").
# NOTE(review): `from mask2former import ...` below presumably also relies on this
# working directory — confirm.
%cd /pt/segmentation/Mask2Former/

# import some common libraries
import numpy as np
import cv2
import torch

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog
from detectron2.projects.deeplab import add_deeplab_config
# Metadata registered under "coco_2017_val_panoptic"; handed to the Visualizer
# when the predictions are drawn.
coco_metadata = MetadataCatalog.get("coco_2017_val_panoptic")

# import Mask2Former project
from mask2former import add_maskformer2_config

# IPython display helpers, used to render images inline
from IPython import display

### Image setup

In [None]:
# Reference - https://github.com/googlecolab/colabtools/blob/main/google/colab/patches/__init__.py

import cv2
from IPython import display
import PIL


def cv2_imshow(a):
  """A replacement for cv2.imshow() for use in Jupyter notebooks.

  The array is clipped to [0, 255], cast to uint8, converted from OpenCV's
  BGR(A) channel order to RGB(A) and rendered inline through PIL.

  Args:
    a : np.ndarray. shape (N, M) or (N, M, 1) is an NxM grayscale image. shape
      (N, M, 3) is an NxM BGR color image. shape (N, M, 4) is an NxM BGRA color
      image.
  """
  # `import PIL` alone does not guarantee that the PIL.Image submodule is loaded.
  import PIL.Image

  a = a.clip(0, 255).astype('uint8')
  if a.ndim == 3:
    channels = a.shape[2]
    if channels == 1:
      # Drop the singleton channel axis: neither the BGR->RGB conversion nor
      # PIL.Image.fromarray accepts an (N, M, 1) array.
      a = a[:, :, 0]
    elif channels == 4:
      # cv2 stores colors as BGRA; convert to RGBA
      a = cv2.cvtColor(a, cv2.COLOR_BGRA2RGBA)
    else:
      # cv2 stores colors as BGR; convert to RGB
      a = cv2.cvtColor(a, cv2.COLOR_BGR2RGB)
  display.display(PIL.Image.fromarray(a))

In [None]:
!wget http://images.cocodataset.org/val2017/000000005477.jpg -q -O /pt/segmentation/input/plane.jpg

image_name = "plane.jpg"
im = cv2.imread("/pt/segmentation/input/" + image_name)
cv2_imshow(im)

### Run predictor

In [None]:
# Model definition (path relative to the current working directory) and the
# matching pretrained weights.
config_file = "configs/coco/panoptic-segmentation/swin/maskformer2_swin_large_IN21k_384_bs16_100ep.yaml"
weights_url = 'https://dl.fbaipublicfiles.com/maskformer/mask2former/coco/panoptic/maskformer2_swin_large_IN21k_384_bs16_100ep/model_final_f07440.pkl'

# Start from the default config and apply the DeepLab and Mask2Former config
# helpers before merging the model's YAML file.
cfg = get_cfg()
for add_project_config in (add_deeplab_config, add_maskformer2_config):
    add_project_config(cfg)
cfg.merge_from_file(config_file)
cfg.MODEL.WEIGHTS = weights_url

# Turn on all three inference outputs: semantic, instance and panoptic.
test_options = cfg.MODEL.MASK_FORMER.TEST
test_options.SEMANTIC_ON = True
test_options.INSTANCE_ON = True
test_options.PANOPTIC_ON = True

predictor = DefaultPredictor(cfg)

In [None]:
# Run the predictor on the loaded image. The result is indexed further down with
# the keys "panoptic_seg", "instances" and "sem_seg".
outputs = predictor(im)

### Show predictions

In [None]:
# Draw the panoptic, instance and semantic predictions and show them stacked vertically.

def _fresh_visualizer():
    """Return a new Visualizer over the channel-reversed view of `im`, one per drawing."""
    return Visualizer(im[:, :, ::-1], coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW)

panoptic_seg, segments_info = outputs["panoptic_seg"]
panoptic_result = _fresh_visualizer().draw_panoptic_seg(panoptic_seg.to("cpu"), segments_info).get_image()

instances_cpu = outputs["instances"].to("cpu")
instance_result = _fresh_visualizer().draw_instance_predictions(instances_cpu).get_image()

semantic_labels = outputs["sem_seg"].argmax(0).to("cpu")
semantic_result = _fresh_visualizer().draw_sem_seg(semantic_labels).get_image()

print("Panoptic segmentation (top), instance segmentation (middle), semantic segmentation (bottom)")
stacked_results = np.concatenate((panoptic_result, instance_result, semantic_result), axis=0)
# cv2_imshow expects BGR input, so reverse the channel axis of the stacked image.
cv2_imshow(stacked_results[:, :, ::-1])

In [None]:
#cv2.imwrite("/pt/segmentation/output/panoptic_result_" + image_name, panoptic_result)
#cv2.imwrite("/pt/segmentation/output/instance_result_" + image_name, instance_result)
#cv2.imwrite("/pt/segmentation/output/semantic_result_" + image_name, semantic_result)

In [None]:
# Side-by-side composite of the three visualizations. The arrays come from
# Visualizer.get_image() and are in RGB channel order.
combined_vis = np.concatenate((panoptic_result, instance_result, semantic_result), axis=1)
# cv2.imwrite expects BGR; writing the RGB array directly would save the file with
# red and blue swapped. combined_vis itself stays RGB for the inline PIL display.
cv2.imwrite("/pt/segmentation/output/combined_result_" + image_name, cv2.cvtColor(combined_vis, cv2.COLOR_RGB2BGR))

In [None]:
from IPython import display
from PIL import Image
# Show the side-by-side composite inline. Uses the `Image` name imported in this cell
# instead of `PIL.Image`, which depended on `import PIL` having run in another cell.
print("Panoptic segmentation (left), instance segmentation (middle), semantic segmentation (right)")
display.display(Image.fromarray(combined_vis))

In [None]:
# memory_summary() returns a multi-line string; print it so the table is rendered
# with real line breaks instead of the escaped repr shown for a bare expression.
print(torch.cuda.memory_summary())