論文<br>
https://arxiv.org/abs/2211.06220<br>
<br>
GitHub<br>
https://github.com/SHI-Labs/OneFormer<br>
<br>
<a href="https://colab.research.google.com/github/kaz12tech/ai_demos/blob/master/OneFormer_demo.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 環境セットアップ

## GPU確認

In [None]:
# Print the NVIDIA GPU(s) visible to this runtime.
!nvidia-smi

## GitHubからコード取得

In [None]:
# Work from the Colab content root.
%cd /content

# Fetch the Colab-oriented OneFormer repository.
!git clone https://github.com/SHI-Labs/OneFormer-Colab.git

# Pin the checkout to a fixed commit (Nov 18, 2022).
%cd /content/OneFormer-Colab
!git checkout 08ae914313bd1ff4688eb5e58f7845760fd60643

# Rename the checkout so that later cells can use /content/OneFormer.
%cd /content
!mv OneFormer-Colab OneFormer

## ライブラリのインストール

In [None]:
# Run the installs from the repository root so requirements.txt resolves.
%cd /content/OneFormer

# install pytorch (pinned; the wheel indexes used below are for torch 1.9 / cu102)
!pip install torch==1.9.0 torchvision==0.10.0 --quiet
# install opencv
!pip install -U opencv-python --quiet
# install detectron2
!python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html --quiet
# install natten (pinned) from the SHI-Labs wheel index
!pip3 install natten==0.14.2 -f https://shi-labs.com/natten/wheels/cu102/torch1.9/index.html --quiet

# install other
!pip install git+https://github.com/cocodataset/panopticapi.git --quiet
!pip install git+https://github.com/mcordts/cityscapesScripts.git --quiet
!pip install -r requirements.txt --quiet
# ipython-autotime backs the `%load_ext autotime` calls in the inference cells;
# imutils is used to resize the test image.
!pip install ipython-autotime
!pip install imutils

## ライブラリのインポート

In [None]:
# The `demo` and `oneformer` packages imported below live in the repository,
# so the working directory has to be its root.
%cd /content/OneFormer

# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# Second logger registered under the "oneformer" name.
setup_logger(name="oneformer")

# Import libraries
import numpy as np
import cv2
import torch
from google.colab.patches import cv2_imshow
import imutils
# Prefer CUDA when torch reports it as available; otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else "cpu"
print("using device is", device)

# Import detectron2 utilities
from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.data import MetadataCatalog
# Predictor and visualizer come from the repository's own `demo` package.
from demo.defaults import DefaultPredictor
from demo.visualizer import Visualizer, ColorMode

# import OneFormer Project
from oneformer import (
    add_oneformer_config,
    add_common_config,
    add_swin_config,
    add_dinat_config,
    add_convnext_config,
)

# Helper関数の定義

In [None]:
# Config file (relative to the repository root) for each supported dataset
# when the Swin-L backbone is selected.
SWIN_CFG_DICT = dict(
    cityscapes="configs/cityscapes/oneformer_swin_large_IN21k_384_bs16_90k.yaml",
    coco="configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml",
    ade20k="configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml",
)

# Same mapping for the DiNAT-L backbone.
DINAT_CFG_DICT = dict(
    cityscapes="configs/cityscapes/oneformer_dinat_large_bs16_90k.yaml",
    coco="configs/coco/oneformer_dinat_large_bs16_100ep.yaml",
    ade20k="configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml",
)

def setup_cfg(dataset, model_path, use_swin, device='cpu'):
  """Build a frozen detectron2 config for a OneFormer checkpoint.

  Args:
    dataset: Key into ``SWIN_CFG_DICT`` / ``DINAT_CFG_DICT`` selecting the
      config file to merge.
    model_path: Path to the weights file, stored in ``cfg.MODEL.WEIGHTS``.
    use_swin: When truthy the config path is taken from ``SWIN_CFG_DICT``,
      otherwise from ``DINAT_CFG_DICT``.
    device: Value stored in ``cfg.MODEL.DEVICE``. Defaults to ``'cpu'``,
      which is what this function always used before the parameter existed.

  Returns:
    The frozen config object.

  Raises:
    KeyError: If ``dataset`` has no entry in the selected table.
  """
  cfg_table = SWIN_CFG_DICT if use_swin else DINAT_CFG_DICT
  # Fail before any config work, with a message that names the valid choices.
  if dataset not in cfg_table:
    raise KeyError(
      f"unknown dataset {dataset!r}; expected one of {sorted(cfg_table)}"
    )

  cfg = get_cfg()
  # Register every extension's config keys before merging the YAML file.
  add_deeplab_config(cfg)
  add_common_config(cfg)
  add_swin_config(cfg)
  add_dinat_config(cfg)
  add_convnext_config(cfg)
  add_oneformer_config(cfg)
  cfg.merge_from_file(cfg_table[dataset])
  cfg.MODEL.DEVICE = device
  cfg.MODEL.WEIGHTS = model_path
  cfg.freeze()
  return cfg

def setup_modules(dataset, model_path, use_swin):
  """Create the predictor and the dataset metadata used for visualization.

  Args:
    dataset: Dataset key forwarded to ``setup_cfg``.
    model_path: Path to the weights file, forwarded to ``setup_cfg``.
    use_swin: Backbone selector, forwarded to ``setup_cfg``.

  Returns:
    A ``(predictor, metadata)`` tuple.
  """
  cfg = setup_cfg(dataset, model_path, use_swin)
  predictor = DefaultPredictor(cfg)

  # Resolve the dataset name once. The Cityscapes check below used to index
  # TEST_PANOPTIC[0] unconditionally and raised IndexError on an empty list,
  # even though the metadata lookup already handled that case.
  panoptic_sets = cfg.DATASETS.TEST_PANOPTIC
  dataset_name = panoptic_sets[0] if len(panoptic_sets) else "__unused"
  metadata = MetadataCatalog.get(dataset_name)

  if 'cityscapes_fine_sem_seg_val' in dataset_name:
    # Imported lazily: only needed for this dataset.
    from cityscapesscripts.helpers.labels import labels
    # Use the cityscapesscripts label colors, skipping labels whose trainId is 255.
    stuff_colors = [k.color for k in labels if k.trainId != 255]
    metadata = metadata.set(stuff_colors=stuff_colors)

  return predictor, metadata

def panoptic_run(img, predictor, metadata):
  """Run panoptic segmentation on ``img`` and draw the result over it.

  Args:
    img: Image array indexed as ``[row, col, channel]``. The visualizer gets
      it with the channel axis reversed; the predictor gets it unchanged.
      Presumably BGR as returned by ``cv2.imread`` (confirm against the caller).
    predictor: Callable invoked as ``predictor(img, "panoptic")``. Its result
      must hold a ``(panoptic_seg, segments_info)`` pair under ``"panoptic_seg"``.
    metadata: Dataset metadata forwarded to ``Visualizer``.

  Returns:
    Whatever ``Visualizer.draw_panoptic_seg_predictions`` returns.
  """
  visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
  predictions = predictor(img, "panoptic")
  panoptic_seg, segments_info = predictions["panoptic_seg"]
  # Drawing happens on the host: detectron2-style visualizers convert the
  # tensor to NumPy, which is only possible for CPU tensors. Moving it to a
  # CUDA device (as the global `device` may select) would break that step.
  return visualizer.draw_panoptic_seg_predictions(
    panoptic_seg.to("cpu"), segments_info, alpha=0.5
  )

def instance_run(img, predictor, metadata):
  """Run instance segmentation on ``img`` and draw the result over it.

  Args:
    img: Image array indexed as ``[row, col, channel]``. The visualizer gets
      it with the channel axis reversed; the predictor gets it unchanged.
      Presumably BGR as returned by ``cv2.imread`` (confirm against the caller).
    predictor: Callable invoked as ``predictor(img, "instance")``. Its result
      must provide an ``"instances"`` entry supporting ``.to(...)``.
    metadata: Dataset metadata forwarded to ``Visualizer``.

  Returns:
    Whatever ``Visualizer.draw_instance_predictions`` returns.
  """
  visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
  predictions = predictor(img, "instance")
  # Drawing happens on the host: detectron2-style visualizers convert the
  # prediction fields to NumPy, which is only possible for CPU tensors.
  # Moving them to a CUDA device would break that step.
  instances = predictions["instances"].to("cpu")
  return visualizer.draw_instance_predictions(predictions=instances, alpha=0.5)

def semantic_run(img, predictor, metadata):
  """Run semantic segmentation on ``img`` and draw the result over it.

  Args:
    img: Image array indexed as ``[row, col, channel]``. The visualizer gets
      it with the channel axis reversed; the predictor gets it unchanged.
      Presumably BGR as returned by ``cv2.imread`` (confirm against the caller).
    predictor: Callable invoked as ``predictor(img, "semantic")``. Its result
      must provide a ``"sem_seg"`` tensor, which is reduced with
      ``argmax(dim=0)`` before drawing.
    metadata: Dataset metadata forwarded to ``Visualizer``.

  Returns:
    Whatever ``Visualizer.draw_sem_seg`` returns.
  """
  visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, instance_mode=ColorMode.IMAGE)
  predictions = predictor(img, "semantic")
  # argmax over dim 0 picks one index per pixel (presumably the class id).
  # The map is kept on the CPU because detectron2-style visualizers convert
  # it to NumPy, which fails for CUDA tensors.
  label_map = predictions["sem_seg"].argmax(dim=0).to("cpu")
  return visualizer.draw_sem_seg(label_map, alpha=0.5)

# Dispatch table: task name -> helper that runs that task and draws the result.
TASK_INFER = dict(
    panoptic=panoptic_run,
    instance=instance_run,
    semantic=semantic_run,
)

# 学習済みモデルのセットアップ

In [None]:
%cd /content/OneFormer
!mkdir pretrained

use_swin = False

if use_swin == False:
  # download wight, backborn: DiNAT-L, ADE20k dataset
  !wget -c https://shi-labs.com/projects/oneformer/ade20k/250_16_dinat_l_oneformer_ade20k_160k.pth \
        -O pretrained/250_16_dinat_l_oneformer_ade20k_160k.pth
  # init modules
  predictor, metadata = setup_modules("ade20k", "pretrained/250_16_dinat_l_oneformer_ade20k_160k.pth", use_swin)
else:
  # download wight, backborn: Swin-L, ADE20k dataset
  !wget -c https://shi-labs.com/projects/oneformer/ade20k/250_16_swin_l_oneformer_ade20k_160k.pth \
        -O pretrained/250_16_swin_l_oneformer_ade20k_160k.pth
  # init modules
  predictor, metadata = setup_modules("ade20k", "pretrained/250_16_swin_l_oneformer_ade20k_160k.pth", use_swin)

# テスト画像のセットアップ

In [None]:
%cd /content/OneFormer
!mkdir input_img

!wget -c https://www.pakutaso.com/shared/img/thumb/YAT4M3A7518_TP_V.jpg \
      -O input_img/test01.jpg

In [None]:
image_path = "input_img/test01.jpg"
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded. Fail here with a clear message rather than inside resize.
if img is None:
  raise FileNotFoundError(f"could not read image: {image_path}")
# Scale to a width of 640 pixels before inference and preview the input.
img = imutils.resize(img, width=640)
cv2_imshow(img)

# panoptic segmentation

In [None]:
# setup task
task = "panoptic"

# load the autotime extension (installed as ipython-autotime) for timing output
%load_ext autotime
# inference: look up the helper registered for this task and render its overlay
out = TASK_INFER[task](img, predictor, metadata).get_image()

# show result (channel axis reversed again for cv2_imshow; the helper reversed it on input)
cv2_imshow(out[:, :, ::-1])

# instance segmentation

In [None]:
# setup task
task = "instance"

# load the autotime extension (installed as ipython-autotime) for timing output
%load_ext autotime
# inference: look up the helper registered for this task and render its overlay
out = TASK_INFER[task](img, predictor, metadata).get_image()

# show result (channel axis reversed again for cv2_imshow; the helper reversed it on input)
cv2_imshow(out[:, :, ::-1])

# semantic segmentation

In [None]:
# setup task
task = "semantic"

# load the autotime extension (installed as ipython-autotime) for timing output
%load_ext autotime
# inference: look up the helper registered for this task and render its overlay
out = TASK_INFER[task](img, predictor, metadata).get_image()

# show result (channel axis reversed again for cv2_imshow; the helper reversed it on input)
cv2_imshow(out[:, :, ::-1])