In [1]:
import os
os.chdir("../")

In [79]:
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import structures

# Model
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer

# Others
import torch
import numpy as np
import pycocotools
from pycocotools.coco import COCO
from matplotlib import pyplot as plt
import cv2
from PIL import Image, ImageDraw
from imantics import Polygons, Mask
from skimage import measure

## Model Setup

In [3]:
cfg = get_cfg()
cfg.MODEL.DEVICE = "cpu"
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.8   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

In [4]:
cfg = get_cfg()
cfg.MODEL.DEVICE = "cpu"
cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 15
cfg.OUTPUT_DIR = './output/keypoint_v0'
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.8   # set a custom testing threshold
predictor_kp = DefaultPredictor(cfg)

## Generate Prediction

In [81]:
res = []
for file_path in os.listdir("data/raw/test/images/"):
    img = cv2.imread("data/raw/test/images/" + file_path)
    outputs = predictor(img)
    outputs_kp = predictor_kp(img)
    print(file_path)
    if len(outputs["instances"]) == 0:
        res.append(file_path)
    else:
        print(len(outputs["instances"]))
        selected_kp_idx = structures.pairwise_iou(outputs["instances"].pred_boxes, outputs_kp["instances"].pred_boxes).argmax(1)
        for i in range(len(outputs["instances"])):
            output_pred = outputs["instances"].pred_masks[i]
            output_pred = np.array(output_pred, dtype=np.uint8) 
            contours = measure.find_contours(output_pred, 0.5)

            output_seg = []
            for contour in contours:
                contour = np.flip(contour, axis=1)
                segmentation = contour.ravel().tolist()
                output_seg.append(segmentation)

            output_kp_pred = outputs_kp["instances"].pred_keypoints[selected_kp_idx[i]]
            
            seg_all = ''
            for a in output_seg:
                seg = ''
                for i, b in enumerate(a):
                    if i+1 == len(a):
                        seg = seg + str(b)
                    else:
                        seg = seg + str(b) + ","
                seg = "(" + seg + ")"
                seg_all = seg_all + seg
            seg_all = "[" + seg_all + "]"
            
            base = ','.join(map(str, torch.round(output_kp_pred.view(-1)).tolist()))
            res.append(file_path + ',' + base + seg_all)

893965.png
1
040364.png
1
223135.jpg
1
581725.png
1
617674.png
1
482414.jpg
2


RuntimeError: cannot perform reduction function argmax on a tensor with no elements because the operation does not have an identity

In [9]:
with open('data/sub/baseline_2d/solution/images/annotations/solution.txt', 'w') as f:
    for item in res:
        f.write("%s\n" % item)

## Test

In [5]:
im = cv2.imread("data/raw/train/images/118330.png")
outputs = predictor(im)
outputs_kp = predictor_kp(im)

In [9]:
outputs

{'instances': Instances(num_instances=1, image_height=1080, image_width=1920, fields=[pred_boxes: Boxes(tensor([[ 532.4975,  770.8593, 1002.0208,  993.7135]])), scores: tensor([0.9595]), pred_classes: tensor([0]), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          ...,
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False],
          [False, False, False,  ..., False, False, False]]])])}

In [12]:
outputs_kp["instances"].pred_boxes

Boxes(tensor([[ 533.6050,  769.1152,  964.2634,  997.7163],
        [ 885.0015,  512.8485, 1169.5735, 1070.1250],
        [1165.2817,  551.8598, 1397.6426, 1016.7322],
        [1069.8679,  418.4604, 1175.0128,  681.3787],
        [   0.0000,  979.8174,  114.6908, 1078.4587],
        [1018.3921,  293.1704, 1099.1469,  484.2839],
        [1149.9778,  296.8847, 1205.8925,  485.1429]]))

In [16]:
structures.pairwise_iou(outputs_kp["instances"].pred_boxes, outputs_kp["instances"].pred_boxes)

tensor([[1.0000, 0.0758, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0758, 1.0000, 0.0075, 0.0992, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0075, 1.0000, 0.0094, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0992, 0.0094, 1.0000, 0.0000, 0.0468, 0.0457],
        [0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0468, 0.0000, 1.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0457, 0.0000, 0.0000, 1.0000]])

In [18]:
structures.pairwise_iou(outputs["instances"].pred_boxes, outputs_kp["instances"].pred_boxes).argmax(1)

tensor([0])

In [22]:
structures.pairwise_iou(outputs_kp["instances"].pred_boxes[1:4], outputs_kp["instances"].pred_boxes).argmax(1)

tensor([1, 2, 3])

In [27]:
outputs_kp["instances"].pred_keypoints[0]

tensor([[5.7177e+02, 8.1440e+02, 1.8812e-02],
        [5.5737e+02, 8.3885e+02, 5.8443e-02],
        [5.8762e+02, 8.7910e+02, 2.2848e-02],
        [6.6395e+02, 8.0578e+02, 4.1520e-02],
        [7.2589e+02, 7.8134e+02, 1.9490e-02],
        [7.2733e+02, 7.8134e+02, 6.6478e-03],
        [6.0634e+02, 8.9636e+02, 5.3634e-02],
        [6.0202e+02, 9.6824e+02, 2.6893e-01],
        [5.5449e+02, 9.6105e+02, 9.1407e-02],
        [7.5758e+02, 8.8629e+02, 4.5159e-02],
        [8.7856e+02, 8.8054e+02, 6.2156e-02],
        [9.4050e+02, 9.4668e+02, 1.3741e-02],
        [7.7054e+02, 8.8773e+02, 3.1004e-01],
        [8.4832e+02, 9.3661e+02, 7.8363e-02],
        [5.5737e+02, 8.5466e+02, 2.9826e-02]])