Rebuild the same model config, create the model, and load the model parameters from the last training run. Then generate predictions on a new dataset and save them as annotations in a JSON file (the initial JSON is exported from CVAT in COCO format).

In [1]:
import torch
import os
from d2go.runner import GeneralizedRCNNRunner
from d2go.model_zoo import model_zoo

def prepare_for_launch():
    """Create the d2go runner and the config used to build the detector.

    Starts from the runner's default config, merges
    ``c4_faster_rcnn_fbnetv3a_C4.yaml`` on top of it, applies the overrides
    below, and makes sure ``cfg.OUTPUT_DIR`` exists on disk.

    Returns:
        tuple: ``(cfg, runner)`` -- the populated config and the
        ``GeneralizedRCNNRunner`` it was obtained from.
    """
    runner = GeneralizedRCNNRunner()
    cfg = runner.get_default_cfg()
    cfg.merge_from_file("c4_faster_rcnn_fbnetv3a_C4.yaml")

    # Use the CPU whenever a ``CI`` environment variable is present, the GPU otherwise.
    if 'CI' in os.environ:
        device = "cpu"
    else:
        device = "cuda"

    # --- model ---
    cfg.MODEL.DEVICE = device
    # No checkpoint path is configured here; the model-zoo alternative would be
    # model_zoo.get_checkpoint_url("faster_rcnn_fbnetv3a_C4.yaml").
    cfg.MODEL.WEIGHTS = None
    cfg.MODEL_EMA.ENABLED = False
    # Number of object classes only -- do NOT add +1 for background.
    # (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
    # Smaller than the default of 512, which makes training faster.
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128

    # --- data ---
    cfg.DATASETS.TRAIN = ("c4_train",)
    cfg.DATASETS.TEST = ("c4_val",)
    cfg.DATALOADER.NUM_WORKERS = 2

    # --- solver ---
    cfg.SOLVER.OPTIMIZER = 'adamw_mt'
    cfg.SOLVER.IMS_PER_BATCH = 8
    cfg.SOLVER.BASE_LR = 0.0005
    cfg.SOLVER.MAX_ITER = 50000
    cfg.SOLVER.STEPS = []  # empty: the learning rate is not decayed

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    return cfg, runner

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Recreate the config/runner pair and instantiate the model from it.
cfg, runner = prepare_for_launch()
model = runner.build_model(cfg)

# Restore the weights saved by the last training run.
# map_location="cpu" lets a checkpoint that was saved on a GPU be read on a
# machine without CUDA (cfg.MODEL.DEVICE may be "cpu"); load_state_dict then
# copies the values into the model's own parameters.
checkpoint = torch.load("output/model_final.pth", map_location="cpu")
model.load_state_dict(checkpoint['model'])

INFO:d2go.modeling.backbone.fbnet_v2:Build FBNet using unified arch_def:
trunk
- {'block_op': 'conv_k3', 'block_cfg': {'out_channels': 16, 'stride': 2}, 'stage_idx': 0, 'block_idx': 0}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 16, 'stride': 1, 'expansion': 1, 'less_se_channels': False}, 'stage_idx': 0, 'block_idx': 1}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 32, 'stride': 2, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 1, 'block_idx': 0}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 32, 'stride': 1, 'expansion': 2, 'less_se_channels': False}, 'stage_idx': 1, 'block_idx': 1}
- {'block_op': 'ir_k5', 'block_cfg': {'out_channels': 40, 'stride': 2, 'expansion': 4, 'less_se_channels': False}, 'stage_idx': 2, 'block_idx': 0}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 40, 'stride': 1, 'expansion': 3, 'less_se_channels': False}, 'stage_idx': 2, 'block_idx': 1}
- {'block_op': 'ir_k3', 'block_cfg': {'out_channels': 40, 'stride': 1, 'expansio

<All keys matched successfully>

In [4]:
class C4Predictor:
    """Minimal single-image inference wrapper around an already-built model.

    The wrapped model is switched to eval mode on construction. Calling the
    predictor with an ``H x W x C`` numpy image converts it to the dict the
    model is called with and returns the model's output for that one image.

    Args:
        model: The model to run. It is called with a one-element list of
            input dicts and must return a list of per-image outputs.
        min_size_test: Accepted for interface compatibility; currently unused
            because the resize augmentation is disabled.
        max_size_test: Accepted for interface compatibility; currently unused
            because the resize augmentation is disabled.
        input_format: When equal to ``"RGB"``, the channel axis of the input
            image is reversed before it is handed to the model; any other
            value leaves the channel order untouched.
    """

    def __init__(self, model, min_size_test=384, max_size_test=510, input_format="RGB"):
        self.model = model
        self.model.eval()

        # Resizing is intentionally disabled; it used to be
        # T.ResizeShortestEdge([min_size_test, min_size_test], max_size_test).

        self.input_format = input_format

    def __call__(self, original_image):
        """Run the model on one image and return its prediction.

        Args:
            original_image: numpy array of shape ``(H, W, C)``.

        Returns:
            The first (and only) element of the list returned by the model.
        """
        inputs = self.convert_input(original_image)
        # Inference only: disable autograd around the forward pass so no graph
        # is built and the returned tensors do not require grad.
        with torch.no_grad():
            predictions = self.model([inputs])[0]
        return predictions

    def convert_input(self, original_image):
        """Convert an ``(H, W, C)`` numpy image into the model's input dict.

        Args:
            original_image: numpy array of shape ``(H, W, C)``.

        Returns:
            dict with keys ``"image"`` (float32 tensor of shape ``(C, H, W)``),
            ``"height"`` and ``"width"`` (taken from the input array).
        """
        if self.input_format == "RGB":
            # Reverse the channel axis (RGB <-> BGR).
            # NOTE(review): this presumes the model consumes the opposite
            # channel order from the image passed in -- confirm against
            # cfg.INPUT.FORMAT.
            original_image = original_image[:, :, ::-1]
        height, width = original_image.shape[:2]
        # HWC -> CHW; astype() also materialises the reversed view as a fresh array.
        image = torch.as_tensor(original_image.astype("float32").transpose(2, 0, 1))

        return {"image": image, "height": height, "width": width}

Export the initial COCO-format annotation file from CVAT (its 'annotations' field should be empty). Rename the file to labels.json, or change the path in the following cell.

In [None]:
import json

# Read the COCO-format label file exported from CVAT.
# The context manager closes the file even if json.load raises.
with open("datasets/set2/labels.json", encoding="utf-8") as f:
    labels_json = json.load(f)

In [19]:
import numpy as np
import os
from PIL import Image
import json

DATASET_DIR = "datasets/set2"
LABEL_NAME = "labels_set2"
IMAGE_DIR = os.path.join(DATASET_DIR, "clean")

# Load the COCO skeleton exported from CVAT (images/categories filled in,
# annotations expected to be empty).
with open(os.path.join(DATASET_DIR, "labels.json"), encoding="utf-8") as labels_file:
    labels_json = json.load(labels_file)

predictor = C4Predictor(model, min_size_test=384, max_size_test=510)

# file name -> COCO image id, as declared in the label file
img_file_index = {
    image_info['file_name']: image_info['id'] for image_info in labels_json['images']
}

# Continue numbering after any annotation already present so ids never
# collide; with an empty list the first id is 1.
annotations = labels_json.setdefault('annotations', [])
count = max((anno['id'] for anno in annotations), default=0) + 1

# sorted() makes the annotation ids reproducible across runs and machines
# (os.listdir returns entries in arbitrary order).
for img_file in sorted(os.listdir(IMAGE_DIR)):
    img_id = img_file_index.get(img_file)
    if img_id is None:
        # Stray file that the label file does not know about: nothing to attach
        # predictions to, so skip it instead of aborting the whole export.
        print(f"Skipping {img_file}: not listed in labels.json")
        continue

    # Close the file handle promptly and force 3-channel RGB so grayscale,
    # palette or RGBA images do not break the predictor.
    with Image.open(os.path.join(IMAGE_DIR, img_file)) as img:
        image_array = np.array(img.convert("RGB"))
    outputs = predictor(image_array)

    instances = outputs["instances"]
    cls_lbls = instances.pred_classes.cpu().detach().numpy()

    for cls_lbl, pred_box in zip(cls_lbls, instances.pred_boxes):
        # detectron2 Boxes hold absolute (x1, y1, x2, y2) corners;
        # COCO wants [x, y, width, height].
        x_min, y_min, x_max, y_max = pred_box.cpu().detach().numpy().tolist()
        box_width = x_max - x_min
        box_height = y_max - y_min

        annotations.append({
            "id": count,
            "image_id": img_id,
            # Model class indices are 0-based; the +1 offset assumes the
            # category ids in labels.json start at 1 in the same order --
            # TODO confirm against labels_json['categories'].
            "category_id": int(cls_lbl) + 1,
            "segmentation": [],
            "area": box_width * box_height,
            "bbox": [x_min, y_min, box_width, box_height],
            "iscrowd": 0,
            "attributes": {"occluded": False, "rotation": 0.0},
        })
        count += 1

# Write the label file, now including the predicted annotations, to <LABEL_NAME>.json
with open(LABEL_NAME + ".json", "w") as outfile:
    json.dump(labels_json, outfile, indent=4)