In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from lib.coco_handler import *
from lib.copy_and_paste_augm import *

from pycocotools.coco import COCO

%load_ext autoreload
%autoreload 2

# Create a new COCO annotation set from masks

This chunk takes an existing annotation file (or, alternatively, creates an empty one)
 and adds instance masks extracted from the binary masks.

In [None]:
# Start from the existing butterfly annotation file; one annotation call is
# made per row of the labelling table below.
c_raw = CocoDataset(os.path.join(constants.path_to_anno_dir, "butterfly_anno.json"))

# Labelling table. The loop reads the columns "mask", "crop_image_name",
# "family", "rough_class" and "min_area" from every row.
labelling_table = pd.read_csv(
    os.path.join(constants.path_to_data_dir, "raw", "bug_labelling.csv")
)

for _, label_row in labelling_table.iterrows():
    # The "mask" column holds a file name relative to the masks directory.
    mask_path = os.path.join(constants.path_to_masks_dir, label_row["mask"])
    c_raw.add_annotation_from_binary_mask(
        mask_path,
        label_row["crop_image_name"],
        label_row["family"],
        label_row["rough_class"],
        min_area=label_row["min_area"],
    )

# Show the collected annotations, then write the combined set to all_anno.json.
c_raw.show_annotations()
c_raw.to_json(os.path.join(constants.path_to_anno_dir, "all_anno.json"))

All annotations are now combined in the file `all_anno.json`.

# Extract single objects from masks

In [None]:
# Load the combined annotation file with pycocotools.
annotation_file = os.path.join(constants.path_to_anno_dir, "all_anno.json")
coco = COCO(annotation_file)

# The PatchCreator gets the annotations, the image directory and the
# directory the patches are written to.
patch_output_dir = os.path.join(constants.path_to_output_dir, "patches")
pc = PatchCreator(coco, constants.path_to_imgs_dir, patch_output_dir)

# Call the patch creator once for every image record of the annotation set.
for image_record in coco.imgs.values():
    pc(image_record, dilation=1)

This created a folder for each object category and placed the individual objects inside.


You might now want to filter out patches of bad quality, partially occluded objects, etc.
before re-loading the patch pool for the actual copy-and-paste (CAP) image generation.

In [None]:
# Build one PatchPool per object category from the sub-directories of
# <copy_and_paste>/objs, which must be named "<cat_id>-<cat_label>".
# NOTE(review): the PatchCreator cell above writes to <output_dir>/patches,
# whereas this cell reads <copy_and_paste>/objs - presumably the (filtered)
# patches are copied over manually; confirm.
obj_dir = os.path.join(constants.path_to_copy_and_paste, "objs")

# Keep directories only (stray files cannot be patch pools) and sort the
# names so the pool order does not depend on glob's arbitrary ordering.
dirs = sorted(
    os.path.basename(path)
    for path in glob.glob(os.path.join(obj_dir, "*"))
    if os.path.isdir(path)
)

# Fail early with a readable message instead of an IndexError further down.
malformed_dirs = [name for name in dirs if "-" not in name]
if malformed_dirs:
    raise ValueError(
        f"Expected '<cat_id>-<cat_label>' directories in {obj_dir}, "
        f"got: {malformed_dirs}"
    )

# Split on the FIRST "-" only: a label that itself contains a hyphen stays
# intact, so the directory path rebuilt below points to the real folder.
cat_ids = [name.split("-", 1)[0] for name in dirs]
cat_labels = [name.split("-", 1)[1] for name in dirs]

# create patch pool dict (keyed by category label)
patch_pool = {
    cat_label: PatchPool(
        os.path.join(obj_dir, f"{cat_id}-{cat_label}"),
        cat_id=cat_id,
        cat_label=cat_label,
        aug_transforms=None,
        n_augmentations=0,  # only create Pool
        scale=1,
    )
    for cat_id, cat_label in zip(cat_ids, cat_labels)
}

# Pool of background images read from <copy_and_paste>/backgrounds; the
# annotation file name and the maximum resolution are passed through as given.
background_dir = os.path.join(constants.path_to_copy_and_paste, "backgrounds")
background_pool = BackgroundPool(
    background_dir=background_dir,
    background_anno="background_anno.json",
    max_resolution=(1800, 1500),
)

Specify the scales and augmentations (for simplicity the same for both generators).

In [None]:
# Per-category settings, passed as the third argument to both generators below.
# NOTE(review): the inner dicts look like {scale factor: count} and the outer
# keys presumably have to match the labels of the patch pool - confirm against
# the generator implementation before renaming or re-spelling any key.
d = {
    "Mesembryhmus_purpuralis": {1: 5, 0.25: 5},
    "Smerinthus_ocellata": {1: 5, 0.5: 5},
    "Acherontia_atroposa": {1: 5, 0.25: 3},
    "bug_proxy_2": {1: 3},
    "bug_proxy_3": {1: 3},
    "bug_proxy_1": {1: 3, 4: 2},
    "Trichotichnus": {1: 3, 4: 2},
}

In [None]:
# COCO data set (created without an annotation file) that collects the
# annotations of the generated training images in the cells below.
train_set_info = CocoDataset.create_coco_info(
    descr="\n    Image set created by Copy and Paste data augmentation\n",
    contrib="Sebastian Rassmann",
)
cpg_coco_train = CocoDataset(info=train_set_info)

In [None]:
# Generate five training images (train-box-0 ... train-box-4) with the
# CollectionBoxGenerator and register their instance masks in cpg_coco_train.
cpg = CollectionBoxGenerator(patch_pool, background_pool, d, max_n_objs=150)

# cv2.imwrite neither creates missing directories nor raises when it cannot
# write - it only returns False. Make sure the target directory exists.
train_out_dir = os.path.join(constants.path_to_copy_and_paste, "output", "train")
os.makedirs(train_out_dir, exist_ok=True)

for i in range(5):
    # bboxs and image_mask are returned by generate() but not used here.
    img, instance_masks, bboxs, cats, image_mask = cpg.generate()
    img_name = f"train-box-{i}.png"
    img_path = os.path.join(train_out_dir, img_name)
    # Stop instead of annotating an image that never reached the disk.
    if not cv2.imwrite(img_path, img):
        raise IOError(f"Could not write generated image to {img_path}")
    cpg_coco_train.add_annotations_from_instance_mask(instance_masks, img_name, cats)
    # NOTE(review): called on every iteration, as in the original cell; move it
    # below the loop if showing the annotations once at the end is enough.
    cpg_coco_train.show_annotations(train_out_dir)

In [None]:
# Generate five more training images with the RandomGenerator and register
# their instance masks in cpg_coco_train.
cpg = RandomGenerator(patch_pool, background_pool, d, max_n_objs=150)

# cv2.imwrite neither creates missing directories nor raises when it cannot
# write - it only returns False. Make sure the target directory exists.
train_out_dir = os.path.join(constants.path_to_copy_and_paste, "output", "train")
os.makedirs(train_out_dir, exist_ok=True)

# Indices 5..9 continue the numbering of the CollectionBoxGenerator cell
# (0..4), so no previously written file is overwritten.
for i in range(5, 10):
    # bboxs and image_mask are returned by generate() but not used here.
    img, instance_masks, bboxs, cats, image_mask = cpg.generate()
    # NOTE(review): the "train-box-" prefix is kept although these images come
    # from the RandomGenerator - confirm this naming is intended.
    img_name = f"train-box-{i}.png"
    img_path = os.path.join(train_out_dir, img_name)
    # Stop instead of annotating an image that never reached the disk.
    if not cv2.imwrite(img_path, img):
        raise IOError(f"Could not write generated image to {img_path}")
    cpg_coco_train.add_annotations_from_instance_mask(instance_masks, img_name, cats)
    # NOTE(review): called on every iteration, as in the original cell; move it
    # below the loop if showing the annotations once at the end is enough.
    cpg_coco_train.show_annotations(train_out_dir)