In [None]:
from google.colab import drive
import os
import sys

# Mount Google Drive and switch into the project directory.
# The target is given as an absolute path so the cell stays idempotent: with a
# relative path, re-running the cell (when the working directory is already
# `pg_iss`) would raise FileNotFoundError.
drive.mount("/content/drive")
os.chdir("/content/drive/MyDrive/PG/pg_iss/")
assert os.path.basename(os.getcwd()) == "pg_iss", "not inside the pg_iss project directory"
# !pip install -q -U albumentations

In [None]:
import os
import sys

# When the notebook is started from the `nb` sub-directory, step up one level
# so that the relative paths used below are resolved from the parent directory.
current_dir_name = os.path.basename(os.getcwd())
if current_dir_name == "nb":
    os.chdir(os.pardir)

# Make the modules inside `lib` importable.
sys.path.append("lib")

from pycocotools.coco import COCO
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import numpy as np
import json

%load_ext autoreload
%autoreload 2

from lib.coco_handler import *
from lib.copy_and_paste_augm import *

# Create a new COCO annotation set from masks

This chunk takes an existing annotation file (alternatively, creates an empty one)
and adds instance masks extracted from the binary masks.

In [None]:
# Start from the existing butterfly annotations and extend them with the
# instances described in the labelling table (one row per binary mask).
butterfly_anno_path = os.path.join(constants.path_to_anno_dir, "butterfly_anno.json")
c_raw = CocoDataset(butterfly_anno_path)

labelling_csv = os.path.join(constants.path_to_data_dir, "raw", "bug_labelling.csv")
labelling = pd.read_csv(labelling_csv)

for _, entry in labelling.iterrows():
    mask_path = os.path.join(constants.path_to_masks_dir, entry["mask"])
    c_raw.add_annotation_from_binary_mask(
        mask_path,
        entry["crop_image_name"],
        entry["family"],
        entry["rough_class"],
        min_area=entry["min_area"],
    )

# Display the result and store the combined annotation set.
c_raw.show_annotations()
combined_anno_path = os.path.join(constants.path_to_anno_dir, "all_anno.json")
c_raw.to_json(combined_anno_path)

Now, all annotations are combined in the file `all_anno.json`.

# Train-validation-test Split for Insect Dataset

Split the instances of each image into train, validation, and test sets. For each set, the annotations belonging to the other sets are removed from the COCO file, and the corresponding objects are covered with a black overlay in the image.
The images can then be processed further by manually covering the black overlay with background patches.


In [None]:
# Directory holding the raw images referenced by the annotation file.
raw_img_path = constants.path_to_imgs_dir

# Combined annotation file, loaded through the pycocotools API.
coco_path = os.path.join(constants.path_to_anno_dir, "all_anno.json")
coco = COCO(coco_path)

## Split annotations and cover instances

In [None]:
def _union_mask(coco_api, annotations, shape):
    """Return the pixel-wise union of the masks of `annotations`.

    Args:
        coco_api: COCO object used to rasterise each annotation via `annToMask`.
        annotations: list of COCO annotation dicts (may be empty).
        shape: (height, width) of the mask to create.

    Returns:
        uint8 array of the given shape; all zeros when `annotations` is empty.
    """
    union = np.zeros(shape, dtype=np.uint8)
    for ann in annotations:
        union = np.maximum(union, coco_api.annToMask(ann))
    return union


train_anno_all = []
valid_anno_all = []
test_anno_all = []

# cv2.imwrite does not create missing directories, so create them up front.
for split_name in ("train", "validation", "test"):
    os.makedirs(f"data/split/{split_name}/imgs", exist_ok=True)

for img_anno in coco.imgs.values():
    img_path = os.path.join(raw_img_path, img_anno["file_name"])
    # The image stays in OpenCV's BGR order: masking treats every channel the
    # same, so no colour conversion is needed before writing it back.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    anns = coco.loadAnns(coco.getAnnIds(img_anno["id"]))

    # Per-image split in annotation order:
    # the first n//3 + 1 go to train, the next n//3 to validation, the rest to test.
    third = len(anns) // 3
    split_annos = {
        "train": anns[: third + 1],
        "validation": anns[third + 1 : 2 * third + 1],
        "test": anns[2 * third + 1 :],
    }
    train_anno_all += split_annos["train"]
    valid_anno_all += split_annos["validation"]
    test_anno_all += split_annos["test"]

    mask_shape = img.shape[:2]
    all_mask = _union_mask(coco, anns, mask_shape)
    file_stem = img_anno["file_name"].split(".")[0]

    for split_name, split_anno in split_annos.items():
        # An empty split (e.g. images with very few annotations) yields an
        # all-zero mask instead of failing on an empty reduction.
        split_mask = _union_mask(coco, split_anno, mask_shape)
        # Keep the background and the objects of this split; every other
        # annotated object is blacked out.
        keep = ((all_mask == 0) | (split_mask > 0)).astype(np.uint8)
        split_img = cv2.bitwise_or(img, img, mask=keep)

        out_path = f"data/split/{split_name}/imgs/{file_stem}-{split_name}.tif"
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(out_path, split_img):
            raise IOError(f"Could not write image: {out_path}")

## Create train set annotation

In [None]:
path = "data/split/train/train_anno.json"

# Start from the complete annotation file and replace only the annotation list,
# so all other top-level entries are carried over unchanged.
with open(coco_path) as source_file:
    train_coco = json.load(source_file)
train_coco["annotations"] = train_anno_all

# Point every image entry at the masked train image.
for image_entry in train_coco["images"]:
    stem = image_entry["file_name"].split(".")[0]
    image_entry["file_name"] = f"{stem}-train.tif"

with open(path, "w+") as target_file:
    json.dump(train_coco, target_file, indent=4, sort_keys=False)

### Verify train set

In [None]:
# Visual check of the train split: show every train image with its annotations.
# The annotation file is named explicitly so the cell does not depend on the
# current value of the shared `path` variable.
coco_train = COCO("data/split/train/train_anno.json")
for img_id, img_info in coco_train.imgs.items():
    anns = coco_train.loadAnns(coco_train.getAnnIds(imgIds=img_id, iscrowd=None))
    img_file = os.path.join("data/split/train/imgs", img_info["file_name"])
    img = cv2.imread(img_file)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_file}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # matplotlib expects RGB
    plt.figure(figsize=(10, 15))
    plt.axis("off")
    plt.imshow(img)
    # Draw with the object that loaded these annotations, not the global `coco`.
    coco_train.showAnns(anns)
    plt.show()

## Create validation set annotation

In [None]:
path = "data/split/validation/validation_anno.json"

# Start from the complete annotation file and replace only the annotation list,
# so all other top-level entries are carried over unchanged.
with open(coco_path) as source_file:
    validation_coco = json.load(source_file)
validation_coco["annotations"] = valid_anno_all

# Point every image entry at the masked validation image.
for image_entry in validation_coco["images"]:
    stem = image_entry["file_name"].split(".")[0]
    image_entry["file_name"] = f"{stem}-validation.tif"

with open(path, "w+") as target_file:
    json.dump(validation_coco, target_file, indent=4, sort_keys=False)

### Verify validation set

In [None]:
# Visual check of the validation split: show every validation image with its
# annotations. The annotation file is named explicitly so the cell does not
# depend on the current value of the shared `path` variable.
coco_valid = COCO("data/split/validation/validation_anno.json")
for img_id, img_info in coco_valid.imgs.items():
    anns = coco_valid.loadAnns(coco_valid.getAnnIds(imgIds=img_id, iscrowd=None))
    img_file = os.path.join("data/split/validation/imgs", img_info["file_name"])
    img = cv2.imread(img_file)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_file}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # matplotlib expects RGB
    plt.figure(figsize=(10, 15))
    plt.axis("off")
    plt.imshow(img)
    # Draw with the object that loaded these annotations, not the global `coco`.
    coco_valid.showAnns(anns)
    plt.show()

## Create test set annotation

In [None]:
path = "data/split/test/test_anno.json"

# Start from the complete annotation file and replace only the annotation list,
# so all other top-level entries are carried over unchanged.
with open(coco_path) as source_file:
    test_coco = json.load(source_file)
test_coco["annotations"] = test_anno_all

# Point every image entry at the masked test image.
for image_entry in test_coco["images"]:
    stem = image_entry["file_name"].split(".")[0]
    image_entry["file_name"] = f"{stem}-test.tif"

with open(path, "w+") as target_file:
    json.dump(test_coco, target_file, indent=4, sort_keys=False)

### Verify test set

In [None]:
# Visual check of the test split: show every test image with its annotations.
# The annotation file is named explicitly so the cell does not depend on the
# current value of the shared `path` variable.
coco_test = COCO("data/split/test/test_anno.json")
for img_id, img_info in coco_test.imgs.items():
    anns = coco_test.loadAnns(coco_test.getAnnIds(imgIds=img_id, iscrowd=None))
    img_file = os.path.join("data/split/test/imgs", img_info["file_name"])
    img = cv2.imread(img_file)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_file}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # matplotlib expects RGB
    plt.figure(figsize=(10, 15))
    plt.axis("off")
    plt.imshow(img)
    # Draw with the object that loaded these annotations, not the global `coco`.
    coco_test.showAnns(anns)
    plt.show()

# Create Object Pools for Copy and Paste Augmentation

In [None]:
# Build the object pools from the train split only.
train_anno_file = "data/split/train/train_anno.json"
train_img_dir = "data/split/train/imgs/"
patch_out_dir = os.path.join(constants.path_to_copy_and_paste, "created_patches")

coco = COCO(train_anno_file)
pc = PatchCreator(coco, train_img_dir, patch_out_dir)

# Run the patch creator once per train image.
for train_image in coco.imgs.values():
    pc(train_image, dilation=1, blurr=0)

This creates a folder for each object category and places the individual objects inside.
The paths to these folders have to be specified in the configs for the CAP patch pools.