In [5]:
import json
import math
import os
from copy import deepcopy
from os.path import basename

import cv2
from tqdm import tqdm
from detectron2.utils.visualizer import Visualizer
from sklearn.model_selection import train_test_split
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode

In [6]:
# Read the labeled hotspot records; the images they reference get autocropped
# so that as little unannotated area as possible is kept.
with open("hotspots_labeled.json", "r") as labeled_file:
    hotspots = json.loads(labeled_file.read())

In [7]:
# One entry per line of signs.txt, with surrounding whitespace removed.
with open("signs.txt", "r") as sign_file:
    signs = list(map(str.strip, sign_file))

In [8]:
def _crop_window(bboxes, img_width, img_height):
    """Return the integer (x0, y0, x1, y1) window enclosing all boxes.

    Boxes are read as [x0, y0, x1, y1]. The window is widened outwards to
    whole pixels (floor on the minimum, ceil on the maximum) so no annotated
    area is cut off, and then clipped to the image extent.
    """
    x0 = max(0, math.floor(min(box[0] for box in bboxes)))
    y0 = max(0, math.floor(min(box[1] for box in bboxes)))
    x1 = min(img_width, math.ceil(max(box[2] for box in bboxes)))
    y1 = min(img_height, math.ceil(max(box[3] for box in bboxes)))
    return x0, y0, x1, y1


def _shift_and_clip_bbox(bbox, x0, y0, width, height):
    """Move a [x0, y0, x1, y1] box to the crop's origin and clamp it to the crop."""
    return [
        min(max(bbox[0] - x0, 0), width),
        min(max(bbox[1] - y0, 0), height),
        min(max(bbox[2] - x0, 0), width),
        min(max(bbox[3] - y0, 0), height),
    ]


autocropped_hotspots = []

outfolder = "images_cropped_categories"
annotated = "images_cropped_annotated_categories"

# cv2.imwrite reports a missing target folder only through its return value,
# so make sure both output folders exist before the long loop starts.
for folder in (outfolder, annotated):
    os.makedirs(folder, exist_ok=True)

skipped_files = []

for hotspot in tqdm(hotspots):
    # don't keep if there's only one sign!
    # TODO: check thresholds here.
    if len(hotspot["annotations"]) <= 1:
        continue

    # cv2.imread returns None (no exception) for missing or unreadable files
    img = cv2.imread(hotspot["file_name"])
    if img is None:
        tqdm.write(f"skipping unreadable image: {hotspot['file_name']}")
        skipped_files.append(hotspot["file_name"])
        continue

    bboxes = [anno["bbox"] for anno in hotspot["annotations"]]
    img_height, img_width = img.shape[:2]
    min_x0, min_y0, max_x1, max_y1 = _crop_window(bboxes, img_width, img_height)

    # All boxes lie outside the image: nothing sensible to crop
    if max_x1 <= min_x0 or max_y1 <= min_y0:
        tqdm.write(f"skipping empty crop window: {hotspot['file_name']}")
        skipped_files.append(hotspot["file_name"])
        continue

    cropped = img[min_y0:max_y1, min_x0:max_x1, :]
    crop_height, crop_width = cropped.shape[:2]

    new_hotspot = deepcopy(hotspot)
    new_hotspot["height"] = crop_height
    new_hotspot["width"] = crop_width
    new_hotspot["file_name"] = outfolder + "/" + basename(hotspot["file_name"])
    # NOTE(review): the box arithmetic here assumes XYXY boxes; whether a
    # record-level "bbox_mode" is read downstream is not visible here - confirm.
    new_hotspot["bbox_mode"] = BoxMode.XYXY_ABS

    # adjust points - new origin is min_x0, min_y0; both axes are clamped to
    # the crop so no coordinate ends up negative or beyond the cropped image
    for anno in new_hotspot["annotations"]:
        anno["bbox"] = _shift_and_clip_bbox(
            anno["bbox"], min_x0, min_y0, crop_width, crop_height
        )

    # Only record the hotspot once its cropped image is actually on disk
    if not cv2.imwrite(new_hotspot["file_name"], cropped):
        raise OSError(f"could not write cropped image: {new_hotspot['file_name']}")
    autocropped_hotspots.append(new_hotspot)

    # Render the shifted annotations onto the crop as a visual sanity check;
    # [:, :, ::-1] reverses the channel order going into and out of the Visualizer.
    visualizer = Visualizer(cropped[:, :, ::-1], scale=0.5, metadata={"thing_classes": signs})
    out = visualizer.draw_dataset_dict(new_hotspot)
    annotated_path = annotated + "/" + basename(hotspot["file_name"])
    if not cv2.imwrite(annotated_path, out.get_image()[:, :, ::-1]):
        tqdm.write(f"could not write annotated preview: {annotated_path}")

if skipped_files:
    tqdm.write(f"skipped {len(skipped_files)} hotspot(s) with unusable images")




100%|██████████| 5010/5010 [32:48<00:00,  2.54it/s]


In [9]:
# Persist the full list of autocropped records as JSON.
with open("hotspots_labeled_autocropped.json", "w") as cropped_file:
    cropped_file.write(json.dumps(autocropped_hotspots))

In [10]:
# Fixed seed so that re-running the notebook reproduces the same train/test
# partition (without it, train_test_split shuffles differently on every run).
SPLIT_SEED = 42

train_hotspots, test_hotspots = train_test_split(
    autocropped_hotspots, test_size=0.15, random_state=SPLIT_SEED
)

# Write each partition to its own JSON file.
with open("hotspots_labeled_train.json", "w") as outf:
    json.dump(train_hotspots, outf)

with open("hotspots_labeled_test.json", "w") as outf:
    json.dump(test_hotspots, outf)

