In [13]:
import json
import os
import shutil
from copy import deepcopy
from os.path import basename
from typing import Dict

import cv2
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode
from detectron2.utils.visualizer import Visualizer
from sklearn.model_selection import train_test_split
from tqdm import tqdm


In [3]:
# Load the hotspot list: one record per line, surrounding whitespace removed,
# blank lines dropped.
with open("../raw_ochre/imagesWithHotspots_nov_2020.txt", "r") as infile:
    stripped_lines = map(str.strip, infile.readlines())
    annotations = [entry for entry in stripped_lines if len(entry) > 0]

In [6]:
# Folder containing the source images; each annotated image is looked up here
# as "<uuid>.jpg".
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
imagefolder = "/local/ecw/deepscribe-detectron/archive/images_renamed"

In [7]:
def _intern(label, labels, index):
    """Return the numeric ID of ``label``, registering it on first sight.

    ``labels`` is the ordered list of known labels (position == ID) and
    ``index`` is the matching label -> ID dict. Keeping both in sync makes the
    lookup O(1) instead of a linear ``in`` / ``list.index`` scan per hotspot.
    """
    if label not in index:
        index[label] = len(labels)
        labels.append(label)
    return index[label]


# One record per annotated image: file name, size, image id and its hotspot boxes.
dataset = []
# Every distinct sign / reading in first-seen order; a label's position in the
# list is its numeric ID.
sign_ids = []
reading_ids = []
# label -> ID lookups mirroring the two lists above
sign_index = {}
reading_index = {}

for anno in annotations:
    # each line has the form "<textname>:<size>:<hotspots>"
    textname, size, hotspots = anno.split(":")

    # the image UUID is the part of the text name before the first underscore
    uuid = textname.split("_")[0]

    # NOTE(review): the "AxB" size field is read as height x width - confirm
    # against the export format.
    height, width = [int(val) for val in size.split("x")]

    fname = f"{imagefolder}/{uuid}.jpg"

    assert os.path.exists(fname), f"image file {fname} does not exist!"

    annos = []

    # hotspots are ";"-separated; empty entries (e.g. from a trailing ";") are skipped
    for hotspot in hotspots.split(";"):
        if not hotspot:
            continue

        classes, coordslist = hotspot.split("~")
        # Sandra Schloen, Nov 2019:
        # Please note: when I created the new hotspots I labeled them with the
        # signName_signUUID so that hotspot cutouts of the same sign would sort
        # together in a folder. Previously when I created this image list I used
        # signUUID_signName, but this time I have swapped them to make the format
        # more consistent with the hotspot labels; that is, the Name now precedes
        # the UUID in this new image list too. You'll need to adjust your code
        # accordingly.

        # "classes" is "<sign label>/<reading label>"; keep only the part of
        # each label before the first underscore
        sign, reading = [elem.split("_")[0] for elem in classes.split("/")]

        # assign the sign and reading ID
        sign_id = _intern(sign, sign_ids, sign_index)
        reading_id = _intern(reading, reading_ids, reading_index)

        coords = [float(coord) for coord in coordslist.split(",")]

        # The detection category is the sign ID. bbox_mode 0 is the integer
        # value of detectron2's BoxMode.XYXY_ABS.
        annos.append(
            {
                "bbox": coords,
                "bbox_mode": 0,
                "sign": sign,
                "reading": reading,
                "sign_id": sign_id,
                "reading_id": reading_id,
                "category_id": sign_id,
            }
        )

    dataset.append(
        {
            "file_name": fname,
            "height": height,
            "width": width,
            "image_id": uuid,
            "annotations": annos,
        }
    )

In [12]:
# Crop every multi-sign image to the bounding region of its annotations
# (removing the "empty" space around them), shift the boxes into the cropped
# frame, and write both the cropped image and an annotated preview to disk.
autocropped_hotspots = []

outfolder = "/local/ecw/deepscribe-detectron/data_nov_2020/images_cropped"
annotated = "/local/ecw/deepscribe-detectron/data_nov_2020/images_annotated"

# cv2.imwrite reports failure only through its return value, so a missing
# output folder would silently produce no files - create them up front.
os.makedirs(outfolder, exist_ok=True)
os.makedirs(annotated, exist_ok=True)

for hotspot in tqdm(dataset):
    # don't keep if there's only one sign!
    # TODO: check thresholds here.
    if len(hotspot["annotations"]) <= 1:
        continue

    # copy so the records in `dataset` keep their original boxes and paths
    new_hotspot = deepcopy(hotspot)

    points = [anno["bbox"] for anno in hotspot["annotations"]]

    # Crop window = extent of all boxes. Dealing with negative-valued
    # coordinates: the window origin is clamped to the image.
    min_x0 = int(max(0, min(pt[0] for pt in points)))
    max_x1 = int(max(pt[2] for pt in points))

    min_y0 = int(max(0, min(pt[1] for pt in points)))
    max_y1 = int(max(pt[3] for pt in points))

    img = cv2.imread(hotspot["file_name"])
    # cv2.imread returns None instead of raising when the file cannot be decoded
    if img is None:
        raise FileNotFoundError(f"could not read image {hotspot['file_name']}")

    cropped = img[min_y0:max_y1, min_x0:max_x1, :]

    new_hotspot["height"] = cropped.shape[0]
    new_hotspot["width"] = cropped.shape[1]
    # NOTE(review): file_name becomes a bare file name (the image itself is
    # written under `outfolder`), so consumers must join it with that folder -
    # confirm this is what downstream code expects.
    new_hotspot["file_name"] = basename(hotspot["file_name"])
    new_hotspot["bbox_mode"] = BoxMode.XYXY_ABS

    # adjust points - new origin is min_x0, min_y0
    for anno in new_hotspot["annotations"]:
        old_bbox = anno["bbox"]

        # Clamp BOTH top-left coordinates before shifting: the crop origin was
        # clamped to 0 on both axes, so an unclamped negative y0 would end up
        # outside the cropped image.
        anno["bbox"] = [
            max(0, old_bbox[0]) - min_x0,
            max(0, old_bbox[1]) - min_y0,
            old_bbox[2] - min_x0,
            old_bbox[3] - min_y0,
        ]

    cv2.imwrite(outfolder + "/" + new_hotspot["file_name"], cropped)
    autocropped_hotspots.append(new_hotspot)

    # OpenCV images are BGR; flip the channel axis for the visualizer and flip
    # the rendered result back before writing it with OpenCV.
    visualizer = Visualizer(cropped[:, :, ::-1], scale=0.5, metadata={"thing_classes": sign_ids})
    out = visualizer.draw_dataset_dict(new_hotspot)
    cv2.imwrite(annotated + "/" + basename(hotspot["file_name"]), out.get_image()[:, :, ::-1])


  0%|          | 0/5013 [00:00<?, ?it/s]


NameError: name 'deepcopy' is not defined