In [1]:
import json
from sklearn.model_selection import train_test_split
import os
import shutil
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog


In [2]:
# Load the raw annotation file, keeping only the non-blank lines with their
# surrounding whitespace removed.
with open("imagesWithHotspots.txt", "r") as infile:
    annotations = []
    for raw_line in infile.readlines():
        trimmed = raw_line.strip()
        if len(trimmed) > 0:
            annotations.append(trimmed)

In [8]:
# Build one dataset record per annotation line.
#
# As parsed below, a line has the form
#     <textname>:<A>x<B>:<hotspot>;<hotspot>;...
# and each hotspot has the form
#     <x>_<sign>/<y>_<reading>~<coord>,<coord>,...
# The image id is the part of <textname> before the first "_".
imagefolder = "images_renamed"

dataset = []
# Ordered vocabularies of every sign / reading seen so far; an entry's position
# in the list is its integer ID.
sign_ids = list()
reading_ids = list()
# Reverse lookups (name -> ID) so assigning an ID does not rescan the lists
# for every hotspot.
sign_lookup = {}
reading_lookup = {}

for anno in annotations:
    try:
        textname, size, hotspots = anno.split(":")
        uuid = textname.split("_")[0]
        # NOTE(review): the size field is read as "<height>x<width>" - confirm
        # that this matches the order used by the annotation export.
        height, width = [int(val) for val in size.split("x")]
        fname = f"{imagefolder}/{uuid}.jpg"
    except ValueError:
        # Malformed header: report the line and skip it entirely. Falling
        # through would append a record built from the previous line's
        # values (or raise NameError on the very first line).
        print(anno)
        continue

    annos = []
    for hotspot in hotspots.split(";"):
        if len(hotspot) == 0:
            continue

        # Parse the whole hotspot before touching the vocabularies, so a
        # malformed entry neither registers a sign/reading nor prevents the
        # remaining hotspots of this image from being processed.
        try:
            classes, coordslist = hotspot.split("~")
            sign, reading = [elem.split("_")[1] for elem in classes.split("/")]
            coords = [float(coord) for coord in coordslist.split(",")]
        except (ValueError, IndexError):
            print("HOTSPOT FAILURE")
            print(hotspot)
            continue

        # Assign the sign and reading IDs (first-seen order).
        if sign not in sign_lookup:
            sign_lookup[sign] = len(sign_ids)
            sign_ids.append(sign)
        sign_id = sign_lookup[sign]

        if reading not in reading_lookup:
            reading_lookup[reading] = len(reading_ids)
            reading_ids.append(reading)
        reading_id = reading_lookup[reading]

        # category_id is the per-sign ID, i.e. one category per distinct sign.
        # NOTE(review): bbox_mode 0 presumably corresponds to detectron2's
        # BoxMode.XYXY_ABS - confirm against the coordinate order in the data.
        annos.append({"bbox": coords, "bbox_mode": 0, "sign": sign, "reading": reading, "sign_id": sign_id, "reading_id": reading_id, "category_id": sign_id})

    dataset.append({"file_name": fname, "height": height, "width": width, "image_id": uuid, "annotations": annos})

In [9]:
# Persist every image record (the full, unsplit dataset) as a single JSON file.
with open("hotspots_labeled.json", "w") as labeled_file:
    serialized = json.dumps(dataset)
    labeled_file.write(serialized)

In [10]:
# Write the collected sign names to a text file, one per line, in list order
# (no trailing newline after the last entry).
with open("signs.txt", "w") as sign_file:
    sign_lines = "\n".join(sign_ids)
    sign_file.write(sign_lines)

In [11]:
# Hold out 10% of the image records for testing and write both partitions to
# JSON. The split is seeded: without a fixed random_state every re-run of this
# cell would shuffle differently and overwrite the files with a new partition,
# so images could move between train and test from one run to the next.
SPLIT_SEED = 42
train_imgs, test_imgs = train_test_split(dataset, test_size=0.1, random_state=SPLIT_SEED)
with open("hotspots_train.json", "w") as outf:
    json.dump(train_imgs, outf)
with open("hotspots_test.json", "w") as outf:
    json.dump(test_imgs, outf)

In [25]:
# Copy every file from "images" into the renamed-image folder. Each copy is
# named after the part of the original filename that precedes the first "$",
# keeping the original file extension.
os.makedirs(imagefolder, exist_ok=True)  # copy2 fails if the target folder is missing
for startpath in os.listdir("images"):
    source = os.path.join("images", startpath)
    if not os.path.isfile(source):
        # Sub-directories cannot be copied with copy2; leave them alone.
        continue
    if "$" not in startpath:
        # No "$" separator, so there is no new name to derive; report and
        # skip instead of aborting the whole copy with a ValueError.
        print(f"skipping {startpath}: no '$' in file name")
        continue
    _, ext = os.path.splitext(startpath)
    # Split only once so names containing several "$" no longer raise.
    newname = startpath.split("$", 1)[0]
    shutil.copy2(source, os.path.join(imagefolder, newname + ext))
