In [1]:
import glob
import json
import os
from datetime import datetime, timezone

import cv2
import numpy as np

In [2]:
# Folder holding the LabelMe annotation JSONs; every other path is derived from it.
LABELME_DIR = "/home/joan_ds/Sandbox/UOC/TFM/data/dataset_500_GT/images/annotations"
OUT_COCO_JSON = f"{LABELME_DIR}/COCO/GT_COCO.json"

# Per-dataset description: where to read LabelMe files and where to write results.
biaa = dict(
    name="BIAA",
    in_folder=LABELME_DIR,
    out_coco_folder=f"{LABELME_DIR}/COCO",
    out_ignore_mask_folder=f"{LABELME_DIR}/ignore_mask",
)

datasets = [biaa]


In [3]:
# One row per class: (COCO category id, category name, supercategory).
# Id 255 is the class used for regions annotated as "ignore".
_CATEGORY_ROWS = (
    (0, "sidewalk_tiles", "sidewalk"),
    (1, "sidewalk_no_tiles", "sidewalk"),
    (2, "roadway", "road"),
    (3, "curb_edge", "sidewalk"),
    (4, "drainage_inlet", "both"),
    (5, "gutter", "road"),
    (6, "access_cover", "both"),
    (7, "tree_pit", "sidewalk"),
    (8, "vegetation", "sidewalk"),
    (9, "street_furniture", "both"),
    (255, "__ignore__", "__ignore__"),
)

# COCO "categories" section.
CATEGORIES = [
    {"id": cat_id, "name": cat_name, "supercategory": supercat}
    for cat_id, cat_name, supercat in _CATEGORY_ROWS
]

# NOTE(review): not referenced by any visible cell, and it shares the value 0
# with the "sidewalk_tiles" category id - confirm whether it is still needed.
BACKGROUND = 0

# Raw LabelMe label (the category id written as a string) -> category name.
LABEL_MAP = {str(cat_id): cat_name for cat_id, cat_name, _ in _CATEGORY_ROWS}

# Category name -> COCO category id.
cat2id = dict((cat_name, cat_id) for cat_id, cat_name, _ in _CATEGORY_ROWS)


In [4]:
import os, glob, json
from collections import Counter

# Survey of the raw label strings used across all LabelMe files, to check
# them against LABEL_MAP before converting.
json_files = sorted(glob.glob(os.path.join(LABELME_DIR, "*.json")))
print(f"S'han trobat {len(json_files)} fitxers JSON.")

all_labels = Counter()
for json_path in json_files:
    with open(json_path, "r", encoding="utf-8") as fh:
        labelme_doc = json.load(fh)
    # A shape without a "label" key is counted under the empty string.
    all_labels.update(shape.get("label", "") for shape in labelme_doc.get("shapes", []))

print("\nEtiquetes trobades i freqüències:")
for label, count in all_labels.most_common():
    print(f"{label!r}: {count}")

S'han trobat 507 fitxers JSON.

Etiquetes trobades i freqüències:
'0': 1070
'255': 828
'6': 688
'9': 604
'3': 503
'5': 407
'2': 356
'7': 322
'8': 261
'4': 121
'1': 61


In [5]:
def polygon_area(points):
    """Return the unsigned area enclosed by a polygon (shoelace formula).

    Args:
        points: sequence of vertices; index 0 of each vertex is read as x
            and index 1 as y.

    Returns:
        The absolute area as a Python float, whatever the winding direction.
    """
    xs = np.array([vertex[0] for vertex in points], dtype=np.float64)
    ys = np.array([vertex[1] for vertex in points], dtype=np.float64)

    # Pair each vertex with its successor; the last one wraps to the first.
    next_xs = np.roll(xs, -1)
    next_ys = np.roll(ys, -1)

    signed_twice_area = np.dot(xs, next_ys) - np.dot(ys, next_xs)
    return float(0.5 * np.abs(signed_twice_area))

def polygon_bbox(points):
    """Return the axis-aligned bounding box of a polygon.

    Args:
        points: non-empty sequence of vertices; index 0 of each vertex is
            read as x and index 1 as y.

    Returns:
        A list ``[x_min, y_min, width, height]`` of floats.
    """
    xs, ys = [], []
    for vertex in points:
        xs.append(vertex[0])
        ys.append(vertex[1])

    left, right = float(min(xs)), float(max(xs))
    top, bottom = float(min(ys)), float(max(ys))

    return [left, top, float(right - left), float(bottom - top)]


In [6]:
# Convert every LabelMe JSON found in LABELME_DIR into a single COCO file
# written to OUT_COCO_JSON.
image_id = 1
ann_id = 1
images = []
annotations = []
skipped_labels = {}  # raw label -> number of shapes dropped as unknown

# The "Z" suffix means UTC, so the timestamp has to be taken in UTC rather
# than from the naive local clock. It is computed once so that every record
# of this run carries the same value.
created_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

json_files = sorted(glob.glob(os.path.join(LABELME_DIR, "*.json")))
print(f"S'han trobat {len(json_files)} fitxers LabelMe.")

for jf in json_files:
    with open(jf, "r", encoding="utf-8") as f:
        lm = json.load(f)

    # --- IMAGE METADATA ---
    h = int(lm["imageHeight"])
    w = int(lm["imageWidth"])

    if lm.get("imagePath"):
        file_name = os.path.basename(lm["imagePath"])
    else:
        # No image path stored: fall back to the JSON name with a .jpg extension.
        base = os.path.splitext(os.path.basename(jf))[0]
        file_name = base + ".jpg"

    images.append({
        "id": image_id,
        "file_name": file_name,
        "height": h,
        "width": w,
        # This is the conversion time, not the time the photo was taken.
        "date_captured": created_at,
    })

    # --- ANNOTATIONS ---
    for sh in lm.get("shapes", []):
        raw_label = sh.get("label", "")
        mapped = LABEL_MAP.get(raw_label, raw_label)

        # "__ignore__" shapes are not filtered out here: they are exported
        # like any other class, under category id 255.
        if mapped not in cat2id:
            # Unknown label: drop the shape, but remember it for the summary.
            skipped_labels[raw_label] = skipped_labels.get(raw_label, 0) + 1
            continue

        points = sh.get("points", [])
        if not points or len(points) < 3:
            continue  # a polygon needs at least three vertices

        seg = [float(c) for p in points for c in p]
        area = polygon_area(points)
        bbox = polygon_bbox(points)

        annotations.append({
            "id": ann_id,
            "image_id": image_id,
            "category_id": cat2id[mapped],
            "segmentation": [seg],
            "area": area,
            "bbox": bbox,
            "iscrowd": 0,
        })
        ann_id += 1

    image_id += 1


# === FINAL COCO FILE ===
coco = {
    "info": {
        "description": "BIAA sidewalk dataset - COCO annotations",
        "version": "1.0",
        "year": 2025,
        "contributor": "Joan Manel",
        "date_created": created_at,
    },
    "licenses": [],
    "images": images,
    "annotations": annotations,
    "categories": CATEGORIES,
}

# open(..., "w") does not create missing parent folders.
out_dir = os.path.dirname(OUT_COCO_JSON)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

with open(OUT_COCO_JSON, "w", encoding="utf-8") as f:
    json.dump(coco, f, ensure_ascii=False, indent=2)

print(f"✔ Fitxer COCO generat: {OUT_COCO_JSON}")
print(f"✔ Total imatges: {len(images)}")
print(f"✔ Total anotacions: {len(annotations)}")
if skipped_labels:
    print(f"⚠ Etiquetes desconegudes descartades: {skipped_labels}")

S'han trobat 507 fitxers LabelMe.
✔ Fitxer COCO generat: /home/joan_ds/Sandbox/UOC/TFM/data/dataset_500_GT/images/annotations/COCO/GT_COCO.json
✔ Total imatges: 507
✔ Total anotacions: 5221


In [7]:
# For each dataset: write one ignore-mask PNG per image plus one COCO JSON
# in which the "__ignore__" regions are left out of the annotations.
for dataset in datasets:
    images = []
    annotations = []
    image_id = 0
    # COCO annotation ids have to be unique across the whole file, so this
    # counter must not restart for every image.
    ann_id = 0

    name = dataset["name"]
    print(f"Procesamos las anotaciones en formato COCO y las máscaras __ignore__ del dataset {name}.")
    in_folder = dataset["in_folder"]
    out_coco_folder = dataset["out_coco_folder"]
    out_ignore_mask_folder = dataset["out_ignore_mask_folder"]

    # Neither open(..., "w") nor cv2.imwrite creates missing folders.
    os.makedirs(out_coco_folder, exist_ok=True)
    os.makedirs(out_ignore_mask_folder, exist_ok=True)

    # Sorted so that image ids are the same on every run.
    for in_path in sorted(glob.glob(os.path.join(in_folder, "*.json"))):
        with open(in_path, "r", encoding="utf-8") as f:
            lm = json.load(f)

        image_id += 1

        if lm.get("imagePath"):
            file_name = os.path.basename(lm["imagePath"])
        else:
            # No image path stored: fall back to the JSON name with a .jpg extension.
            file_name = os.path.splitext(os.path.basename(in_path))[0] + ".jpg"

        # Strip only the final extension, so dots inside the name survive.
        stem = os.path.splitext(file_name)[0]

        img_h = int(lm["imageHeight"])
        img_w = int(lm["imageWidth"])

        images.append({
            "id": image_id,
            "file_name": file_name,
            "height": img_h,
            "width": img_w,
            # NOTE(review): presumably the file name encodes the capture
            # date - confirm against the dataset naming scheme.
            "date_captured": stem,
        })

        ignore_mask = np.zeros((img_h, img_w), dtype=np.uint8)
        # Always .png, whatever the image extension: the mask must be stored
        # losslessly and cv2.imwrite picks the codec from the extension.
        out_ignore_mask_path = os.path.join(
            out_ignore_mask_folder, f"ignore_mask_{stem}.png"
        )

        for sh in lm.get("shapes", []):
            points = sh.get("points", [])
            if not points or len(points) < 3:
                continue  # a polygon needs at least three vertices

            # Shapes carry the numeric id as a string ("0".."9", "255");
            # translate it to the category name before any comparison.
            raw_label = sh.get("label", "")
            label = LABEL_MAP.get(raw_label, raw_label)

            if label == "__ignore__":
                # Ignore regions go to the mask only, never to the COCO file.
                poly_pts = np.array(points, dtype=np.int32)
                cv2.fillPoly(ignore_mask, [poly_pts], 255)
                continue

            if label not in cat2id:
                continue  # unknown label

            seg = [float(coord) for pt in points for coord in pt]
            area = polygon_area(points)
            bbox = polygon_bbox(points)

            ann_id += 1
            annotations.append({
                "id": ann_id,
                "image_id": image_id,
                "category_id": cat2id[label],
                "segmentation": [seg],
                "area": area,
                "bbox": bbox,
                "iscrowd": 0,
            })

        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(out_ignore_mask_path, ignore_mask):
            raise IOError(f"Could not write ignore mask: {out_ignore_mask_path}")

    now = datetime.now().isoformat()

    coco = {
        "info": {
            "description": "Sample conversion from LabelMe to COCO (semantic via polygons).",
            "version": "1.0",
            "year": 2025,
            "contributor": "Joan Manel Ramírez Jávega",
            "date_created": now,
        },
        "licenses": [],
        "images": images,
        "annotations": annotations,
        "categories": CATEGORIES,
    }

    # One timestamped COCO file per dataset and per run. The isoformat()
    # string contains ":" characters, which some file systems reject.
    coco_filename = now + "_COCO_GT_no_SAM.json"

    with open(os.path.join(out_coco_folder, coco_filename), "w", encoding="utf-8") as f:
        json.dump(coco, f, ensure_ascii=False, indent=2)








Procesamos las anotaciones en formato COCO y las máscaras __ignore__ del dataset BIAA.
