In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
BASE_PATH = "/content/drive/MyDrive/proyecto final maia"

## Agregando anotaciones para las vertebras faltantes

In [16]:
import json, re
from pathlib import Path
from collections import defaultdict, Counter
import numpy as np

IN  = Path(f"{BASE_PATH}/coco.json")
OUT = Path(f"{BASE_PATH}/annotations_T1_T12_L1_L5_below_T1.json")

# --- target label order ---
THORACIC = [f"T{i}" for i in range(1, 13)]
LUMBAR   = [f"L{i}" for i in range(1, 6)]
VERTEBRA_NAMES = THORACIC + LUMBAR

MIN_VERT_POINTS = 3

def norm_name(s: str) -> str:
    s = (s or "").strip().upper()
    s = re.sub(r"\s+", " ", s)
    if s == "FONDO": s = "F"
    return s

def poly_to_xy(poly):
    arr = np.asarray(poly, dtype=float).reshape(-1, 2)
    return arr[:,0], arr[:,1]

def poly_centroid(poly):
    xs, ys = poly_to_xy(poly)
    return float(xs.mean()), float(ys.mean())

def poly_bbox(poly):
    xs, ys = poly_to_xy(poly)
    x0, y0 = float(xs.min()), float(ys.min())
    x1, y1 = float(xs.max()), float(ys.max())
    return [x0, y0, x1 - x0, y1 - y0]

def poly_area(poly):
    xs, ys = poly_to_xy(poly)
    x_shift = np.roll(xs, -1)
    y_shift = np.roll(ys, -1)
    return float(abs((xs * y_shift - x_shift * ys).sum() * 0.5))

# -------- load COCO --------
coco = json.loads(IN.read_text(encoding="utf-8"))
images = {im["id"]: im for im in coco["images"]}

anns_by_img = defaultdict(list)
for ann in coco["annotations"]:
    name = norm_name(ann.get("name", ""))
    seg  = ann.get("segmentation", [])
    if not isinstance(seg, list):     # polygons only
        continue
    if name not in ("T1", "V"):       # ignore F and others
        continue
    anns_by_img[ann["image_id"]].append({**ann, "_name": name})

categories = [{"id": i+1, "name": n, "supercategory": "spine"} for i, n in enumerate(VERTEBRA_NAMES)]
name_to_id = {c["name"]: c["id"] for c in categories}

new_annotations = []
next_id = 1
report = []

for img_id, anns in anns_by_img.items():
    # 1) collect T1 polygons and choose the most superior (smallest y-centroid)
    t1_polys = []
    for a in anns:
        if a["_name"] == "T1":
            for poly in a["segmentation"]:
                if isinstance(poly, list) and len(poly) >= 2*MIN_VERT_POINTS:
                    t1_polys.append(poly)
    if not t1_polys:
        report.append((img_id, "SKIP_NO_T1"))
        continue

    t1_centroids = [(poly_centroid(p), p) for p in t1_polys]
    t1_centroids.sort(key=lambda t: t[0][1])  # sort by cy
    (t1_cx, t1_cy), t1_poly = t1_centroids[0]

    v_below = []
    for a in anns:
        if a["_name"] != "V":
            continue
        for poly in a["segmentation"]:
            if not (isinstance(poly, list) and len(poly) >= 2*MIN_VERT_POINTS):
                continue
            cx, cy = poly_centroid(poly)
            if cy > t1_cy:  # below T1
                v_below.append((poly, cx, cy))

    v_below.sort(key=lambda t: t[2])

    ordered_polys = [("T1", t1_poly, t1_cx, t1_cy)] + [("V", p, cx, cy) for (p, cx, cy) in v_below]

    for idx, (_, poly, _, _) in enumerate(ordered_polys):
        if idx >= len(VERTEBRA_NAMES):
            break
        label = VERTEBRA_NAMES[idx]
        bbox = poly_bbox(poly)
        area = poly_area(poly)
        new_annotations.append({
            "id": next_id,
            "image_id": img_id,
            "bbox": [float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])],
            "area": float(area),
            "iscrowd": 0,
            "segmentation": [poly],
            "category_id": name_to_id[label],
        })
        next_id += 1

    report.append((img_id, f"OK_assigned_{min(len(ordered_polys), len(VERTEBRA_NAMES))}"))

new_coco = {"images": coco["images"], "annotations": new_annotations, "categories": categories}
OUT.write_text(json.dumps(new_coco, indent=2), encoding="utf-8")

ids = [a["category_id"] for a in new_annotations]
print("Unique category ids:", sorted(set(ids)))
print("Counts per id:", dict(Counter(ids)))
print("Images processed:", len(report))
print("Sample reports:", report[:10])


Unique category ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
Counts per id: {1: 177, 2: 176, 3: 176, 4: 176, 5: 174, 6: 174, 7: 174, 8: 174, 9: 173, 10: 172, 11: 172, 12: 172, 13: 170, 14: 169, 15: 168, 16: 163, 17: 129}
Images processed: 178
Sample reports: [(0, 'OK_assigned_17'), (1, 'OK_assigned_17'), (2, 'OK_assigned_17'), (3, 'OK_assigned_17'), (4, 'OK_assigned_17'), (5, 'OK_assigned_17'), (6, 'OK_assigned_17'), (7, 'OK_assigned_17'), (8, 'OK_assigned_17'), (9, 'OK_assigned_17')]


In [17]:
import pandas as pd

out_coco = json.loads(OUT.read_text(encoding="utf-8"))
out_coco.keys()

dict_keys(['images', 'annotations', 'categories'])

In [20]:
df = pd.DataFrame(out_coco["annotations"])
df.value_counts("category_id")

Unnamed: 0_level_0,count
category_id,Unnamed: 1_level_1
1,177
2,176
3,176
4,176
5,174
6,174
7,174
8,174
9,173
10,172


In [21]:
df = pd.DataFrame(out_coco["images"])
df

Unnamed: 0,id,toras_path
0,0,/S_21.jpg
1,1,/S_22.jpg
2,2,/S_23.jpg
3,3,/S_24.jpg
4,4,/S_25.jpg
...,...,...
180,180,/N_26.jpg
181,181,/N_27.jpg
182,182,/N_28.jpg
183,183,/N_30.jpg


## Quitando anotaciones de imágenes que no existen

In [24]:
import json
from pathlib import Path

IN  = Path(f"{BASE_PATH}/annotations_T1_T12_L1_L5_below_T1.json")
OUT = Path(f"{BASE_PATH}/annotations_filtered.json")

coco = json.loads(IN.read_text(encoding="utf-8"))

images_to_drop = {im["id"] for im in coco["images"] if im["toras_path"].startswith("N_")}

new_images = [im for im in coco["images"] if im["id"] not in images_to_drop]

new_annotations = [ann for ann in coco["annotations"] if ann["image_id"] not in images_to_drop]

def fix_file_name(images: list[dict]) -> list:
  fixed_images = []
  for img in coco["images"]:
    new_img = img.copy()

    if "toras_path" in new_img:
        new_img["file_name"] = Path(new_img["toras_path"]).name  # only filename
        del new_img["toras_path"]

    fixed_images.append(new_img)

  return fixed_images

new_images = fix_file_name(new_images)

new_coco = {
    "images": new_images,
    "annotations": new_annotations,
    "categories": coco["categories"]
}
OUT.write_text(json.dumps(new_coco, indent=2), encoding="utf-8")

print(f"Images: {len(new_images)} | Annotations: {len(new_annotations)}")

Images: 185 | Annotations: 2889


In [25]:
import pandas as pd

out_coco = json.loads(OUT.read_text(encoding="utf-8"))
out_coco.keys()

dict_keys(['images', 'annotations', 'categories'])

In [26]:
df = pd.DataFrame(out_coco["annotations"])
df.value_counts("category_id")

Unnamed: 0_level_0,count
category_id,Unnamed: 1_level_1
1,177
2,176
3,176
4,176
5,174
6,174
7,174
8,174
9,173
10,172
