<a href="https://colab.research.google.com/github/sinhaji14/DEEP-LEARNING-PROJECT-FOR-PLACEMENT/blob/main/Dental.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ========================
# STEP 1: Setup & Load Data
# ========================

# Install dependencies
!pip install ultralytics fiftyone -q

import os
import json
import shutil
from tqdm import tqdm

# Mount Google Drive (if dataset is in Drive, optional)
from google.colab import drive
drive.mount('/content/drive')

# Set paths
# Scratch dataset root on the Colab VM, with one sub-folder for images and one for labels.
# NOTE(review): the later cells visible in this notebook write to /content/dentalai-yolo
# instead of this directory — confirm whether these folders are still needed.
DATASET_DIR = "/content/dentalai"
IMAGES_DIR = os.path.join(DATASET_DIR, "images")
LABELS_DIR = os.path.join(DATASET_DIR, "labels")

# exist_ok=True makes the cell safe to re-run.
os.makedirs(IMAGES_DIR, exist_ok=True)
os.makedirs(LABELS_DIR, exist_ok=True)

print("✅ Folders created:", DATASET_DIR)


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m88.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.3/112.3 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.7/61.7 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.4/112.4 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.8/74.8 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m140.7 MB/s[0m eta [36m0:00:00[

In [None]:
# ==============================
# STEP 3: Convert JSON -> YOLO Segmentation
# ==============================

import os
import json
import cv2
import numpy as np
from tqdm import tqdm

# Paths
DRIVE_DATASET_PATH = "/content/drive/MyDrive/DENTALDATASET"   # change if needed
OUTPUT_DATASET_PATH = "/content/dentalai-yolo"

os.makedirs(OUTPUT_DATASET_PATH, exist_ok=True)

splits = ["train", "valid", "test"]

# Classes - only keep "tooth"
CLASSES = ["tooth"]

def convert_ann(json_file, out_label_file, class_map):
    """Convert one LabelMe-style JSON annotation into a YOLO segmentation label file.

    Each kept polygon becomes one line: ``<class_id> x1 y1 x2 y2 ...`` with
    coordinates divided by the image width/height and clamped to [0, 1].

    Args:
        json_file: Path to the annotation JSON (expects ``imageHeight``,
            ``imageWidth`` and a ``shapes`` list).
        out_label_file: Path of the ``.txt`` label file to write.
        class_map: Mapping from label name to integer class id; shapes whose
            label is not a key are skipped.

    Returns:
        The number of polygons written (0 when nothing was written).
    """
    with open(json_file, "r") as f:
        data = json.load(f)

    h, w = data.get("imageHeight"), data.get("imageWidth")
    if not h or not w:
        # Without image dimensions the points cannot be normalised.
        return 0

    yolo_lines = []
    for shape in data.get("shapes") or []:
        label = shape.get("label")
        if label not in class_map:
            continue  # skip caries, cavity, crack

        # A missing shape_type is treated as a polygon.
        if shape.get("shape_type", "polygon") != "polygon":
            continue

        points = shape.get("points") or []
        if len(points) < 3:
            continue  # fewer than three vertices cannot enclose an area

        norm_points = []
        for x, y in points:
            # Clamp so vertices annotated slightly outside the image stay in range.
            norm_points.append(min(1.0, max(0.0, x / w)))
            norm_points.append(min(1.0, max(0.0, y / h)))

        cls_id = class_map[label]
        yolo_lines.append(f"{cls_id} " + " ".join(f"{p:.6f}" for p in norm_points))

    if yolo_lines:
        with open(out_label_file, "w") as f:
            f.write("\n".join(yolo_lines))
    elif os.path.exists(out_label_file):
        # Drop a label file left over from an earlier run so it cannot be picked up.
        os.remove(out_label_file)

    return len(yolo_lines)

# Process each split
import shutil  # needed for the image copy below; this cell does not import it otherwise

CLASS_MAP = {name: idx for idx, name in enumerate(CLASSES)}  # built once, not per image
IMAGE_EXTS = (".jpg", ".jpeg", ".png")

for split in splits:
    img_dir = os.path.join(DRIVE_DATASET_PATH, split, "img")
    ann_dir = os.path.join(DRIVE_DATASET_PATH, split, "ann")

    out_img_dir = os.path.join(OUTPUT_DATASET_PATH, split, "images")
    out_label_dir = os.path.join(OUTPUT_DATASET_PATH, split, "labels")

    os.makedirs(out_img_dir, exist_ok=True)
    os.makedirs(out_label_dir, exist_ok=True)

    skipped = 0  # images that had no matching annotation file
    for file in tqdm(os.listdir(img_dir), desc=f"Processing {split}"):
        # Case-insensitive so .JPG / .jpeg files are not dropped.
        if not file.lower().endswith(IMAGE_EXTS):
            continue

        base = os.path.splitext(file)[0]
        json_file = os.path.join(ann_dir, base + ".json")
        if not os.path.exists(json_file):
            skipped += 1
            continue

        # Copy image
        shutil.copy(os.path.join(img_dir, file), os.path.join(out_img_dir, file))

        # Convert annotation
        out_label_file = os.path.join(out_label_dir, base + ".txt")
        convert_ann(json_file, out_label_file, CLASS_MAP)

    if skipped:
        # Surface the skip count: a fast "100%" bar can otherwise hide that nothing was converted.
        print(f"⚠️ {split}: skipped {skipped} images with no matching annotation in {ann_dir}")

print("✅ Conversion complete! YOLO dataset ready at:", OUTPUT_DATASET_PATH)


Processing train: 100%|██████████| 1991/1991 [00:01<00:00, 1180.88it/s]
Processing valid: 100%|██████████| 254/254 [00:00<00:00, 756.06it/s]
Processing test: 100%|██████████| 250/250 [00:00<00:00, 846.30it/s]

✅ Conversion complete! YOLO dataset ready at: /content/dentalai-yolo





In [None]:
# ==============================
# STEP 4: Create YOLO Dataset Config
# ==============================

# Dataset description passed to model.train(data=...): dataset root, the
# per-split image folders (relative to the root) and the class-id -> name table.
yaml_content = "\n".join([
    "path: /content/dentalai-yolo",
    "",
    "train: train/images",
    "val: valid/images",
    "test: test/images",
    "",
    "names:",
    "  0: tooth",
    "",  # trailing empty item yields the final newline
])

with open("/content/dental.yaml", "w") as yaml_file:
    yaml_file.write(yaml_content)

print("✅ dental.yaml created at /content/dental.yaml")
!cat /content/dental.yaml


✅ dental.yaml created at /content/dental.yaml
path: /content/dentalai-yolo

train: train/images
val: valid/images
test: test/images

names:
  0: tooth


In [None]:
import os, shutil

# Mirror every split's images from the Drive dataset into the YOLO folder layout
# (<split>/img on Drive  ->  <split>/images under TARGET_DATASET).
SOURCE_DATASET = "/content/drive/MyDrive/DENTALDATASET"
TARGET_DATASET = "/content/dentalai-yolo"

splits = ["train", "valid", "test"]

for split in splits:
    src_img_dir = os.path.join(SOURCE_DATASET, split, "img")
    dst_img_dir = os.path.join(TARGET_DATASET, split, "images")
    os.makedirs(dst_img_dir, exist_ok=True)

    # Keep only image files; the extension test is case-insensitive.
    image_names = [
        name for name in os.listdir(src_img_dir)
        if name.lower().endswith((".jpg", ".jpeg", ".png"))
    ]
    for name in image_names:
        shutil.copy(os.path.join(src_img_dir, name), os.path.join(dst_img_dir, name))

print("✅ Images copied into YOLO structure!")

# Check one split
!ls /content/dentalai-yolo/train/images | head -10


✅ Images copied into YOLO structure!
1000_jpg.rf.ad94534c8a4bf33d828b910160011dd9.jpg
1001_jpg.rf.c00322f19b0c2d53472ad0181ae21683.jpg
1002_jpg.rf.6d38658d13e1448f7275acf979722c65.jpg
1007_jpg.rf.d08c6d48aad78ebc1ae9f48075dc628a.jpg
1008_jpg.rf.e756a25bd9b1eca086c9e49c5fbdf9f1.jpg
100_jpg.rf.b82625b276769d80f0c12c1f0b318d6f.jpg
1014_jpg.rf.cfe617d31002718b8273c7094fc0f248.jpg
1015_jpg.rf.f4a1d4ebf826db767c0bbb8b87446be8.jpg
1018_jpg.rf.aabbf1107ffd09f3e6e0d1e7804d8eb9.jpg
1020_jpg.rf.45bf4df271eb2b540fe792f3f8296a4f.jpg


In [None]:
from ultralytics import YOLO

# Load pretrained YOLOv8 segmentation model
model = YOLO("yolov8s-seg.pt")

# Train.
# exist_ok=True pins the output to the run directory named below. Without it a
# re-run gets a numeric suffix (the logged config for this cell shows
# name=dental_yolov8_seg2), so a hard-coded ".../dental_yolov8_seg/weights/best.pt"
# path would no longer point at the latest weights.
# Note: re-running therefore overwrites the previous results of this run name.
model.train(
    data="/content/dental.yaml",
    epochs=50,           # increase if needed
    imgsz=640,
    batch=8,
    name="dental_yolov8_seg",
    exist_ok=True,
)


Ultralytics 8.3.194 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/dental.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s-seg.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=dental_yolov8_seg2, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, 

RuntimeError: Dataset '/content/dental.yaml' error ❌ '/content/dental.yaml' does not exist

In [None]:
# Re-install ultralytics and other deps (if session restarted)
!pip install ultralytics opencv-python-headless matplotlib -q

import os, random, cv2, numpy as np
import matplotlib.pyplot as plt
from ultralytics import YOLO

# Paths
WEIGHTS = "/content/runs/segment/dental_yolov8_seg/weights/best.pt"  # trained weights
IMG_DIR = "/content/dentalai-yolo/valid/images"                      # validation images
OUT_DIR = "/content/infer_vis"
os.makedirs(OUT_DIR, exist_ok=True)

# Load model
model = YOLO(WEIGHTS)

# Pick a few random images
all_imgs = [os.path.join(IMG_DIR, f) for f in os.listdir(IMG_DIR) if f.lower().endswith((".jpg",".jpeg",".png"))]
if not all_imgs:
    # Fail with a clear message instead of handing an empty source list to predict().
    raise FileNotFoundError(f"No images found in {IMG_DIR}")
sample_imgs = random.sample(all_imgs, k=min(6, len(all_imgs)))

# Run inference.
# retina_masks=True asks for masks at the original image resolution, so they line up
# with the image read from disk instead of needing a plain resize from the
# (possibly padded) network input size.
results = model.predict(
    source=sample_imgs,
    imgsz=640,
    conf=0.25,
    iou=0.5,
    retina_masks=True,
    verbose=False,
)

def overlay_mask(img_bgr, masks, alpha=0.45, color=(0, 255, 255)):
    """Blend filled instance masks over a BGR image.

    Args:
        img_bgr: Image array in OpenCV BGR channel order; it is not modified.
        masks: Iterable of 2-D boolean masks with the same height/width as the
            image, or ``None``.
        alpha: Weight of the painted overlay in the blend (0 = image only).
        color: BGR fill colour. The default (0, 255, 255) is yellow in BGR order.

    Returns:
        ``img_bgr`` itself when there are no masks, otherwise a new blended image.
    """
    if masks is None or len(masks) == 0:
        return img_bgr

    overlay = img_bgr.copy()
    for m in masks:  # m: [H,W] boolean mask
        cnts, _ = cv2.findContours(m.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(overlay, cnts, -1, color, thickness=cv2.FILLED)
        # Thin black outline keeps neighbouring instances visually separate.
        cv2.drawContours(overlay, cnts, -1, (0, 0, 0), thickness=1)

    # addWeighted only reads its inputs, so the untouched original can be blended directly.
    return cv2.addWeighted(overlay, alpha, img_bgr, 1 - alpha, 0)

# Plot results
# Lay the sampled predictions out on a grid with 3 columns; each panel is also
# written to OUT_DIR under the source image's file name.
n = len(results)
cols = 3
rows = int(np.ceil(n/cols))
plt.figure(figsize=(16, 5*rows))

# enumerate(..., 1): plt.subplot indices are 1-based.
for i, (img_path, res) in enumerate(zip(sample_imgs, results), 1):
    img = cv2.imread(img_path)
    h, w = img.shape[:2]

    # Collect masks for "tooth" (class 0)
    masks = []
    if hasattr(res, "masks") and res.masks is not None:
        # Pair each detection's class id with its mask tensor.
        for cls_id, m in zip(res.boxes.cls.int().tolist(), res.masks.data):
            if cls_id == 0:
                mnp = m.cpu().numpy().astype(bool)
                # Bring the mask to the image size when the two differ.
                # NOTE(review): a plain resize assumes the mask covers the whole image with
                # no padding — confirm alignment for non-square inputs.
                if mnp.shape[:2] != (h, w):
                    mnp = cv2.resize(mnp.astype(np.uint8), (w, h), interpolation=cv2.INTER_NEAREST).astype(bool)
                masks.append(mnp)

    vis = overlay_mask(img, masks)
    out_path = os.path.join(OUT_DIR, os.path.basename(img_path))
    cv2.imwrite(out_path, vis)

    # matplotlib expects RGB, OpenCV images are BGR.
    vis_rgb = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
    plt.subplot(rows, cols, i)
    plt.imshow(vis_rgb)
    plt.title(os.path.basename(img_path))
    plt.axis('off')

plt.tight_layout()
plt.show()

print(f"✅ Inference done! Visualizations saved in: {OUT_DIR}")


In [None]:
# ==============================
# STEP 9 (Fix-All): Auto-locate dataset in Drive, rebuild YOLO dataset, recreate dental.yaml, quick sanity-train
# ==============================
!pip install -q ultralytics opencv-python-headless

import os, glob, shutil, json
from tqdm import tqdm
from ultralytics import YOLO

# --- 1) Auto-locate dataset root in Drive ---
DRIVE_ROOTS = [
    "/content/drive/MyDrive",
    "/content/drive/MyDrive/DENTALDATASET",
    "/content/drive/MyDrive/DentalAI",
    "/content/drive/MyDrive/dental",
]

def find_dataset_root(candidates, max_depth=None):
    """Return the first directory that looks like the dataset root, or ``None``.

    A directory qualifies when it contains ``train``, ``valid`` and ``test``
    sub-directories and ``train`` contains both ``img`` and ``ann``.

    The tree is walked lazily, so the search stops at the first match instead of
    listing every folder under a candidate before checking any of them.

    Args:
        candidates: Directories to search, in priority order. Non-existent
            entries are skipped.
        max_depth: Optional limit on how many levels below each candidate are
            searched (0 = the candidate itself only). ``None`` means no limit.

    Returns:
        Path of the matching directory, or ``None`` if nothing matched.
    """
    required_splits = ("train", "valid", "test")

    def _looks_like_dataset(path):
        return (
            all(os.path.isdir(os.path.join(path, s)) for s in required_splits)
            and os.path.isdir(os.path.join(path, "train", "img"))
            and os.path.isdir(os.path.join(path, "train", "ann"))
        )

    for root in candidates:
        if not os.path.isdir(root):
            continue
        root = root.rstrip(os.sep) or os.sep
        base_depth = root.count(os.sep)
        for dirpath, dirnames, _ in os.walk(root):
            if _looks_like_dataset(dirpath):
                return dirpath
            # Deterministic order; hidden folders are not descended into.
            dirnames[:] = sorted(d for d in dirnames if not d.startswith("."))
            if max_depth is not None and dirpath.count(os.sep) - base_depth >= max_depth:
                dirnames[:] = []  # prune: do not go deeper than max_depth
    return None

# Resolve the dataset root; stop the cell with a hint when nothing matches.
SOURCE_DATASET = find_dataset_root(DRIVE_ROOTS)
if SOURCE_DATASET is None:
    # Show up to 20 "train" folders found in Drive so the right root can be picked by hand.
    train_dirs = sorted(glob.glob("/content/drive/MyDrive/**/train", recursive=True))
    print("❌ Could not auto-locate dataset. Here are some candidates:")
    print("\n".join(train_dirs[:20]))
    raise SystemExit("Please set SOURCE_DATASET manually (the folder that contains train/img and train/ann).")

print("✅ Found dataset root:", SOURCE_DATASET)

# --- 2) Create YOLO layout and copy images ---
YOLO_DATASET = "/content/dentalai-yolo"
splits = ["train", "valid", "test"]

for s in splits:
    src_img_dir = os.path.join(SOURCE_DATASET, s, "img")
    dst_img_dir = os.path.join(YOLO_DATASET, s, "images")
    dst_lab_dir = os.path.join(YOLO_DATASET, s, "labels")
    for folder in (dst_img_dir, dst_lab_dir):
        os.makedirs(folder, exist_ok=True)

    # Only copy when the destination holds no images yet and the source folder exists.
    already_populated = any(
        name.lower().endswith((".jpg",".jpeg",".png")) for name in os.listdir(dst_img_dir)
    )
    if already_populated or not os.path.isdir(src_img_dir):
        continue

    for f in tqdm(sorted(os.listdir(src_img_dir)), desc=f"Copy {s} images"):
        if f.lower().endswith((".jpg",".jpeg",".png")):
            shutil.copy(os.path.join(src_img_dir, f), os.path.join(dst_img_dir, f))

def yolo_counts(root, split_names=None):
    """Count image and label files per split under a YOLO-style dataset root.

    Args:
        root: Dataset root containing ``<split>/images`` and ``<split>/labels``.
        split_names: Split folder names to inspect. Defaults to the
            notebook-level ``splits`` list when omitted.

    Returns:
        Dict mapping split name to ``(n_images, n_label_txt_files)``. Missing
        folders count as zero.
    """
    if split_names is None:
        split_names = splits
    image_exts = (".jpg", ".jpeg", ".png")

    stats = {}
    for s in split_names:
        image_paths = glob.glob(os.path.join(root, s, "images", "*"))
        ni = sum(1 for p in image_paths if p.lower().endswith(image_exts))
        nl = len(glob.glob(os.path.join(root, s, "labels", "*.txt")))
        stats[s] = (ni, nl)
    return stats

print("Counts after copying images:", yolo_counts(YOLO_DATASET))

# --- 3) Convert LabelMe JSON -> YOLOv8 segmentation labels (tooth only) ---
def convert_labelme_polygon(json_file, out_txt):
    """Write the tooth/teeth polygons of one LabelMe JSON as YOLO segmentation lines.

    Only shapes of type ``polygon`` whose lower-cased label contains "tooth" or
    "teeth" are kept. Every kept polygon is written as class 0 with coordinates
    normalised by the image size and clamped to [0, 1].

    Args:
        json_file: Path to the annotation JSON.
        out_txt: Path of the label file to write.

    Returns:
        Number of polygons written. Returns 0 when the image size is missing or
        nothing qualified; in the latter case an existing ``out_txt`` is
        removed so a stale label cannot survive.
    """
    with open(json_file, "r") as f:
        data = json.load(f)
    h, w = data.get("imageHeight"), data.get("imageWidth")
    if not h or not w:
        return 0

    lines = []
    for sh in data.get("shapes") or []:
        lbl = (sh.get("label") or "").lower().strip()
        # keep only tooth/teeth ("teeth" does not contain "tooth", so both are tested)
        if "tooth" not in lbl and "teeth" not in lbl:
            continue
        if sh.get("shape_type") != "polygon":
            continue
        pts = sh.get("points") or []
        if len(pts) < 3:
            continue
        norm = []
        for x, y in pts:
            norm.append(max(0.0, min(1.0, float(x)/w)))
            norm.append(max(0.0, min(1.0, float(y)/h)))
        lines.append("0 " + " ".join(f"{v:.6f}" for v in norm))

    if lines:
        with open(out_txt, "w") as f:
            f.write("\n".join(lines))
        return len(lines)

    if os.path.exists(out_txt):
        os.remove(out_txt)
    return 0

# For every copied image, look up the annotation with the same base name and
# convert it; images without an annotation get no label file (background).
for s in splits:
    ann_dir = os.path.join(SOURCE_DATASET, s, "ann")
    img_dir = os.path.join(YOLO_DATASET, s, "images")
    lab_dir = os.path.join(YOLO_DATASET, s, "labels")
    os.makedirs(lab_dir, exist_ok=True)

    if os.path.isdir(ann_dir):
        image_paths = sorted(glob.glob(os.path.join(img_dir, "*")))
        for img_path in tqdm(image_paths, desc=f"Build {s} labels"):
            if img_path.lower().endswith((".jpg",".jpeg",".png")):
                base = os.path.splitext(os.path.basename(img_path))[0]
                jf = os.path.join(ann_dir, base + ".json")
                if os.path.exists(jf):
                    convert_labelme_polygon(jf, os.path.join(lab_dir, base + ".txt"))

print("Counts after building labels:", yolo_counts(YOLO_DATASET))

# --- 4) Recreate dental.yaml ---
# One key per line: dataset root, split image folders relative to it, class names.
yaml_content = (
    "path: /content/dentalai-yolo\n"
    "train: train/images\n"
    "val: valid/images\n"
    "test: test/images\n"
    "names:\n"
    "  0: tooth\n"
)
with open("/content/dental.yaml", "w") as yaml_file:
    yaml_file.write(yaml_content)
print("✅ Wrote /content/dental.yaml")

# --- 5) Quick sanity training (3 epochs) to ensure everything works and produce best.pt ---
# The guard below aborts when any split has zero images (first element of each count pair).
# Label counts (second element) are NOT checked here; use the counts printed above to
# diagnose a split that has images but no label files.
counts = yolo_counts(YOLO_DATASET)
if any(v[0] == 0 for v in counts.values()):
    raise SystemExit(f"❌ No images detected in {YOLO_DATASET}. Please verify SOURCE_DATASET: {SOURCE_DATASET}")

print("🚀 Starting quick sanity training (3 epochs)...")
# Start from the pretrained small segmentation checkpoint.
model = YOLO("yolov8s-seg.pt")
model.train(
    data="/content/dental.yaml",
    epochs=3,
    imgsz=640,
    batch=8,
    name="dental_quickcheck_seg",
    verbose=True
)

# NOTE(review): the path printed below assumes the run directory is exactly
# "dental_quickcheck_seg"; confirm it was not given a numeric suffix on a re-run.
print("✅ Done. Check weights at: /content/runs/segment/dental_quickcheck_seg/weights/best.pt")


In [None]:
# ==============================
# STEP 10: Diagnose annotations & robustly rebuild YOLO labels
# ==============================
import os, glob, json, re, shutil
from tqdm import tqdm

SOURCE_DATASET = "/content/drive/MyDrive/DENTALDATASET"   # <- your dataset root (has train/valid/test)
YOLO_DATASET   = "/content/dentalai-yolo"
splits = ["train", "valid", "test"]

def preview_some_json(split, n=5):
    """Print a short structural summary of the first ``n`` annotation files of a split.

    For each file this prints the top-level keys, the first five labels and
    shape types found under ``shapes`` (or ``objects``), and the recorded image
    dimensions.

    Args:
        split: Split folder name under ``SOURCE_DATASET`` (its ``ann`` sub-folder is read).
        n: Maximum number of files to preview.
    """
    ann_dir = os.path.join(SOURCE_DATASET, split, "ann")
    all_jfs = sorted(glob.glob(os.path.join(ann_dir, "*.json")))
    jfs = all_jfs[:n]
    # Report the real total, taken before slicing down to the preview subset.
    print(f"\n--- Preview {split} annotations ({len(all_jfs)} total, showing {len(jfs)}) ---")
    for jf in jfs:
        with open(jf, "r") as f:
            data = json.load(f)
        print("File:", os.path.basename(jf))
        # Print top-level keys and quick shape summary
        print(" keys:", list(data.keys()))
        shapes = data.get("shapes") or data.get("objects") or []
        if isinstance(shapes, list):
            labs = [(s.get("label") or s.get("class") or "").strip() for s in shapes if isinstance(s, dict)]
            tys  = [(s.get("shape_type") or s.get("type") or "").strip() for s in shapes if isinstance(s, dict)]
            print(" labels (first 5):", labs[:5])
            print(" shape_types (first 5):", tys[:5])
        else:
            print(" shapes not a list; raw:", type(shapes))
        # try to show image dims
        print(" imageHeight:", data.get("imageHeight"), " imageWidth:", data.get("imageWidth"))
        print("-")

def collect_label_stats(split):
    """Tally label names and shape types across all annotation JSONs of a split.

    Files that cannot be opened or parsed are skipped, as are non-list shape
    containers and non-dict entries.

    Returns:
        ``(labels, shape_types, total_shapes, tooth_like)`` where the first two
        are ``{name: count}`` dicts and ``tooth_like`` counts labels matching
        the tooth/teeth pattern.
    """
    tooth_pattern = re.compile(r"\btooth|\bteeth", re.IGNORECASE)
    label_hist, type_hist = {}, {}
    n_shapes = 0
    n_tooth = 0

    for path in glob.glob(os.path.join(SOURCE_DATASET, split, "ann", "*.json")):
        try:
            with open(path, "r") as fh:
                payload = json.load(fh)
        except Exception:
            continue

        entries = payload.get("shapes") or payload.get("objects") or []
        if not isinstance(entries, list):
            continue

        for entry in entries:
            if not isinstance(entry, dict):
                continue
            name = (entry.get("label") or entry.get("class") or "").strip()
            kind = (entry.get("shape_type") or entry.get("type") or "").strip()
            label_hist[name] = label_hist.get(name, 0) + 1
            type_hist[kind] = type_hist.get(kind, 0) + 1
            n_shapes += 1
            if tooth_pattern.search(name):
                n_tooth += 1

    return label_hist, type_hist, n_shapes, n_tooth

# 1) Show quick previews
for preview_split in ("train", "valid"):
    preview_some_json(preview_split, n=3)

# 2) Aggregate label/shape stats
for sp in splits:
    labs, tys, total, tooth_like = collect_label_stats(sp)
    # Most frequent labels first.
    ranked_labels = sorted(labs.items(), key=lambda x: x[1], reverse=True)
    print(f"\n=== {sp.upper()} STATS ===")
    print("Total shapes:", total, " | tooth/teeth-like shapes:", tooth_like)
    print("Top 10 labels:", ranked_labels[:10])
    print("Shape types:", tys)

# 3) Rebuild labels with robust rules:
#    - Accept labels that contain 'tooth' or 'teeth' (case-insensitive), or EXACTLY one of known tooth labels if dataset uses specific naming.
#    - Handle polygon or rectangle shapes. (circle/line/point skipped)
import math

def rect_to_polygon(points):
    """Expand a two-corner rectangle into its four-vertex polygon.

    Returns ``None`` unless ``points`` is a list of exactly two ``[x, y]``
    pairs; otherwise the corners in the order
    (x1,y1) -> (x2,y1) -> (x2,y2) -> (x1,y2).
    """
    if isinstance(points, list) and len(points) == 2:
        first_corner, second_corner = points
        x1, y1 = first_corner
        x2, y2 = second_corner
        return [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
    return None

def convert_json_to_yolo_seg(jf, out_txt):
    """Convert tooth-like polygons/rectangles of one annotation JSON to YOLO-seg lines.

    Shapes are read from ``shapes`` (or ``objects``); only entries whose label
    matches the tooth/teeth pattern and whose type is ``polygon`` or
    ``rectangle`` are kept. Each kept outline is written as class 0 with
    coordinates normalised by the image size and clamped to [0, 1].

    Returns:
        Number of lines written. When nothing qualifies, an existing
        ``out_txt`` is deleted and 0 is returned. A missing image size or a
        non-list shape container returns 0 without touching ``out_txt``.
    """
    with open(jf, "r") as fh:
        ann = json.load(fh)

    img_h = ann.get("imageHeight")
    img_w = ann.get("imageWidth")
    if not img_h or not img_w:
        return 0

    entries = ann.get("shapes") or ann.get("objects") or []
    if not isinstance(entries, list):
        return 0

    tooth_re = re.compile(r"\btooth|\bteeth", re.IGNORECASE)
    rows = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        name = (entry.get("label") or entry.get("class") or "").strip()
        kind = (entry.get("shape_type") or entry.get("type") or "").strip().lower()
        # keep only tooth-like labels
        if tooth_re.search(name) is None:
            continue

        raw_pts = entry.get("points")
        if kind == "rectangle":
            outline = rect_to_polygon(raw_pts)
        elif kind == "polygon" and isinstance(raw_pts, list) and len(raw_pts) >= 3:
            outline = raw_pts
        else:
            # unsupported shapes (circle/line/point) and malformed polygons
            continue

        if not outline or len(outline) < 3:
            continue

        # class x1 y1 x2 y2 ... with every coordinate normalised to 0..1
        coords = []
        for px, py in outline:
            coords.append(max(0.0, min(1.0, float(px) / img_w)))
            coords.append(max(0.0, min(1.0, float(py) / img_h)))
        if len(coords) >= 6:
            rows.append("0 " + " ".join(f"{c:.6f}" for c in coords))

    if not rows:
        # no tooth polygons: make sure no stale label file is left behind
        if os.path.exists(out_txt):
            os.remove(out_txt)
        return 0

    with open(out_txt, "w") as fh:
        fh.write("\n".join(rows))
    return len(rows)

# 4) Build labels into YOLO folder
# Walk the already-copied images of each split, convert the annotation that shares
# the image's base name, and report how many polygons were written per split.
for sp in splits:
    img_dir = os.path.join(YOLO_DATASET, sp, "images")
    lab_dir = os.path.join(YOLO_DATASET, sp, "labels")
    ann_dir = os.path.join(SOURCE_DATASET, sp, "ann")
    os.makedirs(lab_dir, exist_ok=True)

    if not os.path.isdir(img_dir):
        print(f"⚠️ Missing images dir for split {sp}: {img_dir}")
        continue

    made = 0
    image_paths = sorted(glob.glob(os.path.join(img_dir, "*")))
    for img_path in tqdm(image_paths, desc=f"Rebuild {sp} labels"):
        if img_path.lower().endswith((".jpg",".jpeg",".png")):
            base = os.path.splitext(os.path.basename(img_path))[0]
            jf = os.path.join(ann_dir, base + ".json")
            out_txt = os.path.join(lab_dir, base + ".txt")
            if os.path.exists(jf):
                made += convert_json_to_yolo_seg(jf, out_txt)
    print(f"{sp}: wrote {made} polygons into label files.")

# 5) Count how many labels now
def yolo_counts(root):
    """Return ``{split: (n_images, n_label_files)}`` for each name in ``splits``.

    Images are files under ``<root>/<split>/images`` ending in .jpg/.jpeg/.png
    (case-insensitive); labels are ``*.txt`` files under ``<root>/<split>/labels``.
    """
    def _count_split(name):
        candidates = glob.glob(os.path.join(root, name, "images", "*"))
        images = [p for p in candidates if p.lower().endswith((".jpg",".jpeg",".png"))]
        labels = glob.glob(os.path.join(root, name, "labels", "*.txt"))
        return (len(images), len(labels))

    return {name: _count_split(name) for name in splits}

print("\nCounts after robust rebuild:", yolo_counts(YOLO_DATASET))



In [None]:
# ==============================
# STEP 11: Train YOLOv8-Segmentation (full run)
# ==============================
!pip install -q ultralytics

import os, glob
from ultralytics import YOLO

DATA_YAML = "/content/dental.yaml"                # created earlier
RUN_NAME  = "dental_teeth_seg_v1"                 # change if you want a new run name

# Sanity checks: dataset & labels must exist
def count_items(root="/content/dentalai-yolo"):
    """Count images and label files for the train/valid/test splits under ``root``.

    Returns:
        Dict mapping each split name to ``(n_images, n_label_txt_files)``;
        missing folders count as zero.
    """
    counts = {}
    for split_name in ("train", "valid", "test"):
        image_like = [
            path for path in glob.glob(f"{root}/{split_name}/images/*")
            if path.lower().endswith((".jpg",".jpeg",".png"))
        ]
        label_files = glob.glob(f"{root}/{split_name}/labels/*.txt")
        counts[split_name] = (len(image_like), len(label_files))
    return counts

# Pre-flight: the dataset config must exist and the train split needs at least
# one image and one label file; each assert message names the failing check.
stats = count_items()
print("Counts (images, labels) ->", stats)
assert os.path.exists(DATA_YAML), "dental.yaml not found. Recreate it before training."
assert stats["train"][0] > 0, "No training images found."
assert stats["train"][1] > 0, "No training labels found. Re-run the label conversion step."

# Train
model = YOLO("yolov8s-seg.pt")  # you can switch to yolov8m-seg.pt for better accuracy
model.train(
    data=DATA_YAML,
    epochs=50,          # increase for better results (e.g., 100+)
    imgsz=640,
    batch=8,
    name=RUN_NAME,
    workers=8,
    verbose=True
)

# NOTE(review): the path below assumes the run directory is exactly RUN_NAME;
# confirm it was not given a numeric suffix because a run of that name already existed.
print(f"\n✅ Training finished. Weights should be at /content/runs/segment/{RUN_NAME}/weights/best.pt")


In [None]:
# ==============================
# STEP 11 (Fix): Universal JSON/mask → YOLOv8-Seg converter
#  - Scans your ann/*.json to detect schema & label names
#  - Converts polygons/rectangles to YOLOv8 segmentation .txt
#  - If masks/ exist, converts mask PNGs to polygons
# ==============================
!pip install -q opencv-python-headless shapely

import os, glob, json, re, cv2, numpy as np
from tqdm import tqdm
from shapely.geometry import Polygon
from shapely.ops import unary_union

SOURCE_DATASET = "/content/drive/MyDrive/DENTALDATASET"   # <-- your dataset root
YOLO_DATASET   = "/content/dentalai-yolo"
SPLITS = ["train","valid","test"]

# -------- helper: ensure dirs --------
for s in SPLITS:
    for sub in ("images", "labels"):
        os.makedirs(os.path.join(YOLO_DATASET, s, sub), exist_ok=True)

# -------- helper: copy images if empty --------
# A split is populated only when its source folder exists and the destination
# folder is still completely empty.
for s in SPLITS:
    src_img = os.path.join(SOURCE_DATASET, s, "img")
    dst_img = os.path.join(YOLO_DATASET, s, "images")
    if not os.path.isdir(src_img) or glob.glob(os.path.join(dst_img, "*")):
        continue
    for f in tqdm(sorted(os.listdir(src_img)), desc=f"Copy {s} images"):
        if not f.lower().endswith((".jpg",".jpeg",".png")):
            continue
        src = os.path.join(src_img, f)
        dst = os.path.join(dst_img, f)
        if os.path.exists(dst):
            continue
        try:
            os.link(src, dst)  # fast hardlink
        except Exception:
            # hard link not possible: fall back to a regular copy
            import shutil
            shutil.copy(src, dst)

# -------- detection of label terms --------
def scan_labels():
    """Survey every annotation JSON and report which labels and shape kinds occur.

    Each file is probed for several layouts: a LabelMe-style ``shapes`` list, a
    per-image COCO-style ``annotations``/``categories`` pair, a
    Supervisely-style ``objects`` list and a VIA-style ``regions`` list.
    Unreadable files, non-dict documents and non-dict entries are skipped.

    Returns:
        ``(label_counts, shape_types, has_mask_dir)``: two ``{name: count}``
        dicts plus ``{split: bool}`` telling whether ``<split>/masks`` exists.
    """
    label_counts = {}
    shape_types  = {}
    has_mask_dir = {}

    def _bump(counter, key):
        counter[key] = counter.get(key, 0) + 1

    for s in SPLITS:
        has_mask_dir[s] = os.path.isdir(os.path.join(SOURCE_DATASET, s, "masks"))
        for jf in glob.glob(os.path.join(SOURCE_DATASET, s, "ann", "*.json")):
            try:
                # Context manager so the handle is closed even across thousands of files.
                with open(jf, "r") as fh:
                    data = json.load(fh)
            except (OSError, ValueError):
                # Unreadable or malformed JSON: skip, this is only a survey.
                continue
            if not isinstance(data, dict):
                continue

            # LabelMe-style
            shapes = data.get("shapes")
            if isinstance(shapes, list):
                for sh in shapes:
                    if not isinstance(sh, dict):
                        continue
                    _bump(label_counts, (sh.get("label") or "").strip())
                    _bump(shape_types, (sh.get("shape_type") or "").strip())

            # COCO-per-image style (rare, but some datasets do it)
            anns = data.get("annotations")
            cats = data.get("categories")
            if isinstance(anns, list):
                cat_map = {}
                if isinstance(cats, list):
                    for c in cats:
                        if isinstance(c, dict):
                            cat_map[c.get("id")] = c.get("name")
                for a in anns:
                    if not isinstance(a, dict):
                        continue
                    cid = a.get("category_id")
                    _bump(label_counts, cat_map.get(cid, str(cid)))
                    st = "coco_poly" if a.get("segmentation") else "coco_box" if a.get("bbox") else "unknown"
                    _bump(shape_types, st)

            # Supervisely
            objs = data.get("objects")
            if isinstance(objs, list):
                for o in objs:
                    if not isinstance(o, dict):
                        continue
                    _bump(label_counts, (o.get("classTitle") or o.get("class_name") or "").strip())
                    if "points" in o:
                        _bump(shape_types, "polygon")

            # VIA
            regions = data.get("regions")
            if isinstance(regions, list):
                for r in regions:
                    if not isinstance(r, dict):
                        continue
                    ra = r.get("region_attributes") or {}
                    lbl = ra.get("label") or ra.get("name") or "via_object"
                    _bump(label_counts, str(lbl))
                    sa = r.get("shape_attributes") or {}
                    _bump(shape_types, sa.get("name"))
    return label_counts, shape_types, has_mask_dir

label_counts, shape_types, has_mask_dir = scan_labels()
print("\n== Label name histogram (top 20) ==")
for k,v in sorted(label_counts.items(), key=lambda x: x[1], reverse=True)[:20]:
    print(f"{k!r}: {v}")
print("\n== Shape types ==")
for k,v in shape_types.items():
    print(f"{k!r}: {v}")
print("\nMasks dirs present:", has_mask_dir)

# -------- label filters you want to keep as "tooth" ----------
# We accept anything that *contains* these substrings:
KEEP_REGEX = re.compile(r"(tooth|teeth|teeths|tooth_upper|tooth_lower|upper_teeth|lower_teeth|teeth_visible|tooth_area)", re.IGNORECASE)

# If your dataset uses specific names (e.g. "tooth_whole", "teeth_seg", "mouth_teeth"),
# add them to the regex above or list them here and we'll OR them in:
ADDITIONAL_LABELS = set([
    # e.g., "teeth", "teeth_region"
])

# -------- polygon writers --------
def write_yolo_poly(out_txt, polys_norm):
    """
    Write normalised polygons to a YOLO segmentation label file as class 0.

    polys_norm: list of polygons, each a flat [x1,y1,x2,y2,...] list normalized 0..1.
    Polygons with fewer than six values (three points) are dropped.

    Returns True when a file was written, False when nothing qualified
    (in which case no file is created or modified).
    """
    rows = [
        "0 " + " ".join(f"{v:.6f}" for v in flat)
        for flat in polys_norm
        if len(flat) >= 6
    ]
    if not rows:
        return False
    with open(out_txt, "w") as fh:
        fh.write("\n".join(rows))
    return True

def clamp01(x):
    """Limit ``x`` to the closed interval [0.0, 1.0]."""
    capped = x if x < 1.0 else 1.0
    return capped if capped > 0.0 else 0.0

# unify list of (x,y) -> flat normalized
def norm_flat(poly_xy, w, h):
    """Flatten (x, y) pairs into [x1, y1, x2, y2, ...], scaled by w/h and clamped to 0..1."""
    flat = []
    for px, py in poly_xy:
        flat.extend((clamp01(float(px) / w), clamp01(float(py) / h)))
    return flat

# merge overlapping polygons with shapely (optional, keeps fewer polys)
def merge_polygons(list_of_polys_xy):
    """Union the given point lists and return the exterior ring(s) of the result.

    Inputs with fewer than three points are ignored. An empty union yields
    ``[]``. Each returned ring omits the repeated closing vertex. If anything
    raises along the way, the input list is returned unchanged.
    """
    try:
        union = unary_union([Polygon(ring) for ring in list_of_polys_xy if len(ring) >= 3])
        if union.is_empty:
            return []
        # A single Polygon is handled like a one-element collection.
        parts = [union] if union.geom_type == "Polygon" else union.geoms
        return [np.array(part.exterior.coords[:-1]).tolist() for part in parts]
    except Exception:
        return list_of_polys_xy

# -------- converters for multiple schemas --------
def convert_labelme(data, w, h):
    """Collect tooth-like polygons from a LabelMe-style ``shapes`` list.

    A shape is kept when its label matches ``KEEP_REGEX`` or is listed in
    ``ADDITIONAL_LABELS``. Only closed region types are accepted: ``polygon``
    (also assumed when ``shape_type`` is missing) and ``rectangle``; a
    two-point rectangle is expanded to its four corners. Other shape types are
    skipped even if they carry three or more points, because they do not
    describe an enclosed area.

    Args:
        data: Parsed annotation dict.
        w, h: Image size; unused here, kept so all converters share a signature.

    Returns:
        List of polygons, each a list of ``[x, y]`` points in pixel units.
    """
    out = []
    for sh in data.get("shapes") or []:
        if not isinstance(sh, dict):
            continue
        lbl = (sh.get("label") or "").strip()
        if (KEEP_REGEX.search(lbl) is None) and (lbl not in ADDITIONAL_LABELS):
            continue
        st = (sh.get("shape_type") or "polygon").lower()
        pts = sh.get("points") or []
        if st == "rectangle":
            if len(pts) == 2:
                (x1,y1), (x2,y2) = pts
                pts = [[x1,y1],[x2,y1],[x2,y2],[x1,y2]]
        elif st != "polygon":
            continue
        if len(pts) >= 3:
            out.append(pts)
    return out

def convert_coco_per_image(data, w, h):
    """Collect tooth-like polygons from a per-image COCO-style document.

    Category ids are resolved to names through ``categories``; an annotation is
    kept when that name matches ``KEEP_REGEX`` or is in ``ADDITIONAL_LABELS``.
    Only list-form segmentations are read (each a flat [x1,y1,x2,y2,...] list
    with at least three points); other encodings are skipped.

    Returns:
        List of polygons, each a list of ``(x, y)`` tuples.
    """
    id_to_name = {}
    for cat in data.get("categories", []):
        if isinstance(cat, dict):
            id_to_name[cat.get("id")] = cat.get("name")

    polygons = []
    for ann in data.get("annotations", []):
        cat_id = ann.get("category_id")
        name = id_to_name.get(cat_id, str(cat_id))
        wanted = KEEP_REGEX.search(name or "") is not None or name in ADDITIONAL_LABELS
        if not wanted:
            continue
        seg = ann.get("segmentation")
        if not isinstance(seg, list):
            # e.g. RLE: not decoded here
            continue
        for flat in seg:
            if len(flat) >= 6:
                # pair up alternating x / y values
                polygons.append(list(zip(flat[0::2], flat[1::2])))
    return polygons

def convert_supervisely(data, w, h):
    """Collect tooth-like exterior rings from a Supervisely-style ``objects`` list.

    An object is kept when its ``classTitle`` (or ``class_name``) matches
    ``KEEP_REGEX`` or is in ``ADDITIONAL_LABELS`` and its ``points.exterior``
    holds at least three points.

    Returns:
        List of polygons, each the object's ``exterior`` list of [x, y] points.
    """
    rings = []
    for obj in data.get("objects", []):
        title = (obj.get("classTitle") or obj.get("class_name") or "").strip()
        if KEEP_REGEX.search(title) is None and title not in ADDITIONAL_LABELS:
            continue
        if "points" not in obj or "exterior" not in obj["points"]:
            continue
        exterior = obj["points"]["exterior"]  # list of [x,y]
        if len(exterior) >= 3:
            rings.append(exterior)
    return rings

def convert_via(data, w, h):
    """Collect polygons from a VIA-style dict (top-level ``regions``).

    ``regions`` may be a list or a dict keyed by region index; both are
    accepted, and a missing/None value is treated as empty. A region is kept
    when its ``region_attributes`` label (``label`` or ``name``) matches
    KEEP_REGEX or is in ADDITIONAL_LABELS and its ``shape_attributes`` describe
    a polygon with at least three points and equally long x/y lists.

    Returns a list of ``[[x, y], ...]`` polygons. ``w`` and ``h`` are unused.
    """
    regions = data.get("regions") or []
    if isinstance(regions, dict):
        # Dict form: the keys are region indices, the values are the regions.
        regions = list(regions.values())

    out = []
    for r in regions:
        if not isinstance(r, dict):
            # Skip malformed entries instead of failing the whole file.
            continue
        ra = r.get("region_attributes") or {}
        lbl = str(ra.get("label") or ra.get("name") or "")
        if (KEEP_REGEX.search(lbl) is None) and (lbl not in ADDITIONAL_LABELS):
            continue
        sa = r.get("shape_attributes") or {}
        if sa.get("name") != "polygon":
            continue
        allx = sa.get("all_points_x") or []
        ally = sa.get("all_points_y") or []
        if len(allx) >= 3 and len(allx) == len(ally):
            out.append([[x, y] for x, y in zip(allx, ally)])
    return out

def polygons_from_json(jf):
    """Load one annotation JSON and return ``(polys, w, h)``.

    Image size is looked up in the JSON only (the image file is not opened),
    trying in order: ``imageHeight``/``imageWidth``; ``height``/``width`` or
    ``imgHeight``/``imgWidth``; and finally a nested ``size`` dict with
    ``height``/``width`` keys. ``w``/``h`` are returned as found and may be
    None when no key is present, so callers must check them.

    Every schema converter is tried in turn and their polygons are
    concatenated, then passed through ``merge_polygons``.
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(jf, "r") as fh:
        data = json.load(fh)

    h, w = data.get("imageHeight"), data.get("imageWidth")
    if not (h and w):
        # try common alternative keys
        h = data.get("height") or data.get("imgHeight")
        w = data.get("width")  or data.get("imgWidth")
    if not (h and w):
        # Nested size block, e.g. {"size": {"height": ..., "width": ...}}.
        size = data.get("size")
        if isinstance(size, dict):
            h, w = size.get("height"), size.get("width")

    polys = []
    for conv in (convert_labelme, convert_coco_per_image, convert_supervisely, convert_via):
        try:
            tmp = conv(data, w, h)
            if tmp:
                polys.extend(tmp)
        except Exception:
            # Best effort by design: a converter fed a foreign schema is
            # expected to fail, so move on to the next one.
            continue

    # Optional: merge overlaps
    polys = merge_polygons(polys)
    return polys, w, h

# -------- masks → polygons (fallback) --------
def polygons_from_mask(mask_path):
    """Extract external contours from a grayscale mask as polygons.

    Returns ``(polys, w, h)``: polygons are lists of ``[x, y]`` points (after
    ``merge_polygons``) and ``w``/``h`` are the mask's dimensions. Returns
    ``([], 0, 0)`` when the mask cannot be read. Pixels with value above 127
    count as foreground.
    """
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        return [], 0, 0
    h, w = mask.shape[:2]
    binary = (mask > 127).astype(np.uint8)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Each contour is indexed as [:, 0, :] to get its Nx2 point array; drop
    # contours with fewer than three points.
    polys = [cnt[:, 0, :].tolist() for cnt in contours if len(cnt) >= 3]
    polys = merge_polygons(polys)
    return polys, w, h

# -------- drive the conversion --------
# Per-split count of label files written by this pass.
total_written = {s: 0 for s in SPLITS}

for s in SPLITS:
    img_dir = os.path.join(YOLO_DATASET, s, "images")
    lab_dir = os.path.join(YOLO_DATASET, s, "labels")
    ann_dir = os.path.join(SOURCE_DATASET, s, "ann")
    mask_dir = os.path.join(SOURCE_DATASET, s, "masks")  # optional
    # Make sure the output folder exists before any label file is written.
    os.makedirs(lab_dir, exist_ok=True)

    imgs = [f for f in sorted(os.listdir(img_dir)) if f.lower().endswith((".jpg", ".jpeg", ".png"))]
    for imgname in tqdm(imgs, desc=f"Convert {s}"):
        base = os.path.splitext(imgname)[0]
        out_txt = os.path.join(lab_dir, base + ".txt")

        # Primary source: the JSON annotation with the same base name.
        polys, w, h = [], 0, 0
        jf = os.path.join(ann_dir, base + ".json")
        if os.path.exists(jf):
            polys, w, h = polygons_from_json(jf)

        # Fallback: mask -> polygon, only when the JSON gave nothing usable
        # (no polygons, or no image size to normalize with).
        if not (polys and w and h) and os.path.isdir(mask_dir):
            for ext in (".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff"):
                mp = os.path.join(mask_dir, base + ext)
                if os.path.exists(mp):
                    polys, w, h = polygons_from_mask(mp)
                    if polys and w and h:
                        break

        # Normalize to [0, 1] and flatten to x0 y0 x1 y1 ... (single code path
        # for both sources).
        polys_norm = []
        if polys and w and h:
            for p in polys:
                flat = []
                for xy in p:
                    flat.append(max(0.0, min(1.0, xy[0] / w)))
                    flat.append(max(0.0, min(1.0, xy[1] / h)))
                polys_norm.append(flat)

        if polys_norm:
            if write_yolo_poly(out_txt, polys_norm):
                total_written[s] += 1
        elif os.path.exists(out_txt):
            # ensure no stale file left
            os.remove(out_txt)

print("\n✅ Done converting.")
print("Label files written per split:", total_written)

# ---- Show counts ----
def yolo_counts(root):
    """Return ``{split: (n_images, n_label_files)}`` for every split in SPLITS.

    Images are files under ``<root>/<split>/images`` ending in .jpg/.jpeg/.png
    (case-insensitive); labels are ``*.txt`` files under ``<root>/<split>/labels``.
    """
    image_exts = (".jpg", ".jpeg", ".png")
    stats = {}
    for s in SPLITS:
        image_paths = glob.glob(f"{root}/{s}/images/*")
        n_images = sum(1 for path in image_paths if path.lower().endswith(image_exts))
        n_labels = len(glob.glob(f"{root}/{s}/labels/*.txt"))
        stats[s] = (n_images, n_labels)
    return stats

print("Counts now:", yolo_counts(YOLO_DATASET))


In [None]:
# ==============================
# STEP 11b: Deep-inspect one JSON + Rebuild labels (robust)
#  - Reads image size from the actual image (not JSON)
#  - Accepts points as [[x,y], ...] OR [{'x':..,'y':..}, ...]
#  - Accepts label variants (Tooth/teeth etc., case-insensitive)
# ==============================
import os, glob, json, re, cv2
from tqdm import tqdm

# Root of the source dataset; the rebuild loop below reads <split>/ann/*.json under it.
SOURCE_DATASET = "/content/drive/MyDrive/DENTALDATASET"
# Root of the YOLO dataset; images are read from <split>/images and labels go to <split>/labels.
YOLO_DATASET   = "/content/dentalai-yolo"
# Split sub-directory names iterated by the loops in this cell.
SPLITS = ["train","valid","test"]

# Case-insensitive; a label is kept when it contains "tooth" or "teeth" anywhere.
KEEP_REGEX = re.compile(r"(tooth|teeth)", re.IGNORECASE)

def to_xy_list(points):
    """Return ``points`` as ``[[x, y], ...]`` floats.

    Accepts a list of ``{'x': .., 'y': ..}`` dicts or a list of sequences with
    at least two items. The first element decides which form is parsed;
    entries of the other form are dropped. Anything that is not a non-empty
    list yields ``[]``.
    """
    if not (isinstance(points, list) and points):
        return []

    head = points[0]
    dict_form = isinstance(head, dict) and "x" in head and "y" in head

    coords = []
    for pt in points:
        if dict_form:
            if isinstance(pt, dict) and "x" in pt and "y" in pt:
                coords.append([float(pt["x"]), float(pt["y"])])
        elif isinstance(pt, (list, tuple)) and len(pt) >= 2:
            coords.append([float(pt[0]), float(pt[1])])
    return coords

def write_yolo_poly(out_txt, polys_norm):
    """Write polygons as class-0 rows ("0 x0 y0 x1 y1 ...", 6 decimals each).

    Polygons with fewer than 6 values (3 points) are skipped. Returns True when
    a file was written, and False, without touching ``out_txt``, when no
    polygon qualified.
    """
    rows = [
        "0 " + " ".join(f"{coord:.6f}" for coord in ring)
        for ring in polys_norm
        if len(ring) >= 6
    ]
    if not rows:
        return False
    with open(out_txt, "w") as fh:
        fh.write("\n".join(rows))
    return True

def clamp01(x):
    """Clamp ``x`` to the closed interval [0.0, 1.0]."""
    capped = x if x < 1.0 else 1.0
    return capped if capped > 0.0 else 0.0

def normalize_poly(poly_xy, w, h):
    """Flatten ``(x, y)`` pairs into ``[x0/w, y0/h, x1/w, ...]``, each value clamped by clamp01."""
    flat = []
    for px, py in poly_xy:
        flat.extend((clamp01(px / w), clamp01(py / h)))
    return flat

# --- Inspect a sample JSON to see actual schema ---
# Print the structure of the first training annotation so the schema can be
# checked by eye.
sample_jsons = glob.glob(os.path.join(SOURCE_DATASET, "train", "ann", "*.json"))
if sample_jsons:
    jf = sample_jsons[0]
    print("🔎 Sample JSON:", jf)
    with open(jf, "r") as f:
        data = json.load(f)
    print("Top-level keys:", list(data.keys()))

    # Only the first entry of a non-empty "shapes" list is described.
    first_shapes = data["shapes"][:1] if isinstance(data.get("shapes"), list) else []
    for sh in first_shapes:
        print("First shape keys:", list(sh.keys()))
        print("First shape label:", sh.get("label"))
        print("First shape type:", sh.get("shape_type"))
        pts = sh.get("points")
        print("First shape points type:", type(pts))
        if isinstance(pts, list) and pts:
            print("First point example:", pts[0])

# --- Rebuild labels robustly using image size from actual image ---
# Per-split counters: label files written / images seen.
total_written = {s: 0 for s in SPLITS}
total_images  = {s: 0 for s in SPLITS}

for split in SPLITS:
    img_dir = os.path.join(YOLO_DATASET, split, "images")
    lab_dir = os.path.join(YOLO_DATASET, split, "labels")
    ann_dir = os.path.join(SOURCE_DATASET, split, "ann")
    os.makedirs(lab_dir, exist_ok=True)
    imgs = [f for f in sorted(os.listdir(img_dir)) if f.lower().endswith((".jpg", ".jpeg", ".png"))]
    total_images[split] = len(imgs)

    for imgname in tqdm(imgs, desc=f"Rebuild {split} labels (robust)"):
        base, ext = os.path.splitext(imgname)
        img_path  = os.path.join(img_dir, imgname)
        json_path = os.path.join(ann_dir, base + ".json")
        out_txt   = os.path.join(lab_dir, base + ".txt")

        # Read actual image size (the JSON's own size fields are not trusted).
        im = cv2.imread(img_path)
        if im is None:
            # Unreadable image: no size to normalize with, so skip it.
            continue
        h, w = im.shape[:2]

        shapes = []
        if os.path.exists(json_path):
            try:
                with open(json_path, "r") as f:
                    data = json.load(f)
            except (OSError, ValueError):
                # Unreadable or malformed JSON (JSONDecodeError is a ValueError):
                # treat as "no annotations" instead of aborting the whole rebuild.
                data = {}
            shapes = data.get("shapes", []) if isinstance(data, dict) else []

        polys_norm = []
        if isinstance(shapes, list):
            for sh in shapes:
                if not isinstance(sh, dict):
                    continue
                lbl = (sh.get("label") or "").strip()
                if not KEEP_REGEX.search(lbl):
                    continue
                st = (sh.get("shape_type") or "polygon").lower()
                pts_xy = to_xy_list(sh.get("points", []))
                if st == "rectangle" and len(pts_xy) == 2:
                    # Two opposite corners -> explicit 4-corner polygon.
                    (x1, y1), (x2, y2) = pts_xy
                    pts_xy = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
                if len(pts_xy) >= 3:
                    nf = normalize_poly(pts_xy, w, h)
                    if len(nf) >= 6:
                        polys_norm.append(nf)

        if polys_norm:
            if write_yolo_poly(out_txt, polys_norm):
                total_written[split] += 1
        elif os.path.exists(out_txt):
            # ensure no stale file
            os.remove(out_txt)

print("\n✅ Rebuild complete.")
print("Images per split:", total_images)
print("Label files written per split:", total_written)

# --- Show a couple of label files for sanity ---
import itertools
# Print up to three lines of one label file from the first split that has any,
# then stop.
for split in SPLITS:
    some = glob.glob(os.path.join(YOLO_DATASET, split, "labels", "*.txt"))
    if not some:
        continue
    print(f"\n📄 Example labels from {split}: {os.path.basename(some[0])}")
    with open(some[0], "r") as f:
        for line in itertools.islice(f, 3):
            print("  ", line.strip())
    break


In [None]:
# ==============================
# STEP 11c: Convert Supervisely-style JSON (objects -> points.exterior) to YOLOv8-Seg labels
# ==============================
import os, glob, json, re, cv2
from tqdm import tqdm

# Root of the source dataset; the loop below reads <split>/ann/<base>.json under it.
SOURCE_DATASET = "/content/drive/MyDrive/DENTALDATASET"
# Root of the YOLO dataset; images are read from <split>/images and labels go to <split>/labels.
YOLO_DATASET   = "/content/dentalai-yolo"
# Split sub-directory names iterated by the loops in this cell.
SPLITS = ["train","valid","test"]

# Case-insensitive; an object is kept when its class title contains "tooth" or "teeth".
KEEP_REGEX = re.compile(r"(tooth|teeth)", re.IGNORECASE)

def clamp01(x):
    """Limit ``x`` to the range [0.0, 1.0]."""
    if x < 1.0:
        upper_bounded = x
    else:
        upper_bounded = 1.0
    if upper_bounded > 0.0:
        return upper_bounded
    return 0.0

def norm_poly(poly_xy, w, h):
    """Return ``[x0/w, y0/h, x1/w, ...]`` for the given ``(x, y)`` pairs, each value clamped by clamp01."""
    flat = []
    for px, py in poly_xy:
        flat += [clamp01(float(px) / w), clamp01(float(py) / h)]
    return flat

def write_yolo_poly(out_txt, polys_norm):
    """Write polygons as class-0 rows ("0 x0 y0 x1 y1 ...", 6 decimals each).

    Args:
        out_txt: destination label path; missing parent folders are created.
        polys_norm: iterable of flat coordinate lists; entries with fewer than
            6 values (3 points) are skipped.

    Returns:
        True when a file was written, and False, creating nothing on disk,
        when no polygon qualified.
    """
    lines = []
    for poly in polys_norm:
        if len(poly) >= 6:
            lines.append("0 " + " ".join(f"{v:.6f}" for v in poly))
    if not lines:
        return False

    parent = os.path.dirname(out_txt)
    if parent:
        # A bare file name has an empty dirname, and os.makedirs("") raises.
        os.makedirs(parent, exist_ok=True)
    with open(out_txt, "w") as f:
        f.write("\n".join(lines))
    return True

# Per-split counters: label files written / images seen.
written = {s: 0 for s in SPLITS}
total   = {s: 0 for s in SPLITS}

for split in SPLITS:
    img_dir = os.path.join(YOLO_DATASET, split, "images")
    lab_dir = os.path.join(YOLO_DATASET, split, "labels")
    ann_dir = os.path.join(SOURCE_DATASET, split, "ann")
    os.makedirs(lab_dir, exist_ok=True)

    imgs = [f for f in sorted(os.listdir(img_dir)) if f.lower().endswith((".jpg", ".jpeg", ".png"))]
    total[split] = len(imgs)

    for imgname in tqdm(imgs, desc=f"Supervisely -> YOLO ({split})"):
        base, _ = os.path.splitext(imgname)
        img_path = os.path.join(img_dir, imgname)
        jf       = os.path.join(ann_dir, base + ".json")
        out_txt  = os.path.join(lab_dir, base + ".txt")

        polys_norm = []

        # read image size from the image itself
        im = cv2.imread(img_path)
        if im is None or not os.path.exists(jf):
            # ensure no stale label
            if os.path.exists(out_txt):
                os.remove(out_txt)
            continue
        h, w = im.shape[:2]

        try:
            # Context manager so the handle is closed even when parsing fails.
            with open(jf, "r") as fh:
                data = json.load(fh)
        except Exception:
            if os.path.exists(out_txt):
                os.remove(out_txt)
            continue

        # Expected layout: data['objects'] -> each has 'classTitle' and
        # 'points': {'exterior': [[x,y],...], 'interior': [...]}
        objects = data.get("objects", [])
        for obj in objects:
            label = (obj.get("classTitle") or obj.get("class_name") or "").strip()
            if not KEEP_REGEX.search(label):
                continue

            # Use points.exterior when 'points' is a dict, otherwise fall back
            # to a plain list stored directly under 'points'.
            raw = obj.get("points")
            if isinstance(raw, dict):
                raw = raw.get("exterior")

            pts = []
            if isinstance(raw, list) and len(raw) >= 3:
                for p in raw:
                    # Accept {'x':..,'y':..} dicts or sequences; extra
                    # components beyond the first two are ignored.
                    if isinstance(p, dict) and "x" in p and "y" in p:
                        pts.append([float(p["x"]), float(p["y"])])
                    elif isinstance(p, (list, tuple)) and len(p) >= 2:
                        pts.append([float(p[0]), float(p[1])])

            if len(pts) >= 3:
                poly = norm_poly(pts, w, h)
                if len(poly) >= 6:
                    polys_norm.append(poly)

        if polys_norm:
            if write_yolo_poly(out_txt, polys_norm):
                written[split] += 1
        elif os.path.exists(out_txt):
            os.remove(out_txt)

print("\n✅ Supervisely conversion complete.")
print("Images per split:", total)
print("Label files written per split:", written)

# Quick count
def yolo_counts(root="/content/dentalai-yolo"):
    """Map each split in SPLITS to ``(image_count, label_file_count)`` under ``root``."""
    def _count_images(split):
        # Only .jpg/.jpeg/.png files (case-insensitive) count as images.
        paths = glob.glob(f"{root}/{split}/images/*")
        return len([p for p in paths if p.lower().endswith(('.jpg', '.jpeg', '.png'))])

    out = {}
    for s in SPLITS:
        out[s] = (_count_images(s), len(glob.glob(f"{root}/{s}/labels/*.txt")))
    return out

print("Counts now:", yolo_counts())


In [None]:
# ==============================
# STEP 11d: Robust filename matching (handles *.jpg.json / *.png.json)
# ==============================
import os, glob, json, re, cv2
from tqdm import tqdm

# Root of the source dataset; annotations are searched for in <split>/ann under it.
SOURCE_DATASET = "/content/drive/MyDrive/DENTALDATASET"
# Root of the YOLO dataset; images are read from <split>/images and labels go to <split>/labels.
YOLO_DATASET   = "/content/dentalai-yolo"
# Split sub-directory names iterated by the loops in this cell.
SPLITS = ["train","valid","test"]

# Case-insensitive; an object is kept when its class title contains "tooth" or "teeth".
KEEP_REGEX = re.compile(r"(tooth|teeth)", re.IGNORECASE)

def clamp01(x):
    """Restrict ``x`` to [0.0, 1.0]."""
    low, high = 0.0, 1.0
    bounded = x if x < high else high
    return bounded if bounded > low else low

def norm_poly(poly_xy, w, h):
    """Scale ``(x, y)`` pairs by ``1/w`` and ``1/h``, clamp each with clamp01, and return them flattened."""
    return [
        clamp01(float(value) / extent)
        for x, y in poly_xy
        for value, extent in ((x, w), (y, h))
    ]

def write_yolo_poly(out_txt, polys_norm):
    """Write polygons to ``out_txt``, one "0 x0 y0 x1 y1 ..." row per polygon.

    Coordinates are formatted with 6 decimals and the class id is always 0.
    Polygons with fewer than 6 values are dropped. Parent folders are created
    when needed; a bare file name (no folder part) is written to the current
    directory.

    Returns True if the file was written, False if there was nothing to write
    (in which case nothing is created on disk).
    """
    lines = [
        "0 " + " ".join(f"{v:.6f}" for v in poly)
        for poly in polys_norm
        if len(poly) >= 6
    ]
    if not lines:
        return False

    folder = os.path.dirname(out_txt)
    if folder:
        # dirname is "" for a bare file name, and os.makedirs("") raises.
        os.makedirs(folder, exist_ok=True)
    with open(out_txt, "w") as f:
        f.write("\n".join(lines))
    return True

def find_ann_for_image(ann_dir, base, img_ext):
    """Locate the annotation JSON that belongs to one image.

    Args:
        ann_dir: folder holding the annotation files.
        base: image file name without its extension.
        img_ext: image extension including the dot (e.g. ".jpg").

    Lookup order:
      1. ``base + ".json"`` and ``base + img_ext + ".json"`` as exact paths.
      2. A case-insensitive scan of ``ann_dir`` for ``base`` followed by
         ``.json`` or by ``.<ext>.json`` with one alphanumeric extension
         (so ``x.png.json`` still matches image ``x.jpg``).

    The scan compares the full remainder after ``base``, so ``img1`` never
    picks up ``img10.json`` or ``img1_mask.json``.

    Returns:
        The path of the best match, or None when nothing matches.
    """
    for candidate in (
        os.path.join(ann_dir, base + ".json"),
        os.path.join(ann_dir, base + img_ext + ".json"),
    ):
        if os.path.exists(candidate):
            return candidate

    if not os.path.isdir(ann_dir):
        return None

    base_l = base.lower()
    # Suffixes in order of preference; anything else of the shape
    # ".<ext>.json" ranks after these.
    preferred = (".json", img_ext.lower() + ".json")
    matches = []
    for name in os.listdir(ann_dir):
        lowered = name.lower()
        if not lowered.startswith(base_l):
            continue
        rest = lowered[len(base_l):]
        if rest in preferred:
            rank = preferred.index(rest)
        elif rest.startswith(".") and rest.endswith(".json") and rest[1:-5].isalnum():
            rank = len(preferred)
        else:
            continue
        matches.append((rank, name))

    if not matches:
        return None
    # min() picks the best rank, then the alphabetically first name, so the
    # result does not depend on directory listing order.
    return os.path.join(ann_dir, min(matches)[1])

# Per-split counters: label files written / images seen.
written = {s: 0 for s in SPLITS}
total   = {s: 0 for s in SPLITS}

for split in SPLITS:
    img_dir = os.path.join(YOLO_DATASET, split, "images")
    lab_dir = os.path.join(YOLO_DATASET, split, "labels")
    ann_dir = os.path.join(SOURCE_DATASET, split, "ann")
    os.makedirs(lab_dir, exist_ok=True)

    imgs = [f for f in sorted(os.listdir(img_dir)) if f.lower().endswith((".jpg", ".jpeg", ".png"))]
    total[split] = len(imgs)

    for imgname in tqdm(imgs, desc=f"Match+Convert ({split})"):
        base, img_ext = os.path.splitext(imgname)  # img_ext includes dot, like ".jpg"
        img_path = os.path.join(img_dir, imgname)
        out_txt  = os.path.join(lab_dir, base + ".txt")

        # read image size
        im = cv2.imread(img_path)
        if im is None:
            if os.path.exists(out_txt):
                os.remove(out_txt)
            continue
        h, w = im.shape[:2]

        jf = find_ann_for_image(ann_dir, base, img_ext)
        if jf is None:
            # no annotation for this image -> remove label if any
            if os.path.exists(out_txt):
                os.remove(out_txt)
            continue

        # parse Supervisely-like JSON
        try:
            # Context manager so the handle is closed even when parsing fails.
            with open(jf, "r") as fh:
                data = json.load(fh)
        except Exception:
            if os.path.exists(out_txt):
                os.remove(out_txt)
            continue

        objects = data.get("objects", [])
        polys_norm = []
        for obj in objects:
            label = (obj.get("classTitle") or obj.get("class_name") or "").strip()
            if not KEEP_REGEX.search(label):
                continue

            # Standard polygon: points.exterior = [[x,y], ...]; otherwise fall
            # back to a plain list stored directly under 'points'.
            raw = obj.get("points")
            if isinstance(raw, dict):
                raw = raw.get("exterior")

            pts = []
            if isinstance(raw, list) and len(raw) >= 3:
                for p in raw:
                    # handle either [x,y] sequences or {'x':..,'y':..} dicts;
                    # entries of any other shape are skipped rather than raising
                    if isinstance(p, dict) and "x" in p and "y" in p:
                        pts.append([float(p["x"]), float(p["y"])])
                    elif isinstance(p, (list, tuple)) and len(p) >= 2:
                        pts.append([float(p[0]), float(p[1])])

            if len(pts) >= 3:
                poly = norm_poly(pts, w, h)
                if len(poly) >= 6:
                    polys_norm.append(poly)

        if polys_norm:
            if write_yolo_poly(out_txt, polys_norm):
                written[split] += 1
        elif os.path.exists(out_txt):
            os.remove(out_txt)

print("\n✅ Rebuild with robust filename matching complete.")
print("Images per split:", total)
print("Label files written per split:", written)

# Show counts
def yolo_counts(root="/content/dentalai-yolo"):
    """Return ``{split: (images, label_files)}`` for each split in SPLITS under ``root``.

    Images are entries of ``<split>/images`` ending in .jpg/.jpeg/.png
    (case-insensitive); label files are ``<split>/labels/*.txt``.
    """
    out = {}
    for s in SPLITS:
        n_images = 0
        for path in glob.glob(f"{root}/{s}/images/*"):
            if path.lower().endswith(('.jpg', '.jpeg', '.png')):
                n_images += 1
        n_labels = len(glob.glob(f"{root}/{s}/labels/*.txt"))
        out[s] = (n_images, n_labels)
    return out

print("Counts now:", yolo_counts())


In [None]:
# ==============================
# STEP 12: If labels still 0 → build from masks (contours) and TRAIN YOLOv8-Seg
# ==============================
!pip install -q ultralytics opencv-python-headless

import os, glob, cv2, numpy as np
from tqdm import tqdm
from ultralytics import YOLO

# Source dataset root; masks are looked up in <split>/masks under it.
SOURCE_DATASET = "/content/drive/MyDrive/DENTALDATASET"   # root with train/valid/test
# YOLO dataset root; images are read from <split>/images and labels go to <split>/labels.
YOLO_DATASET   = "/content/dentalai-yolo"
# Dataset description passed to model.train(data=...); this cell does not create it.
DATA_YAML      = "/content/dental.yaml"
# Run name passed to model.train(name=...).
RUN_NAME       = "dental_teeth_seg_v2"

# Split sub-directory names iterated by the functions and loops in this cell.
SPLITS = ["train","valid","test"]

def count_items(root=YOLO_DATASET):
    """Return ``{split: (n_images, n_label_files)}`` for every split in SPLITS under ``root``."""
    image_exts = (".jpg", ".jpeg", ".png")
    return {
        s: (
            len([f for f in glob.glob(f"{root}/{s}/images/*") if f.lower().endswith(image_exts)]),
            len(glob.glob(f"{root}/{s}/labels/*.txt")),
        )
        for s in SPLITS
    }

def masks_to_yolo_labels(split, threshold=127, min_area=20):
    """Create YOLOv8-seg label files for one split from grayscale masks.

    For each image in ``YOLO_DATASET/<split>/images`` a mask with the same
    base name is looked up in ``SOURCE_DATASET/<split>/masks``. Its external
    contours are written as class-0 polygons, normalized to [0, 1] by the
    mask's own width and height. An existing label file is removed whenever no
    mask is found, the mask is unreadable, or no contour survives filtering.

    Args:
        split: split name (sub-folder of both dataset roots).
        threshold: pixels strictly above this value count as foreground.
            NOTE(review): masks that store small class indices (e.g. 1)
            instead of 255 need ``threshold=0`` — confirm the mask encoding.
        min_area: contours whose area is below this are dropped as specks.

    Returns:
        Number of label files written; 0 when the masks folder is missing.
    """
    img_dir = os.path.join(YOLO_DATASET, split, "images")
    lab_dir = os.path.join(YOLO_DATASET, split, "labels")
    os.makedirs(lab_dir, exist_ok=True)
    mask_dir = os.path.join(SOURCE_DATASET, split, "masks")
    if not os.path.isdir(mask_dir):
        return 0

    mask_exts = (".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff")
    written = 0
    imgs = [f for f in sorted(os.listdir(img_dir)) if f.lower().endswith((".jpg", ".jpeg", ".png"))]
    for imgname in tqdm(imgs, desc=f"Mask→YOLO ({split})"):
        base, _ = os.path.splitext(imgname)
        out_txt = os.path.join(lab_dir, base + ".txt")

        # try to find a matching mask by common extensions
        mask_path = None
        for ext in mask_exts:
            p = os.path.join(mask_dir, base + ext)
            if os.path.exists(p):
                mask_path = p
                break
        if mask_path is None:
            # sometimes masks keep original extension in name (e.g., xxx.jpg.png);
            # escape the literal part so []*? in names are not read as patterns,
            # and sort so the choice does not depend on listing order
            pattern = glob.escape(os.path.join(mask_dir, base)) + ".*"
            cands = sorted(c for c in glob.glob(pattern) if c.lower().endswith(mask_exts))
            if cands:
                mask_path = cands[0]
        if mask_path is None:
            # no mask for this image -> skip
            if os.path.exists(out_txt):
                os.remove(out_txt)
            continue

        m = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if m is None:
            if os.path.exists(out_txt):
                os.remove(out_txt)
            continue

        h, w = m.shape[:2]
        # binarize
        thr = (m > threshold).astype(np.uint8)
        # find contours
        contours, _ = cv2.findContours(thr, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        lines = []
        for c in contours:
            if len(c) < 3:
                continue
            # drop tiny specks
            if cv2.contourArea(c) < min_area:
                continue
            pts = c[:, 0, :]  # Nx2
            # normalize to [0,1]
            flat = []
            for x, y in pts:
                flat.append(max(0.0, min(1.0, float(x) / w)))
                flat.append(max(0.0, min(1.0, float(y) / h)))
            if len(flat) >= 6:
                lines.append("0 " + " ".join(f"{v:.6f}" for v in flat))

        if lines:
            with open(out_txt, "w") as f:
                f.write("\n".join(lines))
            written += 1
        elif os.path.exists(out_txt):
            os.remove(out_txt)
    return written

# 1) If any split has 0 labels, try building from masks for that split
# Step 1: for every split that currently has zero label files, try to build
# them from masks; splits that already have labels are left alone.
stats_before = count_items()
print("Before mask conversion:", stats_before)
made_total = {}
for s in SPLITS:
    made_total[s] = masks_to_yolo_labels(s) if stats_before[s][1] == 0 else 0
print("Labels created from masks:", made_total)

# Step 2: recount and refuse to train without any training labels.
stats_after = count_items()
print("After mask conversion:", stats_after)
assert stats_after["train"][1] > 0, "Still no training labels. Check that your /train/masks exist and match image names."

# Step 3: train YOLOv8-Seg starting from the small pretrained checkpoint.
model = YOLO("yolov8s-seg.pt")
train_kwargs = dict(
    data=DATA_YAML,
    epochs=50,
    imgsz=640,
    batch=8,
    name=RUN_NAME,
    workers=8,
    verbose=True,
)
model.train(**train_kwargs)
print(f"\n✅ Training finished. Weights → /content/runs/segment/{RUN_NAME}/weights/best.pt")


In [None]:
# ==============================
# STEP: Inference + Natural Teeth Whitening (auto-find weights)
# ==============================
!pip install -q ultralytics opencv-python-headless mediapipe matplotlib numpy

import os, glob, cv2, numpy as np, matplotlib.pyplot as plt, random
from ultralytics import YOLO

# --------- helper: find latest best.pt ----------
def find_latest_best(root="/content/runs/segment"):
    """Return the most recently modified ``weights/best.pt`` anywhere under ``root``.

    Args:
        root: folder searched recursively; taken literally (glob
            metacharacters in it are escaped).

    Returns:
        Path of the ``best.pt`` with the newest modification time.

    Raises:
        FileNotFoundError: when no ``weights/best.pt`` exists under ``root``;
            the message names the folder that was actually searched.
    """
    pattern = os.path.join(glob.escape(root), "**", "weights", "best.pt")
    candidates = glob.glob(pattern, recursive=True)
    if not candidates:
        raise FileNotFoundError(f"No trained weights found under {root}. Train the model first.")
    return max(candidates, key=os.path.getmtime)

# Pick the newest best.pt under the default runs folder (raises if none exists).
WEIGHTS = find_latest_best()
print("Using weights:", WEIGHTS)

# --------- paths ----------
IMG_DIR = "/content/dentalai-yolo/valid/images"   # change if you want train or test
# All outputs go under OUT_DIR, in one sub-folder per artifact type.
OUT_DIR = "/content/teeth_whitening_out"
VIS_DIR = os.path.join(OUT_DIR, "vis")            # mask overlays with mouth-status text
WHITEN_DIR = os.path.join(OUT_DIR, "whitened")    # whitened (or unchanged) images
MASK_DIR = os.path.join(OUT_DIR, "masks")         # combined binary teeth masks
os.makedirs(VIS_DIR, exist_ok=True)
os.makedirs(WHITEN_DIR, exist_ok=True)
os.makedirs(MASK_DIR, exist_ok=True)

# --------- load model ----------
model = YOLO(WEIGHTS)

# --------- Mediapipe for mouth-open check ----------
import mediapipe as mp
mp_face_mesh = mp.solutions.face_mesh
# One shared FaceMesh instance, configured for still images and at most one face;
# is_mouth_open() reads it as a module-level global.
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True)

def is_mouth_open(img_rgb, threshold=0.03):
    """Return True if the mouth appears open.

    Runs the module-level ``face_mesh`` on ``img_rgb`` and compares the
    absolute difference between the ``y`` values of landmarks 13 and 14 (used
    here as upper/lower inner lip) against ``threshold``. The raw landmark
    ``y`` values are compared directly; no rescaling by face or image size is
    done in this function.
    NOTE(review): MediaPipe landmark coordinates are presumably normalized to
    [0, 1] by image size, which would make ``threshold`` a fraction of image
    height — confirm.

    Returns False when no face is detected.
    """
    results = face_mesh.process(img_rgb)
    if not results.multi_face_landmarks:
        return False
    lm = results.multi_face_landmarks[0].landmark
    try:
        top, bottom = lm[13], lm[14]
    except Exception:
        # If those indices are unavailable, fall back to landmarks 0 and 17.
        top, bottom = lm[0], lm[17]
    mouth_gap = abs(bottom.y - top.y)
    return mouth_gap > threshold

# --------- Whitening function ----------
def whiten_teeth(img_bgr, mask_bool, intensity=0.20, clahe_clip=2.0, blend_alpha=0.85):
    """Brighten the masked region of a BGR image and blend it back with feathered edges.

    img_bgr: input image (BGR)
    mask_bool: boolean mask where True => teeth pixels
    intensity: fraction by which masked L values move toward 255 (0..1)
    clahe_clip: CLAHE clip limit
    blend_alpha: weight of the whitened pixels inside the mask (0..1)

    Returns the input object unchanged when the mask is empty, otherwise a new
    uint8 image.
    """
    if mask_bool.sum() == 0:
        return img_bgr

    lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB).astype(np.float32)
    light, chan_a, chan_b = cv2.split(lab)

    # CLAHE on the whole lightness channel; it is mixed in only under the mask below.
    equalizer = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8,8))
    light_eq = equalizer.apply(light.astype(np.uint8)).astype(np.float32)

    # Under the mask: L + intensity * (255 - L); elsewhere L is untouched.
    weight = mask_bool.astype(np.float32)
    lifted = light + weight * intensity * (255.0 - light)

    # Mix 35% of the CLAHE result into the masked pixels to avoid flatness.
    lifted = (1 - 0.35*weight) * lifted + (0.35*weight) * light_eq

    lab_out = cv2.merge([np.clip(lifted,0,255).astype(np.uint8), chan_a.astype(np.uint8), chan_b.astype(np.uint8)])
    whitened_bgr = cv2.cvtColor(lab_out, cv2.COLOR_LAB2BGR)

    # Feather the mask with a Gaussian blur whose odd kernel size is about 1%
    # of the shorter image side (at least 7).
    hard_mask = (mask_bool.astype(np.uint8) * 255)
    ksize = max(7, int(round(min(img_bgr.shape[:2]) * 0.01)))
    if ksize % 2 == 0:
        ksize += 1
    feather = cv2.GaussianBlur(hard_mask, (ksize, ksize), 0).astype(np.float32)/255.0
    feather = cv2.merge([feather, feather, feather])

    # Outside the mask keep the original; inside use the alpha-weighted mix.
    source_f = img_bgr.astype(np.float32)
    inside = blend_alpha*whitened_bgr.astype(np.float32) + (1-blend_alpha)*source_f
    return (source_f * (1 - feather) + inside * feather).astype(np.uint8)

# --------- Run inference & apply whitening ----------
# Every .jpg/.jpeg/.png in IMG_DIR, in sorted order.
all_imgs = [os.path.join(IMG_DIR, f) for f in sorted(os.listdir(IMG_DIR)) if f.lower().endswith((".jpg",".jpeg",".png"))]
sample_imgs = all_imgs  # process all; change to random.sample(all_imgs, k=20) to test fewer

print("Processing", len(sample_imgs), "images... (this may take a while)")

# For each image: write a mask, a visualization and a "whitened" output, all
# under the source image's own file name.
for img_path in sample_imgs:
    img = cv2.imread(img_path)
    if img is None:
        # unreadable image: nothing is written for it
        continue
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # 1) check mouth open; if not open, skip whitening but still save mask/vis
    # Note: is_mouth_open returns False (it does not raise) when no face is detected,
    # so such images are treated as mouth-closed and are not whitened.
    mouth_open = False
    try:
        mouth_open = is_mouth_open(img_rgb, threshold=0.035)
    except Exception:
        mouth_open = True  # fallback: proceed with whitening if the mouth check itself raises

    # 2) run model prediction (single-image); the model is given the path, not the decoded `img`
    res = model.predict(source=img_path, imgsz=640, conf=0.25, iou=0.5, verbose=False)[0]

    # 3) build combined mask for class 0 (tooth)
    H, W = img.shape[:2]
    mask_comb = np.zeros((H,W), dtype=np.uint8)
    if hasattr(res, "masks") and res.masks is not None:
        # res.masks.data : list/tensor of masks in model's output size; res.boxes.cls holds classes
        cls_list = res.boxes.cls.int().tolist() if res.boxes is not None else []
        masks_data = res.masks.data if res.masks is not None else []
        for cls_id, m in zip(cls_list, masks_data):
            if int(cls_id) != 0:  # tooth class id
                continue
            mnp = m.cpu().numpy().astype(np.uint8)
            # resize mask to original size if needed
            if mnp.shape[:2] != (H,W):
                mnp = cv2.resize(mnp, (W,H), interpolation=cv2.INTER_NEAREST)
            # union of all instance masks (element-wise maximum)
            mask_comb = np.maximum(mask_comb, mnp)

    mask_bool = (mask_comb > 0)

    # Save mask visualization
    # NOTE(review): the mask is saved under the source image's file name, so a .jpg
    # source presumably produces a lossy JPEG mask — confirm whether a .png name is wanted.
    mask_vis = (mask_comb*255).astype(np.uint8)
    cv2.imwrite(os.path.join(MASK_DIR, os.path.basename(img_path)), mask_vis)

    # 4) Always save the visualization; whitening is decided further below
    basefn = os.path.basename(img_path)
    vis = img.copy()
    if mask_bool.sum() > 0:
        # overlay filled mask contours in yellow ((0,255,255) in BGR order) for visualization
        overlay = vis.copy()
        contours, _ = cv2.findContours(mask_comb, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(overlay, contours, -1, (0,255,255), thickness=cv2.FILLED)
        vis = cv2.addWeighted(overlay, 0.45, vis, 0.55, 0)
    # annotate mouth-open status
    txt = "MOUTH_OPEN" if mouth_open else "MOUTH_CLOSED"
    cv2.putText(vis, txt, (10,30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255,255,255), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(VIS_DIR, basefn), vis)

    # Mouth closed or no teeth detected: the "whitened" output is the untouched original
    if (not mouth_open) or (mask_bool.sum() == 0):
        # save original to whitened folder as-is (no change)
        cv2.imwrite(os.path.join(WHITEN_DIR, basefn), img)
        continue

    # 5) apply whitening
    whitened = whiten_teeth(img, mask_bool, intensity=0.22, clahe_clip=2.0, blend_alpha=0.9)
    cv2.imwrite(os.path.join(WHITEN_DIR, basefn), whitened)

print("Done. Visualizations ->", VIS_DIR)
print("Masks ->", MASK_DIR)
print("Whitened images ->", WHITEN_DIR)

# Quick display (show 6 samples)
# First six files (sorted by path) from WHITEN_DIR, shown in a 2x3 grid.
to_show = sorted(glob.glob(os.path.join(WHITEN_DIR,"*")) )[:6]
plt.figure(figsize=(14,8))
for i,p in enumerate(to_show,1):
    # cv2 loads BGR; convert to RGB before handing the image to matplotlib
    im = cv2.cvtColor(cv2.imread(p), cv2.COLOR_BGR2RGB)
    plt.subplot(2,3,i); plt.imshow(im); plt.title(os.path.basename(p)); plt.axis('off')
plt.show()


In [None]:
# ==============================
# CHECK / TRAIN (auto) — list runs, check labels, optionally quick-train
# ==============================
!pip install -q ultralytics opencv-python-headless

import os, glob, json
from ultralytics import YOLO

# Default dataset root for yolo_counts() below.
YOLO_DATASET = "/content/dentalai-yolo"
# Default folder scanned by list_runs() below.
RUNS_DIR = "/content/runs/segment"

def list_runs(root=RUNS_DIR):
    """Print up to the ten most recently modified run folders under ``root`` with their ``weights/*.pt`` files."""
    run_dirs = [entry for entry in glob.glob(os.path.join(root, "*")) if os.path.isdir(entry)]
    if not run_dirs:
        print("No runs found in", root)
        return

    print("Found runs:")
    newest_first = sorted(run_dirs, key=os.path.getmtime, reverse=True)
    for run_dir in newest_first[:10]:
        weight_files = glob.glob(os.path.join(run_dir, "weights", "*.pt"))
        print(" ", os.path.basename(run_dir), "| weights:", weight_files)

def yolo_counts(root=YOLO_DATASET):
    """Count image and label files for each dataset split under `root`.

    Returns a dict mapping split name ("train", "valid", "test") to a tuple
    `(n_images, n_labels)`: files in `<split>/images` with a .jpg/.jpeg/.png
    suffix (case-insensitive) and `*.txt` files in `<split>/labels`.
    """
    image_suffixes = ('.jpg','.jpeg','.png')
    stats = {}
    for s in ["train","valid","test"]:
        image_paths = [
            path for path in glob.glob(f"{root}/{s}/images/*")
            if path.lower().endswith(image_suffixes)
        ]
        label_paths = glob.glob(f"{root}/{s}/labels/*.txt")
        stats[s] = (len(image_paths), len(label_paths))
    return stats

# Show what has already been trained.
print("=== Existing runs ===")
list_runs()

# Show how many images / label files each split currently has.
print("\n=== Dataset counts (images, labels) ===")
counts = yolo_counts()
for split_name, (n_images, n_labels) in counts.items():
    print(f"  {split_name}: images={n_images}  labels={n_labels}")

# If the train split has no label files, print diagnostics instead of training;
# otherwise run a short training job so that a best.pt checkpoint exists.
if counts["train"][1] == 0:
    print("\n⚠️ No training labels found. Diagnostics:")
    # Show some annotation filenames and the head of one file so the JSON
    # schema can be inspected by eye.
    sample_ann_dir = "/content/drive/MyDrive/DENTALDATASET/train/ann"
    if os.path.isdir(sample_ann_dir):
        files = sorted(glob.glob(os.path.join(sample_ann_dir, "*")) )[:10]
        print(" Sample ann files:", files)
        if files:
            sample = files[0]
            print("\n--- Sample annotation file content (first 400 chars) ---")
            try:
                with open(sample, "r") as f:
                    txt = f.read(400)
                print(txt)
            except Exception as e:
                # Best-effort preview only: a directory or binary file must not abort the cell.
                print("Could not read sample file:", e)
    else:
        print(" Could not find annotation dir at:", sample_ann_dir)

    print("\nIf labels are missing you have two main options:")
    print(" A) Run the conversion scripts provided earlier (re-run robust converters).")
    print(" B) If you have mask PNGs under train/masks, run the mask->labels converter.")
    # The previous closing message asked the reader to reply with a chat command,
    # which a notebook cannot act on; point at the concrete next step instead.
    print("\nNext step: run the AUTO-CONVERT cell below to regenerate labels from the annotation JSON files, then re-run this cell. Training is skipped while the train split has 0 labels.")
else:
    # quick sanity training to produce best.pt
    print("\n✅ Found labels. Starting quick sanity training (5 epochs) to produce best.pt ...")
    model = YOLO("yolov8s-seg.pt")
    model.train(
        data="/content/dental.yaml",
        epochs=5,
        imgsz=640,
        batch=8,
        name="dental_quick_train_auto",
        verbose=True
    )
    print("\n✅ Quick training finished. Check /content/runs/segment for the new run and weights.")


In [None]:
# ==============================
# AUTO-CONVERT Supervisely-style ann/*.json → YOLOv8-seg .txt labels (Tooth-only)
# - Builds a classId -> className map by scanning JSONs and any meta files.
# - Handles polygons stored in objects[].points.exterior or objects[].points
# - Accepts filenames like '...jpg.json'
# ==============================
import os, glob, json, re, cv2
from tqdm import tqdm

SOURCE_DATASET = "/content/drive/MyDrive/DENTALDATASET"
YOLO_DATASET   = "/content/dentalai-yolo"
SPLITS = ["train","valid","test"]
KEEP_WORD = "tooth"  # keep classes that contain this substring (case-insensitive)
KEEP_RE = re.compile(re.escape(KEEP_WORD), re.IGNORECASE)

def find_all_jsons(split):
    """Return the sorted `*.json` paths in `<SOURCE_DATASET>/<split>/ann`.

    An empty list is returned when that annotation directory does not exist.
    """
    ann_dir = os.path.join(SOURCE_DATASET, split, "ann")
    if os.path.isdir(ann_dir):
        json_paths = glob.glob(os.path.join(ann_dir, "*.json"))
        json_paths.sort()
        return json_paths
    return []

# 1) Build classId -> name map by scanning all JSONs (look for classTitle / class_name / label / obj['classTitle'])
def _first_not_none(mapping, keys):
    """Return the first value stored under `keys` that is not None.

    Used for ids: an `a or b` chain would wrongly discard a valid id of 0.
    """
    for key in keys:
        value = mapping.get(key)
        if value is not None:
            return value
    return None


def _first_truthy(mapping, keys):
    """Return the first non-empty value stored under `keys`, or None (used for names)."""
    for key in keys:
        value = mapping.get(key)
        if value:
            return value
    return None


def _register_class(cid, name, source=None):
    """Record `cid -> name` in id2name; when `source` is given, remember it in examples.

    Entries with a missing name or an id that cannot be converted to int are
    ignored so that one odd annotation cannot abort the whole scan.
    """
    if cid is None or not name:
        return
    try:
        key = int(cid)
    except (TypeError, ValueError, OverflowError):
        return
    id2name[key] = str(name)
    if source is not None:
        examples[key] = source


id2name = {}   # int class id -> class name (last occurrence wins)
examples = {}  # int class id -> basename of a JSON file whose objects use that id
for split in SPLITS:
    for jf in find_all_jsons(split):
        try:
            # `with` closes the handle; the scan may touch thousands of files.
            with open(jf, "r") as fh:
                data = json.load(fh)
        except (OSError, ValueError):
            # unreadable or malformed JSON: skip this file (JSONDecodeError is a ValueError)
            continue
        if not isinstance(data, dict):
            # the keys below only make sense on a JSON object root
            continue

        # top-level categories/classes: lists of dicts carrying an id and a name
        class_defs = data.get("classes")
        if isinstance(class_defs, list):
            for c in class_defs:
                if not isinstance(c, dict):
                    continue
                _register_class(
                    _first_not_none(c, ("id", "classId", "class_id")),
                    _first_truthy(c, ("title", "name", "title_ru", "title_en", "classTitle", "title_latin")),
                )
        category_defs = data.get("categories")
        if isinstance(category_defs, list):
            for c in category_defs:
                if not isinstance(c, dict):
                    continue
                _register_class(c.get("id"), _first_truthy(c, ("name", "title")))

        # scan objects for classTitle/class_name
        objs = data.get("objects") or data.get("annotations") or data.get("shapes") or []
        if isinstance(objs, list):
            for o in objs:
                if not isinstance(o, dict):
                    continue
                # prefer classTitle, then class_name, then label
                _register_class(
                    _first_not_none(o, ("classId", "class_id", "category_id", "cat_id")),
                    _first_truthy(o, ("classTitle", "class_title", "class_name", "label", "title")),
                    source=os.path.basename(jf),
                )

# Summarise the mapping: the first 20 discovered ids, shown in ascending order.
print("Discovered classId -> name (sample):")
sample_ids = list(id2name.keys())[:20]
sample_ids.sort()
for class_id in sample_ids:
    example_json = examples.get(class_id, "")
    print(" ", class_id, ":", id2name[class_id], " example-json:", example_json)

# Nothing was discovered: the converter will rely on the name fields of each object.
if len(id2name) == 0:
    print("No explicit classId->name mapping found in JSONs. Will try to infer names directly from object fields.")

# 2) For each split: find matching annotation file for each image and convert polygons where className contains 'tooth'
def clamp01(x):
    """Clamp `x` to the closed interval [0.0, 1.0]."""
    capped = min(1.0, x)
    return max(0.0, capped)


def normalize_poly(pts, w, h):
    """Flatten pixel points into normalised coordinates.

    `pts` is an iterable of points indexable as (x, y). The result is the flat
    list [x0/w, y0/h, x1/w, y1/h, ...] with every value clamped to [0, 1].
    """
    flat = []
    for point in pts:
        px, py = float(point[0]), float(point[1])
        flat.extend((clamp01(px / w), clamp01(py / h)))
    return flat

def find_ann_for_image(ann_dir, basename):
    """Locate the annotation JSON that belongs to the image stem `basename`.

    Accepted file names inside `ann_dir` (extension checks are case-insensitive):
      * `<basename>.json`
      * `<basename>.<imgext>.json`          e.g. `photo.jpg.json`
      * `<basename>_<imgext>.<imgext>.json` e.g. `photo_jpg.jpg.json`

    A bare prefix match is deliberately NOT accepted: with stems `img_1` and
    `img_10`, a `basename + "*"` glob could hand `img_10.jpg.json` to `img_1`
    and silently attach the wrong polygons to an image.

    Returns the path of the matching file, or None when there is none.
    """
    exact = os.path.join(ann_dir, basename + ".json")
    if os.path.exists(exact):
        return exact

    image_ext = r"(?:jpe?g|png|bmp|tiff?|webp)"
    suffix_re = re.compile(rf"(?:_{image_ext})?(?:\.{image_ext})?\.json", re.IGNORECASE)

    # glob.escape keeps characters such as '[' in file names from acting as wildcards.
    pattern = os.path.join(glob.escape(ann_dir), glob.escape(basename) + "*")
    # sorted() makes the choice deterministic when several candidates qualify.
    for jf in sorted(glob.glob(pattern)):
        remainder = os.path.basename(jf)[len(basename):]
        if suffix_re.fullmatch(remainder):
            return jf
    return None

def _parse_points(raw_points):
    """Convert a raw point sequence into a list of [x, y] floats.

    Each entry may be a dict with "x"/"y" keys or a list/tuple of at least two
    numbers; entries in any other shape, or with non-numeric values, are skipped.
    """
    parsed = []
    for p in raw_points:
        try:
            if isinstance(p, dict) and "x" in p and "y" in p:
                parsed.append([float(p["x"]), float(p["y"])])
            elif isinstance(p, (list, tuple)) and len(p) >= 2:
                parsed.append([float(p[0]), float(p[1])])
        except (TypeError, ValueError):
            # a single malformed vertex must not abort the whole conversion
            continue
    return parsed


def _polygon_points(obj):
    """Return the polygon of annotation object `obj` as [[x, y], ...], or None.

    The polygon is read from the first non-empty of obj["points"],
    obj["geometry"], obj["vertices"]. When that field is a dict, its
    "exterior" / "points" / "all_points" list is used (Supervisely stores
    points.exterior). Polygons with fewer than 3 usable vertices yield None.
    """
    field = obj.get("points") or obj.get("geometry") or obj.get("vertices")
    if isinstance(field, dict):
        field = field.get("exterior") or field.get("points") or field.get("all_points")
    if not isinstance(field, list) or len(field) < 3:
        return None
    pts = _parse_points(field)
    return pts if len(pts) >= 3 else None


def _object_class_name(obj):
    """Resolve the class name of `obj`: id2name lookup first, then the object's own name fields."""
    # `is not None` (rather than an `or` chain) keeps a legitimate class id of 0.
    class_id = next(
        (obj[k] for k in ("classId", "class_id", "category_id", "cat_id") if obj.get(k) is not None),
        None,
    )
    name = None
    if class_id is not None:
        try:
            name = id2name.get(int(class_id))
        except (TypeError, ValueError, OverflowError):
            name = None  # non-numeric id: fall back to the name fields below
    if name is None:
        name = (obj.get("classTitle") or obj.get("class_title") or obj.get("class_name")
                or obj.get("label") or obj.get("title"))
    return name


def _remove_stale_label(path):
    """Delete an existing label file so it cannot outlive the annotation it came from."""
    if os.path.exists(path):
        os.remove(path)


# Number of label files written per split.
written = {s:0 for s in SPLITS}
for split in SPLITS:
    img_dir = os.path.join(YOLO_DATASET, split, "images")
    lab_dir = os.path.join(YOLO_DATASET, split, "labels")
    os.makedirs(lab_dir, exist_ok=True)
    ann_dir = os.path.join(SOURCE_DATASET, split, "ann")
    if not os.path.isdir(img_dir):
        print("Missing images dir:", img_dir)
        continue
    imgs = sorted(f for f in os.listdir(img_dir) if f.lower().endswith((".jpg",".jpeg",".png")))
    for imgname in tqdm(imgs, desc=f"Convert {split}"):
        base = os.path.splitext(imgname)[0]
        img_path = os.path.join(img_dir, imgname)
        out_txt = os.path.join(lab_dir, base + ".txt")
        # read size from actual image
        im = cv2.imread(img_path)
        if im is None:
            continue
        h, w = im.shape[:2]

        jf = find_ann_for_image(ann_dir, base)
        if jf is None:
            # no annotation found, remove any stale label
            _remove_stale_label(out_txt)
            continue

        try:
            with open(jf, "r") as fh:
                data = json.load(fh)
        except (OSError, ValueError):
            data = None  # unreadable / malformed JSON (JSONDecodeError is a ValueError)
        if not isinstance(data, dict):
            _remove_stale_label(out_txt)
            continue

        objs = data.get("objects") or data.get("annotations") or data.get("shapes") or []
        if not isinstance(objs, list):
            objs = []

        polys_norm = []
        for o in objs:
            if not isinstance(o, dict):
                continue
            cname = _object_class_name(o)
            # keep only classes whose name matches KEEP_RE
            if not cname or not KEEP_RE.search(str(cname)):
                continue
            pts = _polygon_points(o)
            if pts is None:
                continue
            polys_norm.append(normalize_poly(pts, w, h))

        if polys_norm:
            # one line per polygon, all with class index 0
            with open(out_txt, "w") as f:
                for poly in polys_norm:
                    f.write("0 " + " ".join(f"{v:.6f}" for v in poly) + "\n")
            written[split] += 1
        else:
            _remove_stale_label(out_txt)

print("\nConversion complete. Label files written per split:", written)

# Final counts
def yolo_counts(root="/content/dentalai-yolo"):
    """Return {split: (n_images, n_labels)} for every split in SPLITS under `root`.

    Images are files in `<split>/images` ending in .jpg/.jpeg/.png
    (case-insensitive); labels are `*.txt` files in `<split>/labels`.
    """
    def _is_image(path):
        return path.lower().endswith(('.jpg','.jpeg','.png'))

    return {
        s: (
            sum(1 for f in glob.glob(f"{root}/{s}/images/*") if _is_image(f)),
            len(glob.glob(f"{root}/{s}/labels/*.txt")),
        )
        for s in SPLITS
    }

print("Counts now:", yolo_counts())
