In [3]:
import math
import os
import random
import shutil
import xml.etree.ElementTree as ET
from pathlib import Path

import cv2
import yaml
from tqdm import tqdm

# ============================================
# PATHS (FIXED)
# ============================================
# Project root: the parent of the current working directory.
BASE_DIR = Path.cwd().parents[0]
# Raw PKLot data lives in a doubly nested "PKLot/PKLot" folder.
PKLOT_ROOT = BASE_DIR / "data" / "pklot" / "PKLot" / "PKLot"

# Flat YOLO export: every image / label before the split is made.
OUT_DIR = BASE_DIR / "data" / "pklot_yolo"
IMG_OUT, LBL_OUT = OUT_DIR / "images", OUT_DIR / "labels"

# Root folder of each split; images/ and labels/ are created beneath them.
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    OUT_DIR / split_name for split_name in ("train", "val", "test")
)

# Class name -> YOLO class index.
CLASS_MAP = dict(Empty=0, Occupied=1)

# ============================================
# Helpers
# ============================================
def ensure_dirs():
    """Create all output folders used by the export and the split.

    Existing folders are left alone (``exist_ok=True``), so the function
    can be called repeatedly.
    """
    targets = [IMG_OUT, LBL_OUT]
    for split_root in (TRAIN_DIR, VAL_DIR, TEST_DIR):
        targets.extend(split_root / sub for sub in ("images", "labels"))

    for folder in targets:
        folder.mkdir(parents=True, exist_ok=True)

def parse_xml(xml_file):
    """Parse one annotation XML file into axis-aligned parking-space boxes.

    Every ``<space>`` element must contain a ``<rotatedRect>`` with
    ``<center x=.. y=..>`` and ``<size w=.. h=..>`` children; an
    ``<angle d=..>`` child is honoured when present. The rectangle's
    ``w``/``h`` describe it in its own rotated frame, so they are converted
    to the extents of the enclosing axis-aligned box, which is what a YOLO
    detection label expects.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        A list of ``(occ, cx, cy, w, h)`` tuples in pixel units. ``occ`` is
        the integer ``occupied`` attribute (0 when the attribute is absent)
        and ``w``/``h`` are the axis-aligned extents. Returns ``None`` when
        the file cannot be read or parsed, or when any space lacks the
        required geometry, so the caller can skip the whole image.
    """
    try:
        root = ET.parse(xml_file).getroot()
    except (ET.ParseError, OSError):
        return None

    items = []
    for space in root.findall("space"):
        rect = space.find("rotatedRect")
        if rect is None:
            return None
        center = rect.find("center")
        size = rect.find("size")
        if center is None or size is None:
            return None
        angle = rect.find("angle")

        try:
            occ = int(space.attrib.get("occupied", 0))
            cx = float(center.attrib["x"])
            cy = float(center.attrib["y"])
            rect_w = float(size.attrib["w"])
            rect_h = float(size.attrib["h"])
            # NOTE(review): the angle is read from the "d" attribute and
            # treated as degrees (OpenCV RotatedRect convention) - confirm
            # against the dataset's annotation format.
            if angle is None:
                degrees = 0.0
            else:
                degrees = float(angle.attrib.get("d", 0.0))
        except (KeyError, ValueError):
            return None

        # Extents of the axis-aligned box that encloses the rotated
        # rectangle; for a zero angle this is exactly (rect_w, rect_h).
        theta = math.radians(degrees)
        cos_t = abs(math.cos(theta))
        sin_t = abs(math.sin(theta))
        w = rect_w * cos_t + rect_h * sin_t
        h = rect_w * sin_t + rect_h * cos_t

        items.append((occ, cx, cy, w, h))
    return items

def convert_bbox(cx, cy, w, h, W, H):
    """Convert a pixel-space centre/size box to normalized YOLO ``xywh``.

    The box is first clipped to the image rectangle so that every returned
    value lies in ``[0, 1]``; a box lying completely inside the image is
    returned unchanged apart from the normalization.

    Args:
        cx, cy: Box centre in pixels.
        w, h: Box width and height in pixels.
        W, H: Image width and height in pixels (must be non-zero).

    Returns:
        ``(x_center, y_center, width, height)`` as fractions of the image
        size. A box entirely outside the image collapses to zero size on
        the nearest image edge.
    """
    # Clip each edge to the image so labels never leave the [0, 1] range.
    x_min = min(max(cx - w / 2.0, 0.0), W)
    x_max = min(max(cx + w / 2.0, 0.0), W)
    y_min = min(max(cy - h / 2.0, 0.0), H)
    y_max = min(max(cy + h / 2.0, 0.0), H)

    return (
        (x_min + x_max) / 2.0 / W,
        (y_min + y_max) / 2.0 / H,
        (x_max - x_min) / W,
        (y_max - y_min) / H,
    )

# ============================================
# 1. EXPORT YOLO LABELS
# ============================================
def _iter_subdirs(parent):
    """Yield ``(name, path)`` for each sub-directory of *parent*, sorted by name."""
    for name in sorted(os.listdir(parent)):
        child = parent / name
        if child.is_dir():
            yield name, child


def _export_one(img_path):
    """Copy one image to IMG_OUT and write its label file to LBL_OUT.

    Returns True when the image was exported, False when it was skipped
    because the sibling XML is missing or unusable, or the image cannot be
    decoded.
    """
    xml_path = img_path.with_suffix(".xml")
    if not xml_path.exists():
        return False

    ann = parse_xml(xml_path)
    if ann is None:
        return False

    # The image is decoded only to learn its size for normalization.
    img = cv2.imread(str(img_path))
    if img is None:
        return False
    H, W = img.shape[:2]

    img_file = img_path.name
    shutil.copy(img_path, IMG_OUT / img_file)

    lines = []
    for occ, cx, cy, w, h in ann:
        cls = 1 if occ == 1 else 0
        nx, ny, nw, nh = convert_bbox(cx, cy, w, h, W, H)
        lines.append(f"{cls} {nx:.6f} {ny:.6f} {nw:.6f} {nh:.6f}\n")

    lbl_path = LBL_OUT / img_file.replace(".jpg", ".txt")
    with open(lbl_path, "w") as f:
        f.writelines(lines)
    return True


def export_yolo_labels():
    """Export every annotated image under PKLOT_ROOT as image + YOLO label.

    Walks the three directory levels below ``PKLOT_ROOT`` (named campus,
    weather and date here), and for each ``.jpg`` that has a usable sibling
    ``.xml`` copies the image into ``IMG_OUT`` and writes one
    ``class x y w h`` line per parking space into ``LBL_OUT``.

    Directories and images are visited in sorted order so repeated runs
    process files in the same sequence. Images that had to be skipped are
    counted and reported instead of being dropped silently.
    """
    ensure_dirs()
    print("=== Exporting YOLOv8 Labels ===")
    print("PKLot Root =", PKLOT_ROOT)

    count = 0
    skipped = 0
    for campus, campus_dir in _iter_subdirs(PKLOT_ROOT):
        for weather, weather_dir in _iter_subdirs(campus_dir):
            for date, date_dir in _iter_subdirs(weather_dir):
                imgs = sorted(
                    f for f in os.listdir(date_dir) if f.endswith(".jpg")
                )
                for img_file in tqdm(imgs, desc=f"{campus}/{weather}/{date}"):
                    if _export_one(date_dir / img_file):
                        count += 1
                    else:
                        skipped += 1

    print(f"Export complete: {count} scene images processed.")
    if skipped:
        print(
            f"Skipped {skipped} images "
            "(missing/unparseable XML or unreadable image)."
        )

# ============================================
# 2. TRAIN/VAL/TEST SPLIT
# ============================================
def split_dataset(train=0.70, val=0.20, test=0.10, seed=42):
    """Copy the flat YOLO export into train/val/test folders.

    Images found in ``IMG_OUT`` are sorted, shuffled and cut into three
    consecutive slices. Each image and its same-named ``.txt`` label are
    copied (not moved) into ``<split>/images`` and ``<split>/labels``.

    Args:
        train: Fraction of images for the training split.
        val: Fraction of images for the validation split.
        test: Nominal test fraction. The test split always receives the
            images left over after train and val, so this value is only
            used to warn when the three fractions do not sum to 1.
        seed: Seed for the shuffle so that reruns produce the same split.
            Pass ``None`` to get a different random split on each call.

    Raises:
        FileNotFoundError: If an image has no label file in ``LBL_OUT``.
    """
    print("=== Building train/val/test split ===")

    total_ratio = train + val + test
    if abs(total_ratio - 1.0) > 1e-6:
        print(
            f"Warning: split ratios sum to {total_ratio:.3f}; "
            "test receives whatever remains after train and val."
        )

    imgs = sorted(f for f in os.listdir(IMG_OUT) if f.endswith(".jpg"))
    # A private RNG keeps the split reproducible without touching the
    # global random state.
    random.Random(seed).shuffle(imgs)

    n = len(imgs)
    n_train = int(train * n)
    n_val = int(val * n)

    train_set = imgs[:n_train]
    val_set = imgs[n_train:n_train + n_val]
    test_set = imgs[n_train + n_val:]

    def reset_split(split_dir):
        # Files left over from an earlier run with a different shuffle or
        # ratio would make one image appear in several splits, so remove
        # previously copied images/labels before filling the folder again.
        for sub, suffix in (("images", ".jpg"), ("labels", ".txt")):
            target = split_dir / sub
            target.mkdir(parents=True, exist_ok=True)
            for stale in target.glob(f"*{suffix}"):
                stale.unlink()

    def move_files(file_list, split_dir):
        for img_file in tqdm(file_list, desc=f"Moving {split_dir.name}"):
            lbl_file = img_file.replace(".jpg", ".txt")
            shutil.copy(IMG_OUT / img_file, split_dir / "images" / img_file)
            shutil.copy(LBL_OUT / lbl_file, split_dir / "labels" / lbl_file)

    for split_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR):
        reset_split(split_dir)

    move_files(train_set, TRAIN_DIR)
    move_files(val_set, VAL_DIR)
    move_files(test_set, TEST_DIR)

    print("Split complete:")
    print(f"  Train: {len(train_set)}")
    print(f"  Val:   {len(val_set)}")
    print(f"  Test:  {len(test_set)}")

# ============================================
# 3. WRITE YOLO CONFIG
# ============================================
def write_yolo_config():
    """Write the dataset YAML file into OUT_DIR.

    The file records the dataset root, the image folder of each split
    relative to that root, and the mapping from class index to class name.
    """
    class_names = {0: "empty", 1: "occupied"}
    cfg = dict(
        path=str(OUT_DIR),
        train="train/images",
        val="val/images",
        test="test/images",
        names=class_names,
    )

    config_path = OUT_DIR / "pklot_yolo.yaml"
    with open(config_path, "w") as f:
        yaml.dump(cfg, f)

    print("YOLO config written ->", config_path)

# ============================================
# RUN PIPELINE
# ============================================
# Stage 1: flat export of images + labels.
export_yolo_labels()
# Stage 2: copy the export into train/val/test folders.
split_dataset()
# Stage 3: dataset YAML consumed by the trainer.
write_yolo_config()

# Final hint with the training command for the generated dataset.
print(
    "\n=== COMPLETE ===",
    "Train YOLOv8 with:",
    "yolo detect train data=data/pklot_yolo/pklot_yolo.yaml model=yolov8s.pt imgsz=640 epochs=50",
    sep="\n",
)


=== Exporting YOLOv8 Labels ===
PKLot Root = /home/rameyjm7/workspace/Computer Vision ECE5554/parking-detector-lpr/data/pklot/PKLot/PKLot


PUCPR/Cloudy/2012-09-12:   0%|                                                                                                                   | 0/51 [00:00<?, ?it/s]

PUCPR/Cloudy/2012-09-12: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 51/51 [00:01<00:00, 43.70it/s]
PUCPR/Cloudy/2012-09-16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 145/145 [00:03<00:00, 37.76it/s]
PUCPR/Cloudy/2012-09-28: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 132/132 [00:04<00:00, 28.60it/s]
PUCPR/Cloudy/2012-10-05: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 34.25it/s]
PUCPR/Cloudy/2012-10-12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 154/154 [00:03<00:00, 41.10it/s]
PUCPR/Cloudy/2012-10-13: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 155/155 [00:04<00:0

Export complete: 12416 scene images processed.
=== Building train/val/test split ===


Moving train: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8691/8691 [02:40<00:00, 54.17it/s]
Moving val: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2483/2483 [00:41<00:00, 59.75it/s]
Moving test: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1242/1242 [00:20<00:00, 60.37it/s]

Split complete:
  Train: 8691
  Val:   2483
  Test:  1242
YOLO config written -> /home/rameyjm7/workspace/Computer Vision ECE5554/parking-detector-lpr/data/pklot_yolo/pklot_yolo.yaml

=== COMPLETE ===
Train YOLOv8 with:
yolo detect train data=data/pklot_yolo/pklot_yolo.yaml model=yolov8s.pt imgsz=640 epochs=50



