# WebUI-7K UI Element Detector, Training Notebook

This notebook contains **only the essential steps** used to train the final YOLO11s model
on the WebUI-7K dataset:

1. Install Ultralytics YOLO.
2. Download `biglab/webui-7k` via `huggingface_hub`.
3. Convert WebUI-7K into a 7-class YOLO dataset (Buttons, Links, Inputs, Images, Icons, Text, Headings).
4. Train a high-resolution YOLO11s detector and save the experiment results to Drive.



In [None]:
# Install Ultralytics YOLO (object detection library)
%pip install -q ultralytics tqdm

import ultralytics
ultralytics.checks()


In [None]:
import os
import json
import gzip
import shutil
from pathlib import Path

from tqdm import tqdm
from PIL import Image
from huggingface_hub import snapshot_download  # <-- NEW: download dataset from HF

# Mount Google Drive; the training cell writes its run folder under
# /content/drive/MyDrive. `drive` was previously used without being imported,
# which raised NameError on the first line of this cell.
from google.colab import drive

drive.mount('/content/drive')

# -------------------------------------------------------------------
# 1. Download WebUI-7K from Hugging Face
# -------------------------------------------------------------------
HF_REPO_ID = "biglab/webui-7k"

print(f"Downloading {HF_REPO_ID} from Hugging Face (if not cached)...")
# snapshot_download returns the local directory of the dataset snapshot.
raw_dataset_dir = Path(
    snapshot_download(repo_id=HF_REPO_ID, repo_type="dataset")
)
print(f"Raw dataset root: {raw_dataset_dir}")

# According to the WebUI release, the DOM+screenshots live under this folder.
# Fail early with a clear message if the layout is different.
source_root = raw_dataset_dir / "train_split_web7k"
if not source_root.exists():
    raise FileNotFoundError(f"Could not find 'train_split_web7k' inside {raw_dataset_dir}")

# -------------------------------------------------------------------
# 2. Configure YOLO output paths and class mapping
# -------------------------------------------------------------------
# Root of the YOLO-style dataset that the conversion step fills in.
yolo_dataset_dir = Path("/content/webui_7k_yolo")

# Remove output left over from an earlier run so old files never mix in.
if yolo_dataset_dir.exists():
    shutil.rmtree(yolo_dataset_dir)

# Create <root>/{train,val}/{images,labels}.
for split_name in ("train", "val"):
    for content_kind in ("images", "labels"):
        (yolo_dataset_dir / split_name / content_kind).mkdir(parents=True, exist_ok=True)

# The 7 detector classes; a class id is the index into this list.
TARGET_CLASSES = ["Button", "Link", "Input", "Image", "Icon", "Text", "Heading"]

# WebUI roles grouped by target class, in the same order as TARGET_CLASSES.
_ROLES_BY_CLASS = (
    ("button", "menuitem", "tab", "pushbutton"),                  # -> Button
    ("link",),                                                    # -> Link
    ("textbox", "searchbox", "combobox", "checkbox", "radio"),    # -> Input
    ("img", "image", "figure"),                                   # -> Image
    ("graphics-symbol",),                                         # -> Icon
    ("statictext", "paragraph", "label"),                         # -> Text
    ("heading",),                                                 # -> Heading
)

# Flat lookup: WebUI role -> class id.
ROLE_MAP = {
    role: class_id
    for class_id, roles in enumerate(_ROLES_BY_CLASS)
    for role in roles
}

# Fallback CSS-pixel -> image-pixel factors for Retina-style screenshots,
# keyed by the device name a screenshot filename starts with.
DEVICE_SCALE = {
    **dict.fromkeys(("iPad-Mini", "iPad-Pro"), 2.0),
    **dict.fromkeys(("iPhone-13 Pro", "iPhone-SE"), 3.0),
}


def get_scale_from_filename(filename: str) -> float:
    """Return the device scale for *filename*, or 1.0 if no device matches.

    The first DEVICE_SCALE key that is a prefix of *filename* wins.
    """
    matching_scales = (
        factor
        for device, factor in DEVICE_SCALE.items()
        if filename.startswith(device)
    )
    return next(matching_scales, 1.0)

# -------------------------------------------------------------------
# 3. Walk through WebUI-7K and convert to YOLO labels
# -------------------------------------------------------------------
# Errors that mean "this screenshot's files are unreadable or malformed".
# They are recorded and reported instead of being silently swallowed.
_CONVERSION_ERRORS = (OSError, EOFError, ValueError, KeyError, TypeError, AttributeError)


def _estimate_scale(bb_data: dict, phys_w: int, fallback: float) -> float:
    """Return the CSS-pixel -> image-pixel factor for one screenshot.

    The widest bounding box is assumed to span the full screenshot width.
    If it is wider than 100 CSS pixels, ``phys_w / widest`` is returned;
    otherwise the filename-based *fallback* is kept.
    """
    widest = 0.0
    for box in bb_data.values():
        if not box:
            continue
        width = box.get("width") or 0.0
        if width > widest:
            widest = width
    return phys_w / widest if widest > 100 else fallback


def _yolo_line(class_id: int, box: dict, scale: float, phys_w: int, phys_h: int):
    """Return one YOLO label line for *box*, or None if nothing is visible.

    The box is scaled to image pixels and its corners are clipped to the
    image. Clipping corners (rather than clamping centre and size
    independently) keeps partly-visible boxes geometrically correct and
    drops boxes that lie entirely outside the screenshot.
    """
    w = box["width"]
    h = box["height"]
    if w <= 0 or h <= 0:
        return None

    left = max(box["x"] * scale, 0.0)
    top = max(box["y"] * scale, 0.0)
    right = min((box["x"] + w) * scale, float(phys_w))
    bottom = min((box["y"] + h) * scale, float(phys_h))
    if right <= left or bottom <= top:
        return None

    # YOLO format: normalized (cx, cy, w, h).
    cx = (left + right) / 2 / phys_w
    cy = (top + bottom) / 2 / phys_h
    nw = (right - left) / phys_w
    nh = (bottom - top) / phys_h
    return f"{class_id} {cx:.6f} {cy:.6f} {nw:.6f} {nh:.6f}"


def _labels_for_screenshot(folder: Path, file_stem: str, img_path: Path) -> list:
    """Build the YOLO label lines for one screenshot.

    Returns an empty list when the matching ``-axtree.json.gz`` or
    ``-bb.json.gz`` file is missing or when no node maps to a target class.
    Raises one of ``_CONVERSION_ERRORS`` when a file is unreadable or malformed.
    """
    axtree_file = folder / f"{file_stem}-axtree.json.gz"
    bb_file = folder / f"{file_stem}-bb.json.gz"
    if not (axtree_file.exists() and bb_file.exists()):
        return []

    # Explicit encoding so parsing does not depend on the runtime locale.
    with gzip.open(axtree_file, "rt", encoding="utf-8") as f:
        ax_data = json.load(f)
    with gzip.open(bb_file, "rt", encoding="utf-8") as f:
        bb_data = json.load(f)
    with Image.open(img_path) as img:
        phys_w, phys_h = img.size

    scale = _estimate_scale(bb_data, phys_w, get_scale_from_filename(img_path.name))

    yolo_lines = []
    for node in ax_data.get("nodes", []):
        # ROLE_MAP keys are all lowercase, so compare case-insensitively.
        # NOTE(review): accessibility dumps may report roles in mixed case
        # (e.g. "StaticText") - confirm against the dataset.
        role_info = node.get("role") or {}
        role = str(role_info.get("value", "none")).lower()
        if role not in ROLE_MAP:
            continue

        # Bounding boxes are keyed by the node's backend DOM id as a string.
        box = bb_data.get(str(node.get("backendDOMNodeId")))
        if not box:
            continue

        line = _yolo_line(ROLE_MAP[role], box, scale, phys_w, phys_h)
        if line is not None:
            yolo_lines.append(line)
    return yolo_lines


def _convert_screenshot(folder: Path, img_path: Path, subset: str) -> bool:
    """Convert one screenshot; return True if an image/label pair was written."""
    file_stem = img_path.name.replace("-screenshot.webp", "")
    yolo_lines = _labels_for_screenshot(folder, file_stem, img_path)

    # Skip images with no valid elements.
    if not yolo_lines:
        return False

    # Folder name + file stem gives a unique id across the whole dataset.
    unique_id = f"{folder.name}_{file_stem}"
    img_out = yolo_dataset_dir / subset / "images" / f"{unique_id}.webp"
    lbl_out = yolo_dataset_dir / subset / "labels" / f"{unique_id}.txt"

    # Write the label first so a failed copy cannot leave an unlabeled image.
    with open(lbl_out, "w") as f:
        f.write("\n".join(yolo_lines))
    shutil.copy(img_path, img_out)
    return True


all_page_folders = sorted(f for f in source_root.iterdir() if f.is_dir())
split_index = int(len(all_page_folders) * 0.9)  # 90% train, 10% val
converted_images = 0
skipped_screenshots = []  # (path, exception) for every screenshot that failed

print("Converting WebUI-7K to YOLO format...")
for i, folder in enumerate(tqdm(all_page_folders)):
    # Train/val split based on folder index, so all screenshots of one page
    # land in the same subset.
    subset = "train" if i < split_index else "val"

    for img_path in folder.glob("*-screenshot.webp"):
        # Best effort per screenshot: one bad file no longer discards the
        # rest of its folder, and every failure is recorded for the summary.
        try:
            if _convert_screenshot(folder, img_path, subset):
                converted_images += 1
        except _CONVERSION_ERRORS as exc:
            skipped_screenshots.append((img_path, exc))

print(f"Finished conversion. Total images with labels: {converted_images}")
if skipped_screenshots:
    print(f"Skipped {len(skipped_screenshots)} screenshot(s) because of read/parse errors, e.g.:")
    for failed_path, failure in skipped_screenshots[:5]:
        print(f"  {failed_path}: {failure!r}")

# -------------------------------------------------------------------
# 4. Create Ultralytics YAML config pointing to this dataset
# -------------------------------------------------------------------
yaml_path = Path("/content/webui7k.yaml")

# One entry per line of the config; the empty first and last entries keep
# the leading and trailing newline of the written file.
yaml_lines = [
    "",
    f"path: {yolo_dataset_dir}",
    "train: train/images",
    "val: val/images",
    f"nc: {len(TARGET_CLASSES)}",
    f"names: {TARGET_CLASSES}",
    "",
]
yaml_content = "\n".join(yaml_lines)
yaml_path.write_text(yaml_content)

print(f"YOLO config written to {yaml_path}")
print(f"Dataset root: {yolo_dataset_dir}")


In [None]:
from ultralytics import YOLO

# 1. Load base model (YOLO11 Small)
model = YOLO('yolo11s.pt')

# 2. Train on WebUI-7K (converted YOLO dataset)
print("🚀 Starting training on WebUI-7K YOLO dataset...")
results = model.train(
    data='/content/webui7k.yaml',          # dataset config written to this path earlier in the notebook
    project='/content/drive/MyDrive/WebUI7K_Training',
    name='yolo11s_webui7k',
    epochs=50,
    imgsz=1280,
    batch=16,
    patience=10,                           # early-stopping patience, in epochs
    plots=True,
    save=True,
    device=0,                              # first CUDA device
)

# Report the path the trainer actually used. Ultralytics appends a numeric
# suffix to the run name when the folder already exists (exist_ok defaults to
# False), so a hard-coded path goes stale as soon as this cell is re-run.
best_weights = model.trainer.best
print(f"Training finished. Best weights saved under {best_weights} .")
