<h1>Installs</h1>

In [None]:
# Install the notebook's dependencies (Colab shell commands; run once per session).
# transformers is installed from the GitHub main branch; the notebook later imports
# Sam3Model / Sam3Processor from it (presumably not yet in a tagged release - confirm).
# NOTE(review): none of these installs is version-pinned, so results may differ between runs.
!pip install -U git+https://github.com/huggingface/transformers.git
# pillow_heif provides register_heif_opener(), used in the "Convert to JPG" section.
!pip install pillow_heif
# ultralytics provides the YOLO class used for training and prediction.
!pip install -U ultralytics

Collecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-bcl1eo0u
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-bcl1eo0u
  Resolved https://github.com/huggingface/transformers.git to commit 40dc11cd3eb4126652aa41ef8272525affd4a636
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


<h1>Google Drive Setup</h1>

In [None]:
# 1) Mount Google Drive at /content/drive.
# Every data, model and output path used later in this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# 2) Path to the folder with the images (adjust the folder name if needed)
IMAGE_DIR = "/content/drive/MyDrive/Neodata Hackathon 2025/"

# 3) Check that everyone sees the same images
import os

# Fail early with a clear message when the shared folder is not reachable from this account.
if not os.path.exists(IMAGE_DIR):
    raise FileNotFoundError(f"Folder ne postoji: {IMAGE_DIR}")

# List the top-level entries (files and sub-folders) so collaborators can compare.
print("Slike u folderu:")
for f in os.listdir(IMAGE_DIR):
    print(" -", f)

Slike u folderu:
 - TRAIN
 - TRAIN_JPG
 - facade_element1.STP
 - facade_element2.STP
 - facade_element3.STP
 - images
 - OBJ
 - YOLO
 - Neodata2025


<h1>Data</h1>

In [None]:
import os, glob

DATA_ROOT = "/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN"

# Joining with "" only appends a trailing separator, so TRAIN_DIR is DATA_ROOT itself.
TRAIN_DIR = os.path.join(DATA_ROOT, "")
POS_DIR, NEG_DIR = (os.path.join(TRAIN_DIR, sub) for sub in ("positive", "negative"))

# Stop at the first folder that is missing, checked in the same order as listed.
first_missing = next(
    (folder for folder in (DATA_ROOT, TRAIN_DIR, POS_DIR, NEG_DIR) if not os.path.exists(folder)),
    None,
)
if first_missing is not None:
    raise FileNotFoundError(f"Missing: {first_missing}")

# Every entry (any extension) directly inside each class folder.
pos_entries = glob.glob(os.path.join(POS_DIR, "*"))
neg_entries = glob.glob(os.path.join(NEG_DIR, "*"))

print("OK. Found:")
print(" -", TRAIN_DIR)
print(" - positives:", len(pos_entries))
print(" - negatives:", len(neg_entries))

print("\nExample POS files:", pos_entries[:5])
print("Example NEG files:", neg_entries[:5])

OK. Found:
 - /content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/
 - positives: 12
 - negatives: 43

Example POS files: ['/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/positive/IMG_5675.HEIC', '/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/positive/IMG_5681.HEIC', '/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/positive/IMG_5478 3.HEIC', '/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/positive/IMG_5678.HEIC', '/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/positive/IMG_5679.HEIC']
Example NEG files: ['/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/negative/IMG_5354 2.HEIC', '/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/negative/IMG_5464 3.HEIC', '/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/negative/IMG_5662.HEIC', '/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/negative/IMG_5401 2.HEIC', '/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN/negative/IMG_5665.HEIC']


In [None]:
# Log in to the Hugging Face Hub before loading checkpoints from it.
# NOTE(review): presumably required because the "facebook/sam3" checkpoint loaded in the
# next cell needs an authenticated account - confirm whether it is gated.
from huggingface_hub import login
login()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
import torch
from transformers import Sam3Processor, Sam3Model

# Use the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

# Load the SAM3 checkpoint and its matching processor from the Hugging Face Hub.
# NOTE(review): the name `model` is rebound to a YOLO model in the training section further
# down, so the segmentation cells must run before that cell (or after re-running this one).
model = Sam3Model.from_pretrained("facebook/sam3").to(device)
processor = Sam3Processor.from_pretrained("facebook/sam3")

Device: cuda


Loading weights:   0%|          | 0/1468 [00:00<?, ?it/s]

<h1>Convert to JPG</h1>

In [None]:
import os
from pillow_heif import register_heif_opener
from PIL import Image
from tqdm import tqdm
import glob

# Register pillow_heif with PIL so that Image.open can read the .HEIC source photos.
register_heif_opener()

# Output folders for the JPG copies, mirroring the positive/negative layout of TRAIN.
OUT_ROOT = "/content/drive/MyDrive/Neodata Hackathon 2025/TRAIN_JPG"
OUT_POS = f"{OUT_ROOT}/positive"
OUT_NEG = f"{OUT_ROOT}/negative"
# exist_ok=True makes this cell safe to re-run.
os.makedirs(OUT_POS, exist_ok=True)
os.makedirs(OUT_NEG, exist_ok=True)
# NOTE(review): tqdm is imported above but no HEIC -> JPG conversion loop is present in this
# cell; the following cells assume the .jpg files already exist in these folders.

In [None]:
import random
from PIL import Image

# Sorted lists of the converted JPGs for each class folder.
POS_JPG = sorted(glob.glob(os.path.join(OUT_POS, "*.jpg")))
NEG_JPG = sorted(glob.glob(os.path.join(OUT_NEG, "*.jpg")))

# Pool the sample is drawn from.
sample_pool = NEG_JPG   # switch to POS_JPG to inspect a positive sample
if not sample_pool:
    # random.choice([]) would raise an opaque IndexError; explain what is actually missing.
    raise FileNotFoundError(
        "No .jpg files found in the selected TRAIN_JPG folder; "
        "run the HEIC -> JPG conversion first."
    )

# NOTE: the pick is unseeded, so a different image is shown on every run.
img_path = random.choice(sample_pool)
print("Using:", img_path)

# Keep `image` as the last expression so the notebook renders it.
image = Image.open(img_path).convert("RGB")
image

In [None]:
import torch

# Free-text description of what to segment in `image` (loaded in the previous cell).
prompt = "large orange wavy area"  # try: "hole", "glass", "rubber gasket", "metal sheet"

# Build the model inputs from the image and the text prompt and move them to `device`.
inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)

# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = model(**inputs)

# Post-process the raw outputs into per-instance masks using the thresholds below and the
# original image size recorded by the processor; [0] selects the entry for our single image.
results = processor.post_process_instance_segmentation(
    outputs,
    threshold=0.6,
    mask_threshold=0.5,
    target_sizes=inputs.get("original_sizes").tolist()
)[0]

print(f"Prompt: {prompt} | Found {len(results['masks'])} instances")

Prompt: large orange wavy area | Found 0 instances


In [None]:
import numpy as np
import matplotlib
from PIL import Image as PILImage

def overlay_masks(image, masks):
    """Blend every mask onto ``image`` as a half-transparent coloured layer.

    Args:
        image: PIL image; it is converted to RGBA before compositing.
        masks: tensor exposing ``.cpu().numpy()`` whose first axis indexes the masks.

    Returns:
        The RGBA image with one "rainbow" colour per mask composited on top, or the
        plain RGBA conversion when there are no masks.
    """
    canvas = image.convert("RGBA")
    mask_stack = masks.cpu().numpy().astype(np.uint8) * 255

    count = mask_stack.shape[0]
    if count == 0:
        return canvas

    # One distinct colour per mask, sampled from the rainbow colormap.
    palette = matplotlib.colormaps.get_cmap("rainbow").resampled(count)

    for idx in range(count):
        rgb = tuple(int(channel * 255) for channel in palette(idx)[:3])
        layer = PILImage.new("RGBA", canvas.size, rgb + (0,))
        # Alpha is half of the mask value: ~50% opacity inside the mask, 0 outside.
        layer.putalpha(PILImage.fromarray(mask_stack[idx]).point(lambda v: int(v * 0.5)))
        canvas = PILImage.alpha_composite(canvas, layer)
    return canvas

# Overlay the masks found by the previous cell on the sampled image; the bare `viz`
# expression makes the notebook render the result.
viz = overlay_masks(image, results["masks"])
viz

<h1>Classification (Facade element)</h1>

Train YOLO

In [None]:
from pathlib import Path
import cv2
from collections import Counter

# ====== PATHS ======
# Root of the YOLO working area on Drive; the dataset lives in <YOLO_ROOT>/dataset with
# images/<split> and labels/<split> sub-folders (see the loop further down).
YOLO_ROOT = Path("/content/drive/MyDrive/Neodata Hackathon 2025/YOLO")
DATASET = YOLO_ROOT / "dataset"

# File suffixes (lower-case, with dot) accepted as images when matching a label file.
IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

# Number of extra copies to create per source file, by label content
# (class 0 present / class 2 present / empty label file).
# To keep these fixed values, set AUTO_TUNE=False; with AUTO_TUNE=True they are
# overwritten by auto_tune_counts_on_train() below.
N_FOR_CLASS0 = 7
N_FOR_CLASS2 = 43
N_FOR_EMPTY  = 43

AUTO_TUNE = True  # derive the counts from the TRAIN label distribution (recommended)

# Dataset splits that get augmented.
SPLITS = ["train", "val"]  # <-- now includes val

def find_image(img_dir: Path, stem: str) -> Path | None:
    """Return the image in ``img_dir`` whose file stem is exactly ``stem``.

    Args:
        img_dir: Directory to search (not recursive).
        stem: File name without extension, taken from the label file.

    Returns:
        Path of the first match (in sorted order) whose suffix is in ``IMG_EXTS``
        (case-insensitive), or ``None`` when there is no such file.
    """
    import glob  # local import: only needed for escaping the pattern

    # Escape the stem so characters such as "[" or "*" in a file name are matched
    # literally instead of being interpreted as glob wildcards.
    pattern = glob.escape(stem) + ".*"
    # sorted() makes the choice deterministic when several extensions exist.
    for p in sorted(img_dir.glob(pattern)):
        # "stem.*" also matches "stem.extra.jpg"; require an exact stem match.
        if p.stem == stem and p.suffix.lower() in IMG_EXTS:
            return p
    return None

def parse_classes(txt_path: Path) -> list[int]:
    """Read the class id of every annotation line in a YOLO label file.

    Args:
        txt_path: Label file; the first whitespace-separated field of each
            non-blank line is taken as the class id.

    Returns:
        Class ids in file order (duplicates kept). Blank lines and lines whose
        first field is not a finite number are skipped.
    """
    classes = []
    for ln in txt_path.read_text(encoding="utf-8", errors="ignore").splitlines():
        parts = ln.split()
        if not parts:
            # Blank or whitespace-only line.
            continue
        try:
            # int(float(...)) also accepts ids written as "2.0".
            classes.append(int(float(parts[0])))
        except (ValueError, OverflowError):
            # Best effort: ignore malformed lines (non-numeric, nan, inf).
            continue
    return classes

def auto_tune_counts_on_train():
    """Derive the per-file copy counts from the TRAIN label files only.

    Counts how many train label files contain class 0, 1 and 2 and how many are
    empty, then chooses, for class 0, class 2 and empty files, how many copies per
    file are needed to roughly reach the class-1 file count.

    Returns:
        Tuple ``(n0, n2, ne)``: copies per file for class 0 (capped at 50),
        class 2 (capped at 200) and empty label files (capped at 100).
    """
    import math

    labels_root = DATASET / "labels" / "train"

    files_with_class = Counter()   # class id -> number of files containing it
    empty_files = 0
    for label_path in sorted(labels_root.glob("*.txt")):
        present = set(parse_classes(label_path))
        if present:
            files_with_class.update(present)
        else:
            empty_files += 1

    c0 = files_with_class.get(0, 0)
    c1 = files_with_class.get(1, 0)
    c2 = files_with_class.get(2, 0)
    # Class 1 is the reference; the others are brought roughly up to its file count.
    target = max(c1, 1)

    def per_file_copies(current):
        # Copies per existing file needed to close the gap to `target`, rounded up.
        if current > 0:
            return math.ceil(max(0, target - current) / current)
        return 0

    n0 = min(per_file_copies(c0), 50)
    n2 = min(per_file_copies(c2), 200)
    ne = min(per_file_copies(empty_files), 100)

    print("\n=== TRAIN distribution (for auto-tune) ===")
    print("class0 files:", c0, "class1 files:", c1, "class2 files:", c2, "empty files:", empty_files)
    print("Auto target ~", target)
    print("Auto-chosen: N_FOR_CLASS0 =", n0, "N_FOR_CLASS2 =", n2, "N_FOR_EMPTY =", ne)
    return n0, n2, ne

import re

# Replace the fixed N_FOR_* defaults with counts derived from the train split.
if AUTO_TUNE:
    N_FOR_CLASS0, N_FOR_CLASS2, N_FOR_EMPTY = auto_tune_counts_on_train()

# Files written by this cell are named "<stem>_rotation<k>". They must never be used
# as augmentation sources, otherwise re-running the cell would create copies of copies
# ("<stem>_rotation1_rotation1", ...).
AUGMENTED_STEM_RE = re.compile(r"_rotation\d+$")

total_created = 0

# NOTE(review): SPLITS includes "val", so validation also receives these duplicated
# images; metrics computed on it are measured on the oversampled set - confirm intended.
for split in SPLITS:
    img_dir = DATASET / "images" / split
    lbl_dir = DATASET / "labels" / split

    print(f"\n=== Split: {split} ===")
    print("Images dir:", img_dir)
    print("Labels dir:", lbl_dir)

    assert img_dir.exists(), f"Missing images dir: {img_dir}"
    assert lbl_dir.exists(), f"Missing labels dir: {lbl_dir}"

    label_files = sorted(lbl_dir.glob("*.txt"))
    print("Found label files:", len(label_files))

    created = 0
    skipped_no_image = 0
    skipped_existing = 0
    skipped_other = 0
    skipped_augmented = 0

    for lbl_path in label_files:
        stem = lbl_path.stem

        # Only original files are augmentation sources (keeps re-runs idempotent).
        if AUGMENTED_STEM_RE.search(stem):
            skipped_augmented += 1
            continue

        classes = parse_classes(lbl_path)
        cls_set = set(classes)

        # Decide how many copies to create (class 2 takes precedence over class 0)
        if len(classes) == 0:
            n_copies = N_FOR_EMPTY
        elif 2 in cls_set:
            n_copies = N_FOR_CLASS2
        elif 0 in cls_set:
            n_copies = N_FOR_CLASS0
        else:
            skipped_other += 1
            continue

        if n_copies <= 0:
            continue

        img_path = find_image(img_dir, stem)
        if img_path is None:
            skipped_no_image += 1
            continue

        img = cv2.imread(str(img_path))
        if img is None:
            # File exists but could not be decoded; counted together with missing images.
            skipped_no_image += 1
            continue

        # ONLY 90° rotation (computed once); every copy of a file is the same rotated image
        img90 = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)

        for i in range(1, n_copies + 1):
            new_stem = f"{stem}_rotation{i}"
            new_img_path = img_path.with_name(new_stem + img_path.suffix)
            new_lbl_path = lbl_path.with_name(new_stem + ".txt")

            # Don't overwrite
            if new_img_path.exists() or new_lbl_path.exists():
                skipped_existing += 1
                continue

            ok = cv2.imwrite(str(new_img_path), img90)
            if not ok:
                raise RuntimeError(f"Failed to write image: {new_img_path}")

            # Labels: full-image boxes (so the rotation needs no coordinate transform);
            # empty stays empty
            if len(classes) == 0:
                new_lbl_path.write_text("", encoding="utf-8")
            else:
                lines = [f"{c} 0.5 0.5 1.0 1.0" for c in classes]
                new_lbl_path.write_text("\n".join(lines) + "\n", encoding="utf-8")

            created += 1

    total_created += created
    print("Created:", created)
    print("Skipped (no matching image):", skipped_no_image)
    print("Skipped (already existed):", skipped_existing)
    print("Skipped (not class0/class2 and not empty):", skipped_other)
    print("Skipped (already an augmented copy):", skipped_augmented)

print(f"\nAll done. Total created across train+val: {total_created}")


=== TRAIN distribution (for auto-tune) ===
class0 files: 5 class1 files: 40 class2 files: 1 empty files: 1
Auto target ~ 40
Auto-chosen: N_FOR_CLASS0 = 7 N_FOR_CLASS2 = 39 N_FOR_EMPTY = 39

=== Split: train ===
Images dir: /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/train
Labels dir: /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/labels/train
Found label files: 47
Created: 113
Skipped (no matching image): 0
Skipped (already existed): 0
Skipped (not class0/class2 and not empty): 40

=== Split: val ===
Images dir: /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/val
Labels dir: /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/labels/val
Found label files: 9
Created: 92
Skipped (no matching image): 0
Skipped (already existed): 0
Skipped (not class0/class2 and not empty): 5

All done. Total created across train+val: 205


In [None]:
from ultralytics import YOLO
import torch
from pathlib import Path

# --- Run configuration (DATASET / YOLO_ROOT come from the augmentation cell) ---
DATA_YAML = DATASET / "facade.yaml"
PROJECT = YOLO_ROOT / "runs"
NAME = "yolov8n_balanced_train"

DEVICE = 0 if torch.cuda.is_available() else "cpu"
print("Using device:", DEVICE)

MODEL_WEIGHTS = "yolov8n.pt"
# NOTE: this rebinds the notebook-level name `model` (previously the SAM3 model).
model = YOLO(MODEL_WEIGHTS)

# --- Sanity check: ensure val labels are single-class per image ---
VAL_LABELS = DATASET / "labels" / "val"
multi_class_files = []
for p in sorted(VAL_LABELS.glob("*.txt")):
    # Reuse the label parser from the augmentation cell instead of re-implementing it.
    unique_classes = sorted(set(parse_classes(p)))
    if len(unique_classes) > 1:
        multi_class_files.append((p.name, unique_classes))

if multi_class_files:
    print("\n⚠️ WARNING: Some VAL label files contain MULTIPLE classes (this breaks your assumption).")
    print("First few examples:")
    for name, cls in multi_class_files[:10]:
        print(" -", name, "classes:", cls)
else:
    print("\n✅ VAL labels look single-class per image (or empty).")

# Train
train_results = model.train(
    data=str(DATA_YAML),
    epochs=30,
    patience=8,
    imgsz=384,
    batch=16 if DEVICE != "cpu" else 4,
    workers=2,
    device=DEVICE,
    amp=(DEVICE != "cpu"),
    cache=True,
    lr0=0.003,
    warmup_epochs=2,
    close_mosaic=10,
    project=str(PROJECT),
    name=NAME,
    verbose=True,
)

# Validate strictly on val split
metrics = model.val(data=str(DATA_YAML), device=DEVICE, split="val")
box = metrics.box

# Mean precision / recall over classes; F1 is derived from those two means.
precision = float(box.mp)
recall    = float(box.mr)
f1        = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0.0
map50     = float(box.map50)
map5095   = float(box.map)

print("\n=== VAL metrics (detection) ===")
print(f"Precision (mean): {precision:.4f}")
print(f"Recall    (mean): {recall:.4f}")
print(f"F1        (mean): {f1:.4f}")
print(f"mAP@0.5         : {map50:.4f}")
print(f"mAP@0.5:0.95    : {map5095:.4f}")

# When runs/<NAME> already exists, Ultralytics writes to an auto-incremented folder
# (e.g. "yolov8n_balanced_train5"), so PROJECT / NAME may point at an older run.
# Ask the trainer for the real checkpoint path; fall back to the nominal one.
_trainer_best = getattr(getattr(model, "trainer", None), "best", None)
BEST_WEIGHTS = Path(_trainer_best) if _trainer_best else PROJECT / NAME / "weights" / "best.pt"

print("\nBest model weights saved at:")
print(BEST_WEIGHTS)

Using device: 0

‚úÖ VAL labels look single-class per image (or empty).
Ultralytics 8.3.237 üöÄ Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (NVIDIA A100-SXM4-80GB, 81222MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/facade.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=384, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.003, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, nam

  attn = (q.transpose(-2, -1) @ k) * self.scale
  x = (v @ attn.transpose(-2, -1)).view(B, C, H, W) + self.pe(v.reshape(B, C, H, W))


[34m[1mtrain: [0mFast image access ‚úÖ (ping: 0.6¬±0.1 ms, read: 644.0¬±353.5 MB/s, size: 5153.2 KB)
[K[34m[1mtrain: [0mScanning /content/drive/.shortcut-targets-by-id/1eACqbAyiu4G3bgvb1aMnfl0pjOD46Nka/Neodata Hackathon 2025/YOLO/dataset/labels/train.cache... 160 images, 40 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 160/160 227.9Kit/s 0.0s
[K[34m[1mtrain: [0mCaching images (0.0GB RAM): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 160/160 36.8it/s 4.4s
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.9¬±0.9 ms, read: 591.6¬±336.6 MB/s, size: 5830.2 KB)
[K[34m[1mval: [0mScanning /content/drive/.shortcut-targets-by-id/1eACqbAyiu4G3bgvb1aMnfl0pjOD46Nka/Neodata Hackathon 2025/YOLO/dataset/labels/val.cache... 101 images, 40 background

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K       1/30      1.04G     0.8263      2.895      1.287         31        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 9.6it/s 1.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 11.7it/s 0.3s
                   all        101         61    0.00201          1      0.451      0.409

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K       2/30      1.08G     0.3767      2.607      1.044         39        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 1.7it/s 0.2s<5.2s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K       2/30      1.08G     0.3029      2.283     0.9831         39        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 12.0it/s 0.8s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 11.6it/s 0.3s
                   all        101         61     0.0022          1      0.732       0.69

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K       3/30      1.14G      0.225      1.825     0.9188         28        384: 0% ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 0/10  0.1s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K       3/30      1.14G     0.2573      1.614     0.9428         44        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 12.0it/s 0.8s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 11.4it/s 0.4s
                   all        101         61      0.761      0.458      0.503      0.385

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K       4/30      1.14G     0.2153      1.205     0.9202         31        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 1.9it/s 0.2s<4.6s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K       4/30      1.14G     0.2163      1.129     0.9143         33        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 13.0it/s 0.8s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.6it/s 0.3s
                   all        101         61      0.881      0.458      0.627      0.538

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K       5/30      1.14G     0.2279      1.006     0.8749         43        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 1.9it/s 0.2s<4.6s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K       5/30      1.14G     0.2403     0.9504     0.9053         35        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 12.4it/s 0.8s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.4it/s 0.3s
                   all        101         61       0.99      0.533      0.723      0.649

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K       6/30      1.14G     0.2655     0.9467      0.893         35        384: 0% ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 0/10  0.1s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K       6/30      1.14G     0.2721     0.8557      0.903         44        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 13.0it/s 0.8s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.6it/s 0.3s
                   all        101         61      0.756      0.812      0.781      0.721

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K       7/30      1.16G     0.2365     0.7291     0.9006         32        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 2.3it/s 0.1s<4.0s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K       7/30      1.16G     0.2736     0.7933     0.9186         37        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 13.9it/s 0.7s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.5it/s 0.3s
                   all        101         61      0.899      0.928      0.995      0.961

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K       8/30      1.16G     0.2569     0.8347     0.9249         42        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 1.9it/s 0.2s<4.9s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K       8/30      1.16G      0.243      0.753     0.9084         39        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 13.5it/s 0.7s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.5it/s 0.3s
                   all        101         61      0.872      0.832      0.983      0.939

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K       9/30      1.17G      0.242     0.6444     0.8931         37        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 2.0it/s 0.2s<4.5s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K       9/30      1.17G     0.2618     0.7043      0.918         33        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 13.7it/s 0.7s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.1it/s 0.3s
                   all        101         61      0.786      0.959      0.878       0.87

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      10/30      1.17G     0.2476     0.6322     0.8732         38        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 1.9it/s 0.2s<4.8s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K      10/30      1.17G     0.2306     0.6653      0.892         35        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 13.3it/s 0.8s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.1it/s 0.3s
                   all        101         61      0.598      0.781      0.855      0.804

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      11/30      1.19G     0.2502     0.7757     0.9008         38        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 2.3it/s 0.1s<4.0s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K      11/30      1.19G     0.2449      0.653     0.9133         42        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 14.2it/s 0.7s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.1it/s 0.3s
                   all        101         61      0.633      0.771       0.82      0.769

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      12/30      1.19G      0.265     0.6706     0.9182         40        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 2.0it/s 0.2s<4.6s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K      12/30      1.19G     0.2414     0.6133     0.9097         33        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 13.6it/s 0.7s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.8it/s 0.3s
                   all        101         61      0.527      0.713      0.805      0.676

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      13/30      1.19G     0.2117     0.6916     0.9205         38        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 2.2it/s 0.1s<4.0s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K      13/30      1.19G     0.2126     0.6377      0.921         39        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 13.9it/s 0.7s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.6it/s 0.3s
                   all        101         61      0.695      0.454      0.574      0.569

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      14/30      1.19G     0.1968     0.5719     0.9143         31        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 1.9it/s 0.2s<4.6s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K      14/30      1.19G     0.2095     0.6367     0.9051         36        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 13.8it/s 0.7s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 12.9it/s 0.3s
                   all        101         61       0.65      0.857      0.859      0.855

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      15/30      1.19G     0.2115     0.5314     0.9042         33        384: 10% ‚îÅ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ 1/10 2.3it/s 0.1s<3.9s

  pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))


[K      15/30      1.19G     0.2195     0.5939     0.9128         35        384: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 10/10 14.1it/s 0.7s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 13.0it/s 0.3s
                   all        101         61      0.888        0.8      0.781      0.755
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 8 epochs. Best results observed at epoch 7, best model saved as best.pt.
To update EarlyStopping(patience=8) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.

15 epochs completed in 0.006 hours.
Optimizer stripped from /content/drive/.shortcut-targets-by-id/1eACqbAyiu4G3bgvb1aMnfl0pjOD46Nka/Neodata Hackathon 2025/YOLO/runs/yolov8n_balanced_train5/weights/last.pt, 6.2MB
Optimizer stripped from /content/drive/.shortcut-targets-by-id/1eACqbAyiu4G3bgvb1aMnfl0pjOD46Nka/Neodata Hackath

In [None]:
import pandas as pd
from pathlib import Path
import numpy as np

# Validation images and their YOLO label files (DATASET comes from the augmentation cell).
VAL_IMAGES = DATASET / "images" / "val"
VAL_LABELS = DATASET / "labels" / "val"

assert VAL_IMAGES.exists(), f"Missing: {VAL_IMAGES}"
assert VAL_LABELS.exists(), f"Missing: {VAL_LABELS}"

# Predict on every image in the val folder with the YOLO `model` from the training cell.
# The results are kept in memory and, because of save/save_txt, annotated images and
# per-image txt files are also written under PROJECT with the run name below.
results = model.predict(
    source=str(VAL_IMAGES),
    imgsz=384,
    conf=0.25,
    iou=0.5,
    save=True,
    save_txt=True,
    project=str(PROJECT),
    name=f"{NAME}_pred_val",
    device=DEVICE,
)

# One dict per image; filled by the loop below and turned into a DataFrame.
rows = []

def read_single_gt_class(label_path: Path):
    """Reduce a YOLO label file to a single ground-truth class.

    Args:
        label_path: Path of the label file; it may not exist.

    Returns:
        ``(gt_class, is_multiclass)``. ``gt_class`` is the smallest class id found,
        or ``None`` when the file is missing or has no parseable line.
        ``is_multiclass`` is True when more than one distinct class id is present.
    """
    if not label_path.exists():
        return None, False

    seen = set()
    content = label_path.read_text(encoding="utf-8", errors="ignore")
    for raw in content.splitlines():
        fields = raw.split()
        if not fields:
            continue
        try:
            seen.add(int(float(fields[0])))
        except Exception:
            # Malformed line: ignored on purpose.
            continue

    if not seen:
        return None, False
    return min(seen), len(seen) > 1

# Build one comparison row per validation image: top prediction vs. ground truth.
for res in results:
    img_path = Path(res.path)
    stem = img_path.stem

    # ---- SINGLE predicted class per image: the box with the highest confidence ----
    pred_class = None
    pred_conf = None

    if res.boxes is not None and len(res.boxes) > 0:
        confs = res.boxes.conf.detach().cpu().numpy()
        clss  = res.boxes.cls.detach().cpu().numpy().astype(int)
        best_i = int(np.argmax(confs))
        pred_class = int(clss[best_i])
        pred_conf = float(confs[best_i])

    # ---- SINGLE real class per image ----
    label_path = VAL_LABELS / f"{stem}.txt"
    real_class, gt_multiclass = read_single_gt_class(label_path)

    rows.append({
        "image": img_path.name,
        "predict": pred_class,          # ONE class or None (no detection)
        "pred_conf": pred_conf,         # confidence of chosen class
        "real": real_class,             # ONE class or None (empty/missing label)
        "gt_multiclass": gt_multiclass  # True if your GT violates assumption
    })

df = pd.DataFrame(rows).sort_values("image")

print("\n=== ONE-class Prediction vs Ground Truth (VAL) ===")
print(df.to_string(index=False))

csv_path = PROJECT / NAME / "val_prediction_vs_gt_oneclass.csv"
# runs/<NAME> is not guaranteed to exist (Ultralytics may have written the training
# run to an auto-incremented folder), so create it before writing the CSV.
csv_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path, index=False)
print(f"\nSaved CSV to: {csv_path}")

# Report the folder the predictor really used; it gets a numeric suffix when
# "<NAME>_pred_val" already exists. Fall back to the nominal path if unavailable.
pred_dir = getattr(getattr(model, "predictor", None), "save_dir", None) or PROJECT / (NAME + "_pred_val")
print(f"Annotated predictions saved to: {pred_dir}")


image 1/101 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/val/IMG_5346 2.jpg: 384x288 1 facade_element1, 8.8ms
image 2/101 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/val/IMG_5349 2.jpg: 384x288 1 facade_element1, 7.7ms
image 3/101 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/val/IMG_5350 2.jpg: 288x384 1 facade_element0, 1 facade_element1, 9.6ms
image 4/101 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/val/IMG_5354 2.jpg: 288x384 1 facade_element0, 1 facade_element1, 7.5ms
image 5/101 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/val/IMG_5443 3.jpg: 288x384 1 facade_element0, 1 facade_element1, 7.5ms
image 6/101 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/val/IMG_5620.jpg: 288x384 1 facade_element0, 7.7ms
image 7/101 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/val/IMG_5620_rotation1.jpg: 384x288 1 facade_element0, 2 facade_element2s, 8.2ms

<h1>Save model</h1>

In [None]:
from pathlib import Path
import shutil

import re

RUNS_DIR = Path("/content/drive/MyDrive/Neodata Hackathon 2025/YOLO/runs")
RUN_NAME = "yolov8n_balanced_train"
DST = Path("/content/drive/MyDrive/Neodata Hackathon 2025/YOLO/models/facade_yolov8n_best.pt")

# Ultralytics appends a number to the run folder when the name is already taken
# ("yolov8n_balanced_train", "yolov8n_balanced_train2", ...), so a hard-coded
# ".../yolov8n_balanced_train/weights/best.pt" can silently copy a stale checkpoint.
# Consider every run folder of this name and take the most recently written best.pt.
run_dir_pattern = re.compile(rf"{re.escape(RUN_NAME)}\d*")
candidates = [
    p for p in RUNS_DIR.glob(f"{RUN_NAME}*/weights/best.pt")
    if run_dir_pattern.fullmatch(p.parent.parent.name)
]
if not candidates:
    raise FileNotFoundError(f"No best.pt found under {RUNS_DIR} for run name '{RUN_NAME}'")
SRC = max(candidates, key=lambda p: p.stat().st_mtime)

DST.parent.mkdir(parents=True, exist_ok=True)
shutil.copy(SRC, DST)

print("Copied from:", SRC)
print("Saved model to:", DST)

Saved model to: /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/models/facade_yolov8n_best.pt


<h1>Predict for new data</h1>

In [None]:
from ultralytics import YOLO
from pathlib import Path
import numpy as np
import pandas as pd

# ===== PATHS =====
MODEL_PATH = Path("/content/drive/MyDrive/Neodata Hackathon 2025/YOLO/models/facade_yolov8n_best.pt")
TEST_IMAGES = Path("/content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/test")

assert MODEL_PATH.exists(), f"Missing model: {MODEL_PATH}"
assert TEST_IMAGES.exists(), f"Missing test data: {TEST_IMAGES}"


def _top_detection(result):
    """Return ``(class_id, confidence)`` of the most confident box, or ``(None, None)``."""
    boxes = result.boxes
    if boxes is None or len(boxes) == 0:
        return None, None
    scores = boxes.conf.detach().cpu().numpy()
    labels = boxes.cls.detach().cpu().numpy().astype(int)
    top = int(np.argmax(scores))
    return int(labels[top]), float(scores[top])


# ===== LOAD MODEL =====
model = YOLO(str(MODEL_PATH))

# ===== RUN PREDICTION =====
# save=True writes annotated images; save_txt=False skips the raw YOLO txt files.
results = model.predict(
    source=str(TEST_IMAGES),
    imgsz=384,
    conf=0.25,
    iou=0.5,
    save=True,
    save_txt=False,
)

# ===== PROCESS RESULTS =====
# One row per image, keeping only the highest-confidence detection (None when nothing found).
rows = []
for r in results:
    top_class, top_conf = _top_detection(r)
    rows.append({
        "image": Path(r.path).name,
        "predicted_class": top_class,
        "confidence": round(top_conf, 4) if top_conf is not None else None,
    })

df = pd.DataFrame(rows).sort_values("image")

# ===== DISPLAY =====
print("\n=== MODEL PREDICTIONS (ONE CLASS PER IMAGE) ===")
print(df.to_string(index=False))

# ===== OPTIONAL: save CSV =====
# The summary is written next to the test images.
OUT_CSV = TEST_IMAGES / "predictions_summary.csv"
df.to_csv(OUT_CSV, index=False)
print(f"\nSaved prediction summary to: {OUT_CSV}")

print("\nAnnotated images saved next to runs/ directory (Ultralytics default).")


image 1/9 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/test/0 (1).jpg: 288x384 1 facade_element0, 8.9ms
image 2/9 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/test/0.jpg: 288x384 1 facade_element0, 7.7ms
image 3/9 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/test/1 (1).jpg: 384x288 2 facade_element1s, 8.4ms
image 4/9 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/test/1 (2).jpg: 288x384 2 facade_element1s, 8.3ms
image 5/9 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/test/1 (3).jpg: 288x384 1 facade_element1, 8.5ms
image 6/9 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/test/1 (4).jpg: 288x384 2 facade_element1s, 7.9ms
image 7/9 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/test/1.jpg: 384x288 1 facade_element1, 8.4ms
image 8/9 /content/drive/MyDrive/Neodata Hackathon 2025/YOLO/dataset/images/test/Kopija datoteke IMG_5659.jpg: 288x384 1 facade_e