# Test evaluation and visuals

This notebook evaluates a trained Ultralytics YOLO model on the dataset's test split and shows:
- confusion matrix images saved by Ultralytics
- sample predictions on test images


In [None]:
from pathlib import Path
import random

import yaml
from ultralytics import YOLO
import matplotlib.pyplot as plt
from PIL import Image

# Config: edit these values before running the cells below.
DATASET_NAME = "Diurnas"  # dataset folder name; or 'Noturnas'
MODEL_PATH = None  # set to a .pt path or leave None to auto-pick latest
DEVICE = "0"  # e.g. '0' or 'cpu'; None leaves the device argument out of validation
IMGSZ = 640  # image size passed to both validation and prediction
CONF = 0.25  # confidence value passed to both validation and prediction
MAX_SAMPLES = 12  # upper bound on test images rendered in the prediction grid
RANDOM_SAMPLE = True  # shuffle the test images before taking the first MAX_SAMPLES


In [None]:
# When the notebook is launched from a 'notebooks' folder (any letter case),
# the project root is that folder's parent; otherwise it is the working directory.
CWD = Path.cwd()
if CWD.name.lower() == 'notebooks':
    PROJECT_ROOT = CWD.parent
else:
    PROJECT_ROOT = CWD

def dataset_base_dirs(name):
    """Return the candidate folders for dataset *name*, in lookup priority order.

    The candidates are ``PROJECT_ROOT/datasets/name``, ``PROJECT_ROOT/dataset/name``
    and ``PROJECT_ROOT/name``.
    """
    nested = [PROJECT_ROOT / parent / name for parent in ('datasets', 'dataset')]
    return [*nested, PROJECT_ROOT / name]

def resolve_dataset_dir(name):
    """Return the first candidate folder for *name* that contains a ``data.yaml``.

    When no candidate has a ``data.yaml``, the highest-priority candidate is
    returned anyway so the caller can report the path it expected.
    """
    options = dataset_base_dirs(name)
    with_yaml = (folder for folder in options if (folder / 'data.yaml').exists())
    return next(with_yaml, options[0])

def resolve_model_path(dataset_dir):
    """Return the most recently modified checkpoint under a ``runs/segment`` folder.

    ``dataset_dir/runs/segment`` is searched when it exists; otherwise
    ``PROJECT_ROOT/runs/segment`` is used. ``weights/best.pt`` files are
    preferred, and ``weights/last.pt`` files are considered only when no
    ``best.pt`` is found. Returns ``None`` when neither runs folder exists or
    no checkpoint is found.
    """
    def modified_time(path):
        return path.stat().st_mtime

    runs_root = dataset_dir / 'runs' / 'segment'
    if not runs_root.exists():
        runs_root = PROJECT_ROOT / 'runs' / 'segment'
        if not runs_root.exists():
            return None

    for pattern in ('weights/best.pt', 'weights/last.pt'):
        matches = list(runs_root.rglob(pattern))
        if matches:
            # Newest file wins; on equal timestamps the first match is kept.
            return max(matches, key=modified_time)
    return None

# Locate the dataset folder and stop early when its data.yaml is absent.
dataset_dir = resolve_dataset_dir(DATASET_NAME)
data_yaml_path = dataset_dir / 'data.yaml'
if not data_yaml_path.exists():
    raise FileNotFoundError(f'data.yaml not found for {DATASET_NAME}: {data_yaml_path}')

# An explicit MODEL_PATH takes precedence over the auto-detected checkpoint.
if MODEL_PATH:
    model_path = Path(MODEL_PATH)
else:
    model_path = resolve_model_path(dataset_dir)
if not (model_path and Path(model_path).exists()):
    raise FileNotFoundError('Model path not found. Set MODEL_PATH to a .pt file.')

print('Dataset dir:', dataset_dir)
print('Data yaml:', data_yaml_path)
print('Model path:', model_path)


In [None]:
# Lower-case suffixes that count as images when scanning the test split.
IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp'}

# safe_load returns None for an empty file; fall back to an empty mapping.
data_cfg = yaml.safe_load(data_yaml_path.read_text(encoding='utf-8')) or {}

# `path` may be missing or present-but-empty (null) in data.yaml. Path(None)
# raises TypeError, so both cases fall back to the folder holding data.yaml.
data_root = Path(data_cfg.get('path') or data_yaml_path.parent)
if not data_root.is_absolute():
    # Relative roots are interpreted relative to the data.yaml location.
    data_root = (data_yaml_path.parent / data_root).resolve()

# An empty entry ('' or []) is as unusable as a missing one: '' would resolve
# to the dataset root itself and pull in every image below it.
test_split = data_cfg.get('test')
if not test_split:
    raise ValueError('data.yaml has no test split. Add a test entry to run test evaluation.')

# Normalise the entry to a list of sources; data.yaml may give one or several.
split_sources = list(test_split) if isinstance(test_split, (list, tuple)) else [test_split]

def resolve_split_path(split_entry):
    """Return *split_entry* as a path, anchoring relative entries at ``data_root``."""
    candidate = Path(split_entry)
    return candidate if candidate.is_absolute() else data_root / candidate

def _read_image_list(list_path):
    """Return the existing image files named in a text list, one path per line.

    Blank lines are ignored. Entries starting with './' are taken relative to
    the folder containing the list file; other entries are used as written.
    NOTE(review): this mirrors how Ultralytics is understood to resolve list
    entries -- confirm against the installed version.
    """
    listed = []
    for raw_line in list_path.read_text(encoding='utf-8').splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('./'):
            image_path = list_path.parent / line[2:]
        else:
            image_path = Path(line)
        # Same filter as the directory scan: real files with an image suffix.
        if image_path.is_file() and image_path.suffix.lower() in IMAGE_EXTS:
            listed.append(image_path)
    return listed


# Gather test images from every split source: a folder (scanned recursively),
# a single image file, or a .txt file listing image paths.
test_images = []
for entry in split_sources:
    split_path = resolve_split_path(entry)
    if split_path.is_dir():
        test_images.extend(
            p for p in split_path.rglob('*')
            if p.is_file() and p.suffix.lower() in IMAGE_EXTS
        )
    elif not split_path.is_file():
        print('Skip missing test path:', split_path)
    elif split_path.suffix.lower() in IMAGE_EXTS:
        test_images.append(split_path)
    elif split_path.suffix.lower() == '.txt':
        test_images.extend(_read_image_list(split_path))
    else:
        # The file exists but is neither an image nor a list; say so rather
        # than reporting it as missing.
        print('Skip unsupported test path:', split_path)

# De-duplicate and sort so the listing is stable between runs.
test_images = sorted(set(test_images))
print('Test images found:', len(test_images))


In [None]:
# Load the checkpoint selected earlier in the notebook.
model = YOLO(str(model_path))

# Evaluate on the 'test' split declared in data.yaml.
val_args = {
    'data': str(data_yaml_path),
    'split': 'test',
    'imgsz': IMGSZ,
    'conf': CONF,
}
# Only pass a device when one is configured.
if DEVICE is not None:
    val_args['device'] = DEVICE

results = model.val(**val_args)

# Path('') is Path('.'), which always exists. Keep save_dir as None when no
# output folder is reported, so later checks on save_dir do not silently fall
# back to the current working directory.
reported_dir = getattr(results, 'save_dir', None)
save_dir = Path(reported_dir) if reported_dir else None
print('Results dir:', save_dir)
if results is not None:
    print(results)
    if hasattr(results, 'results_dict'):
        print('Results dict:', results.results_dict)


In [None]:
# Display the confusion-matrix images found in the validation output folder.
if save_dir and save_dir.exists():
    cm_paths = [
        save_dir / 'confusion_matrix.png',
        save_dir / 'confusion_matrix_normalized.png',
    ]
    for cm_path in cm_paths:
        if not cm_path.exists():
            print('Missing:', cm_path)
            continue
        # The context manager releases the file handle even if plotting raises.
        with Image.open(cm_path) as img:
            plt.figure(figsize=(6, 6))
            plt.imshow(img)
            plt.title(cm_path.name)
            plt.axis('off')
            plt.show()
else:
    print('No results dir found; skip confusion matrix.')


In [None]:
def show_grid(images, cols=3, figsize=(12, 8)):
    """Draw *images* on a grid of axes with *cols* columns, filled row by row.

    Prints a notice and returns without plotting when *images* is empty.
    Every axis has its frame hidden; cells past the last image stay blank.
    """
    if not images:
        print('No images to show.')
        return

    total = len(images)
    rows = (total + cols - 1) // cols
    # squeeze=False always returns a 2-D array of axes, whatever the grid shape.
    _, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    for position, ax in enumerate(axes.flat):
        ax.axis('off')
        if position < total:
            ax.imshow(images[position])
    plt.tight_layout()

# Take up to MAX_SAMPLES test images, shuffled first when RANDOM_SAMPLE is set.
sample = list(test_images)
if RANDOM_SAMPLE:
    random.shuffle(sample)
sample = sample[:MAX_SAMPLES]

if sample:
    preds = model.predict(
        source=sample,
        imgsz=IMGSZ,
        conf=CONF,
        device=DEVICE,
        verbose=False,
        save=False,
    )
    # Reverse the last axis of each plotted result (BGR to RGB) before display.
    rendered = [res.plot()[:, :, ::-1] for res in preds]
    show_grid(rendered, cols=3, figsize=(12, 8))
else:
    print('No test images to predict.')
