In [1]:
import ultralytics
# Environment sanity check: prints the Ultralytics / Python / torch versions and the
# detected GPU, CPU count, RAM and disk usage (see this cell's output).
ultralytics.checks()

Ultralytics YOLOv8.2.103  Python-3.11.4 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
Setup complete  (20 CPUs, 23.7 GB RAM, 74.1/100.0 GB disk)


In [1]:
import numpy as np
from ultralytics import YOLO  # if using ultralytics
import cv2

# ---- helper: reverse letterbox (common for YOLO) ----
def scale_coords_from_letterbox(pred_xyxy, orig_shape, inp_shape):
    """
    Map predicted xyxy boxes from the model input (possibly letterboxed) back to original image size.
    pred_xyxy: Nx4 array-like of [x1,y1,x2,y2] in model input coordinates (pixels).
               A single 1-D box is treated as one row; an empty input yields a (0, 4) result.
               Columns beyond the first four (if any) are passed through untouched.
    orig_shape: (height, width) of the original image
    inp_shape: (height, width) used for model input
    Returns: new float32 array of boxes in original image pixel coords, clipped to
             [0, width-1] x [0, height-1]. The input is never modified.
    """
    orig_h, orig_w = orig_shape
    inp_h, inp_w = inp_shape

    # Aspect-preserving scale: the limiting axis decides how much the image was resized.
    scale = min(inp_w / orig_w, inp_h / orig_h)

    # round(), not int(): orig * (inp / orig) can land a hair below the exact integer
    # (e.g. 49 * (256 / 49) == 255.99999999999997). Truncating would shrink the resized
    # size by one pixel and invent half a pixel of padding that was never there.
    new_w = round(orig_w * scale)
    new_h = round(orig_h * scale)
    pad_w = (inp_w - new_w) / 2  # padding assumed to be split evenly left/right
    pad_h = (inp_h - new_h) / 2  # ... and top/bottom

    # np.array copies, so the caller's data is left intact and lists/tuples are accepted.
    boxes = np.array(pred_xyxy, dtype=np.float32)
    if boxes.ndim == 1:
        # Normalise "no boxes" and "one bare box" to the 2-D layout used below.
        boxes = boxes.reshape(0, 4) if boxes.size == 0 else boxes[np.newaxis, :]

    # Undo the letterbox: remove padding first, then undo the resize.
    boxes[:, [0, 2]] -= pad_w
    boxes[:, [1, 3]] -= pad_h
    boxes[:, :4] /= scale

    # Clip to the valid pixel index range of the original image.
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, orig_w - 1)
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, orig_h - 1)
    return boxes

# ---- Run YOLO (Ultralytics) and extract boxes ----
# Forward slashes resolve on Windows as well as Linux/macOS; the backslash-only form
# would be looked up as a single literal file name on POSIX systems.
model = YOLO('runs/detect/train/weights/best.pt')   # point to your trained model

def detect_and_get_boxes(img_bgr, conf_thresh=0.25, imgsz=1280, detector=None):
    """
    Run the detector on one image and return its detections as plain dicts.
    img_bgr: original BGR NumPy image (height x width x channels)
    conf_thresh: confidence threshold forwarded to predict() as `conf`
    imgsz: inference size forwarded to predict(); set it to the size used for training
    detector: optional object exposing predict(source=..., conf=..., imgsz=...);
              when None the module-level `model` is used
    Returns: list of dicts, one per detection, with keys
        'xyxy'  : [x1, y1, x2, y2] in original-image pixels, clipped to [0, W-1] / [0, H-1]
        'xywh'  : [center_x, center_y, width, height] derived from the clipped corners
        'conf'  : confidence as float
        'class' : class index as int
    """
    H, W = img_bgr.shape[:2]
    yolo = model if detector is None else detector
    results = yolo.predict(source=img_bgr, conf=conf_thresh, imgsz=imgsz)

    detections = []
    for r in results:
        if len(r.boxes) == 0:
            continue
        # Per the Ultralytics Results/Boxes API, boxes.xyxy is already expressed in
        # original-image pixels (the letterbox is undone during post-processing), so
        # no reverse-letterbox step is applied here; doing so with the real input size
        # would transform the boxes a second time.
        # astype() copies, so the clipping below never writes into the result object.
        xyxy = r.boxes.xyxy.cpu().numpy().astype(np.float32)  # Nx4
        scores = r.boxes.conf.cpu().numpy()                   # N
        classes = r.boxes.cls.cpu().numpy().astype(int)       # N

        # Keep corners inside the valid pixel index range of the image.
        xyxy[:, [0, 2]] = xyxy[:, [0, 2]].clip(0, W - 1)
        xyxy[:, [1, 3]] = xyxy[:, [1, 3]].clip(0, H - 1)

        for (x1, y1, x2, y2), s, c in zip(xyxy, scores, classes):
            detections.append({
                'xyxy': [float(x1), float(y1), float(x2), float(y2)],
                'xywh': [float((x1+x2)/2), float((y1+y2)/2), float(x2-x1), float(y2-y1)],
                'conf': float(s),
                'class': int(c)
            })
    return detections

# Example use:
# Forward slashes resolve on Windows as well as Linux/macOS.
img_path = 'runs/detect/predict/14_002_5_0025_bmp.rf.debdb2e5cdec339506b1e5944ca03feb.jpg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {img_path}")
dets = detect_and_get_boxes(img, conf_thresh=0.2)
print("Detections:", dets)
# Crop ROI for segmentation:
for i, det in enumerate(dets):
    x1, y1, x2, y2 = map(int, det['xyxy'])
    if x2 <= x1 or y2 <= y1:
        # Degenerate box -> empty crop, which cv2.imwrite refuses to encode.
        continue
    roi = img[y1:y2, x1:x2]   # crop in original pixel coords
    cv2.imwrite(f'roi_{i}.png', roi)



0: 1280x1280 1 Stenosis, 9.0ms
Speed: 15.2ms preprocess, 9.0ms inference, 67.3ms postprocess per image at shape (1, 3, 1280, 1280)
Detections: [{'xyxy': [69.92406463623047, 270.882080078125, 86.36941528320312, 293.2762145996094], 'xywh': [78.14674377441406, 282.07916259765625, 16.445350646972656, 22.394134521484375], 'conf': 0.20134316384792328, 'class': 0}]


yolo predict \
  model=runs/detect/train/weights/best.pt \
  source=test/images \
  save=True
