In [10]:
# ──────────────────────────────────────────────────────────────────────────────
#  hardest_validation_samples.py
# ──────────────────────────────────────────────────────────────────────────────
from os import name
from pathlib import Path
import math, json, numpy as np
from ultralytics import YOLO
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from tqdm import tqdm


def hardest_validation_samples(
    model_weight: str,
    data_yaml: str,
    save_dir: str = "runs/val_hardness",
    conf_thr: float = 0.25,
    top_percent: float = 0.10,
    iou_type: str = "keypoints",        # "keypoints"  |  "bbox"
    oks_iou: float = 0.50               # IoU / OKS threshold that counts as “found”
):
    """
    Parameters
    ----------
    model_weight : str
        Path to the trained *.pt* checkpoint.
    data_yaml : str
        Your dataset YAML (must list train/val paths and key-point metadata).
    save_dir : str, optional
        Where intermediate JSON/TXT files will be written.
    conf_thr : float, optional
        Minimum confidence a prediction must have to be kept during validation.
    top_percent : float, optional
        Fraction of validation images to return as the “hardest” subset.
    iou_type : {"keypoints","bbox"}, optional
        Metric family to evaluate with COCOeval.
    oks_iou : float, optional
        IoU/OKS level that defines a *successful* detection.
    ---------------------------------------------------------------------------
    Returns
    -------
    hard_samples : list[(file_name:str, score:float)]
        Sorted hardest→easiest images and their AP/OKS scores.
    detected_percent : float
        Percentage of ground-truth objects that were correctly detected
        (IoU/OKS ≥ `oks_iou` and confidence ≥ `conf_thr`).
    """

    # 1) Ultralytics validation (writes predictions.json & labels.json)
    model = YOLO(model_weight)
    model.to('mps')

    if Path(save_dir+"/val").exists():
        print(f"Deleting previous validation results in {save_dir}/val")
        for f in Path(save_dir+"/val").glob("*"):
            f.unlink()
        Path(save_dir+"/val").rmdir()
    
    val_res = model.val(data=data_yaml,
                        save_json=True,
                        save_txt=False,
                        save_conf=True,
                        conf=conf_thr,
                        project=save_dir)

    work = Path(val_res.save_dir)                 # ← folder created by YOLO
    preds_json = work / "predictions.json"
    gts_json   = work / "labels.json"             # auto-generated GT in COCO format

    # 2) COCO-style evaluation, but we keep every per-image record
    coco_gt = COCO(str(gts_json))
    coco_dt = coco_gt.loadRes(str(preds_json))
    ev      = COCOeval(coco_gt, coco_dt, iouType=iou_type)
    ev.params.useCats = 1
    ev.evaluate(); ev.accumulate()

    thr_idx       = list(ev.params.iouThrs).index(oks_iou)
    img_scores    = {}
    gt_detected   = 0

    for rec in tqdm(ev.evalImgs):                       # one dict per image×category
        if rec is None:
            continue
        img_id      = rec["image_id"]
        # precision dims: [T×R×K×A×M]
        precisions  = rec["precision"][thr_idx, :, :, 0, 0]
        score       = np.nanmean(precisions)      # mean over recalls & classes
        img_scores.setdefault(img_id, []).append(score)

        # how many GTs matched a prediction at this IoU/OKS?
        matched     = (rec["matches"][thr_idx] > 0).sum()
        gt_detected += matched

    # mean over classes for each image
    img_scores = {k: float(np.nanmean(v)) for k, v in img_scores.items()}

    # 3) hardest N %
    n_imgs      = len(img_scores)
    n_hard      = max(1, math.ceil(n_imgs * top_percent))
    worst_first = sorted(img_scores.items(), key=lambda kv: kv[1])[:n_hard]
    hard_samples = [(coco_gt.imgs[i]["file_name"], s) for i, s in worst_first]

    # 4) overall detection percentage
    n_gt             = sum(len(v) for v in coco_gt.imgToAnns.values())
    detected_percent = 100.0 * gt_detected / n_gt if n_gt else 0.0

    return hard_samples, detected_percent


# ───────────────────────────── example usage ────────────────────────────────
'''hardest, pct = hardest_validation_samples(
    model_weight="/Users/tristan/Downloads/2560-yolo11s-pose-mosaic-2025-05-01-00_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.69337-mAP50_0.98331.pt",
    data_yaml="/Users/tristan/Downloads/yolo11_v04_trainingdata/data.yaml",
    conf_thr=0.25,           # same threshold you use in production
    top_percent=0.10,
)'''



'hardest, pct = hardest_validation_samples(\n    model_weight="/Users/tristan/Downloads/2560-yolo11s-pose-mosaic-2025-05-01-00_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.69337-mAP50_0.98331.pt",\n    data_yaml="/Users/tristan/Downloads/yolo11_v04_trainingdata/data.yaml",\n    conf_thr=0.25,           # same threshold you use in production\n    top_percent=0.10,\n)'

In [11]:
# ═════════════════════════════════════════════════════════════════════════════
#  Hard‑sample mining WITHOUT labels.json
# ═════════════════════════════════════════════════════════════════════════════
from pathlib import Path
import yaml, cv2, numpy as np
from tqdm.notebook import tqdm
from IPython.display import display, Image
from ultralytics import YOLO
import os
import csv                      # ✱ FP/DUP
from collections import Counter # ✱ FP/DUP

# ─────────────────────────────────────────────────────────────────────────────
#  CONFIG  – EDIT THESE
# ─────────────────────────────────────────────────────────────────────────────
MODEL_W   = "/Users/tristan/Downloads/4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt"     # your checkpoint
#MODEL_W = "/Users/tristan/Downloads/1280-yolo11m-pose-mosaic-2025-04-29-11_yolo11_v04_trainingdata-1-mAP5095_0.61153-mAP50_0.97339.pt"     
DATA_YAML = "/Users/tristan/Downloads/wolf trial pose.v10i.yolov8/data.yaml"  # your dataset yaml
SAVE_DIR  = Path("val_hardness")             # ← must match the folder
IMG_SZ    = 2560                                   # same as your val size
CONF_THR  = 0.25                                  # deployment conf
TOP_PERC  = 1.0                                  # top‑10 % hardest
IOU_THR   = 0.50

# ─────────────────────────────────────────────────────────────────────────────

# ═════════════════════════════════════════════════════════════════════════════
#  helper: IoU for two XYXY boxes
# ═════════════════════════════════════════════════════════════════════════════
def xywhn_to_xyxy(box, w, h):
    xc, yc, bw, bh = box
    return np.array([(xc - bw/2)*w,
                     (yc - bh/2)*h,
                     (xc + bw/2)*w,
                     (yc + bh/2)*h])

def box_iou_matrix(boxes1, boxes2):
    """
    boxes*: (N,4) or (M,4) arrays, xyxy in absolute pixels.
    Returns IoU matrix shape (N, M).
    """
    if boxes1.size == 0 or boxes2.size == 0:
        return np.zeros((len(boxes1), len(boxes2)), dtype=float)
    tl = np.maximum(boxes1[:, None, :2], boxes2[None, :, :2])  # top‑left
    br = np.minimum(boxes1[:, None, 2:], boxes2[None, :, 2:])  # bottom‑right
    wh = np.clip(br - tl, 0, None)
    inter = wh[:, :, 0] * wh[:, :, 1]
    a1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    a2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    return inter / (a1[:, None] + a2[None, :] - inter + 1e-6)

# ═════════════════════════════════════════════════════════════════════════════
#  Part 1 – find hardest validation frames
# ═════════════════════════════════════════════════════════════════════════════
# … all previous imports & helpers stay exactly the same …

# ═════════════════════════════════════════════════════════════════════════════
#  Part 1 – find hardest frames  (now ranked by two metrics)
# ═════════════════════════════════════════════════════════════════════════════
def hardest_validation_samples_txt(
    model_weight: str,
    data_yaml: str,
    conf_thr: float = 0.25,
    iou_thr: float = 0.50,
    top_percent: float = 0.10,
    img_size: int = 640,
    subset: str = "val",
):
    meta      = yaml.safe_load(open(data_yaml))
    data_root = Path(data_yaml).resolve().parent
    val_root  = (data_root / meta[subset]).resolve()
    is_txt_listing = val_root.suffix == ".txt"

    if is_txt_listing:
        with open(val_root) as f:
            img_paths = [Path(p.strip()) for p in f if p.strip()]
    else:
        img_paths = sorted([p for p in val_root.rglob("*")
                            if p.suffix.lower() in {".jpg", ".jpeg", ".png"}])

    model = YOLO(model_weight).to("mps")

    per_image_stats = []      # ★ MULTI‑METRIC  (store all we need in 1 tuple)
    csv_rows        = []
    total_gt = total_found = fp_total = dup_total = 0

    for p in tqdm(img_paths, desc="validating"):
        lbl_path = Path(f"{data_root}/{meta[subset]}/../labels") / p.with_suffix(".txt").name
        gt_boxes = np.empty((0,4))
        if lbl_path.exists() and lbl_path.stat().st_size:
            h, w = cv2.imread(str(p)).shape[:2]
            rows = np.loadtxt(lbl_path, ndmin=2, dtype=float)
            gt_boxes = np.stack([xywhn_to_xyxy(r[1:5], w, h) for r in rows])
        n_gt = len(gt_boxes)

        pred = model.predict(str(p), imgsz=img_size, conf=conf_thr,
                             verbose=False)[0]
        pred_boxes = pred.boxes.xyxy.cpu().numpy() if pred.boxes else np.empty((0,4))
        n_pred = len(pred_boxes)

        false_pos = duplicates = found = 0
        if n_pred:
            if n_gt:
                ious = box_iou_matrix(gt_boxes, pred_boxes)
                best_gt   = ious.argmax(0)
                best_iou  = ious[best_gt, range(n_pred)]
                assigned  = np.where(best_iou >= iou_thr, best_gt, -1)
                ctr       = Counter(assigned[assigned >= 0])
                found      = len(ctr)
                duplicates = sum(c-1 for c in ctr.values())
                false_pos  = (assigned == -1).sum()
            else:
                false_pos = n_pred

        recall = found / n_gt if n_gt else 1.0

        # ★ MULTI‑METRIC — keep both recall & fp+dup
        per_image_stats.append((str(p), recall, false_pos + duplicates))

        csv_rows.append([str(p), n_gt, n_pred, found,
                         false_pos, duplicates, recall])

        total_gt    += n_gt
        total_found += found
        fp_total    += false_pos
        dup_total   += duplicates

    # ★ MULTI‑METRIC — sort by tuple:  (low recall  →  high fp+dup)
    n_hard = max(1, int(len(per_image_stats) * top_percent))
    hard_sorted = sorted(
        per_image_stats,
        key=lambda t: (-t[2], t[1])   # (recall ↑ , fp+dup ↓)  so we invert fp
    )[:n_hard]

    overall_recall = 100.0 * total_found / total_gt if total_gt else 0.0

    # … CSV / TXT saving section stays unchanged …
    # (use csv_rows as before)

    return hard_sorted, overall_recall


# ═════════════════════════════════════════════════════════════════════════════
#  Run the pipeline
# ═════════════════════════════════════════════════════════════════════════════
'''hardest, pct = hardest_validation_samples_txt(
    model_weight=MODEL_W,
    data_yaml=DATA_YAML,
    conf_thr=CONF_THR,
    iou_thr=IOU_THR,
    top_percent=TOP_PERC,
    img_size=IMG_SZ,
    subset="train",
)
print(f"\n📊  overall recall (IoU≥{IOU_THR}, conf≥{CONF_THR}) = {pct:.1f}%")
'''

'hardest, pct = hardest_validation_samples_txt(\n    model_weight=MODEL_W,\n    data_yaml=DATA_YAML,\n    conf_thr=CONF_THR,\n    iou_thr=IOU_THR,\n    top_percent=TOP_PERC,\n    img_size=IMG_SZ,\n    subset="train",\n)\nprint(f"\n📊  overall recall (IoU≥{IOU_THR}, conf≥{CONF_THR}) = {pct:.1f}%")\n'

In [12]:
from pathlib import Path
import yaml, cv2, numpy as np
from tqdm.notebook import tqdm
from IPython.display import display, Image
from ultralytics import YOLO
import os
import csv                      # ✱ FP/DUP
from collections import Counter # ✱ FP/DUP


# ═════════════════════════════════════════════════════════════════════════════
#  evaluate_model_txt  – now with mAP50‑95
# ═════════════════════════════════════════════════════════════════════════════
def evaluate_model_txt(
    model_weight: str,
    data_yaml: str,
    subset: str = "val",
    conf_thr: float = 0.25,
    iou_thr: float = 0.50,   # still used for recall / precision columns
    img_size: int = 640,
):
    """
    Analyse *every* image in `subset` (train/val) and return:

        { model, subset, n_images, n_gt, n_pred,
          matched_gt, false_pos, duplicates,
          recall, precision,
          mAP50, mAP50_95 }

    * mAP50 is AP at IoU ≥ `iou_thr` (default 0.50)
    * mAP50_95 is the mean AP over IoU = 0.50 … 0.95 (step 0.05)
      – 10 thresholds, class‑agnostic.
    """
    # ------------------------------------------------------------------ setup
    meta      = yaml.safe_load(open(data_yaml))
    data_root = Path(data_yaml).resolve().parent
    split_dir = (data_root / meta[subset]).resolve()

    img_paths = ( [Path(p.strip()) for p in open(split_dir) if p.strip()]
                  if split_dir.suffix == ".txt"
                  else sorted([p for p in split_dir.rglob("*")
                               if p.suffix.lower() in {".jpg", ".jpeg", ".png"}]) )

    model = YOLO(model_weight).to("mps")

    # running tallies --------------------------------------------------------
    total_gt = total_pred = matched_gt = fp_total = dup_total = 0

    # AP data  ---------------------------------------------------------------
    iou_thresholds = np.arange(0.50, 0.96, 0.05)         # 0.50 … 0.95
    n_thr          = len(iou_thresholds)
    scores_all     = [[] for _ in range(n_thr)]          # per‑thr score list
    tp_flags_all   = [[] for _ in range(n_thr)]          # per‑thr TP mask


    per_image_stats = []  

    # ---------------------------------------------------------------- loop over images
    for img_path in tqdm(img_paths, desc=f"evaluating {subset}"):
        lbl_path = (data_root / meta[subset] / "../labels" /
                    img_path.with_suffix(".txt").name).resolve()

        gt_boxes = np.empty((0, 4))
        if lbl_path.exists() and lbl_path.stat().st_size:
            h, w = cv2.imread(str(img_path)).shape[:2]
            rows = np.loadtxt(lbl_path, ndmin=2, dtype=float)
            gt_boxes = np.stack([xywhn_to_xyxy(r[1:5], w, h) for r in rows])
        n_gt = len(gt_boxes)
        total_gt += n_gt

        # make separate 'seen' flags for every IoU threshold
        gt_seen = [np.zeros(n_gt, dtype=bool) for _ in range(n_thr)]

        pred = model.predict(str(img_path), imgsz=img_size,
                             conf=conf_thr, verbose=False)[0]
        # ── handle case: NO predictions ─────────────────────────────────
        if pred.boxes is None or len(pred.boxes) == 0:
            matched_here = 0
            fp_local     = dup_local = 0
            recall_img   = 0.0 if n_gt else 1.0
            missed_local = n_gt - matched_here                          # ★ add FN
            per_image_stats.append(
                (str(img_path), recall_img, fp_local + dup_local + missed_local)
            )
            matched_gt  += matched_here
            fp_total    += fp_local
            dup_total   += dup_local
            continue

        boxes   = pred.boxes.xyxy.cpu().numpy()
        scores  = pred.boxes.conf.cpu().numpy()
        order   = scores.argsort()[::-1]          # high → low
        boxes, scores = boxes[order], scores[order]
        total_pred += len(boxes)

        # IoU matrix once per image
        ious_img = ( box_iou_matrix(boxes, gt_boxes) if n_gt else
                     np.zeros((len(boxes), 0)) )

        # ---- per prediction, update every threshold -------------------
        for j, (bx, sc) in enumerate(zip(boxes, scores)):
            ious_pred = ious_img[j] if n_gt else []

            for t_idx, thr in enumerate(iou_thresholds):
                is_tp = False
                if n_gt:
                    best = ious_pred.argmax()
                    if ious_pred[best] >= thr and not gt_seen[t_idx][best]:
                        is_tp = True
                        gt_seen[t_idx][best] = True
                scores_all[t_idx].append(sc)
                tp_flags_all[t_idx].append(int(is_tp))

        # counts for recall/precision columns (use iou_thr = 0.50 by default)
        # counts for recall / FP / dup at reference IoU
        # counts for recall / FP / dup at reference IoU
        if n_gt:
            ref_idx      = iou_thresholds.tolist().index(iou_thr)
            matched_here = gt_seen[ref_idx].sum()
            fp_local     = len(boxes) - matched_here
            dup_local    = max(0, fp_local - (n_gt - matched_here))
            recall_img   = matched_here / n_gt
            missed_local = n_gt - matched_here                          # ★ add FN
        else:
            matched_here = 0
            fp_local     = len(boxes)
            dup_local    = 0
            recall_img   = 1.0
            missed_local = 0

        # ★ NEW ➌  – store (path, recall, FP+dup) for this image
        per_image_stats.append(
            (str(img_path), recall_img, fp_local + dup_local + missed_local)
        )

        matched_gt  += matched_here
        fp_total    += fp_local
        dup_total   += dup_local

    # ---------------------------------------------------------------- AP calc
    def ap_from_lists(scores, tps, total_gt):
        s = np.asarray(scores)
        t = np.asarray(tps)
        order = s.argsort()[::-1]
        t = t[order]
        fp = 1 - t
        tp_cum = np.cumsum(t)
        fp_cum = np.cumsum(fp)
        rec = tp_cum / (total_gt + 1e-6)
        prec = tp_cum / (tp_cum + fp_cum + 1e-6)
        return np.trapz(prec, rec)                    # trapezoidal AP

    ap_list = [ap_from_lists(sc, tp, total_gt) for sc, tp in
               zip(scores_all, tp_flags_all)]
    mAP50     = ap_list[0]
    mAP50_95  = sum(ap_list) / n_thr

    # ---------------------------------------------------------------- bundle
    metrics = dict(
        model       = Path(model_weight).name,
        subset      = subset,
        n_images    = len(img_paths),
        n_gt        = total_gt,
        n_pred      = total_pred,
        matched_gt  = matched_gt,
        false_pos   = fp_total,
        duplicates  = dup_total,
        recall      = 100.0 * matched_gt / total_gt if total_gt else 0.0,
        precision   = 100.0 * matched_gt / total_pred if total_pred else 0.0,
        mAP50       = mAP50   * 100.0,
        mAP50_95    = mAP50_95 * 100.0,
        per_image   = per_image_stats,        
        data_yaml  = data_yaml,
    )

    import torch
    if torch.backends.mps.is_available():
        # Release GPU memory on macOS
        torch.mps.empty_cache()

    return metrics

In [13]:
#MODEL_W   = "/Users/tristan/Downloads/2560-yolo11s-pose-mosaic-2025-05-01-00_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.69337-mAP50_0.98331.pt"     # your checkpoint
#MODEL_W = "/Users/tristan/Downloads/1280-yolo11m-pose-mosaic-2025-04-29-11_yolo11_v04_trainingdata-1-mAP5095_0.61153-mAP50_0.97339.pt"     
#DATA_YAML = "/Users/tristan/trex/docs/notebooks/wolf-trial-pose-26/data.yaml"  # your dataset yaml
DATA_YAML = "/Users/tristan/Downloads/wolf trial pose.v10i.yolov8/data.yaml"  # your dataset yaml
SAVE_DIR  = Path("val_hardness")             # ← must match the folder
IMG_SZ    = 2560                                   # same as your val size
CONF_THR  = 0.25                                  # deployment conf
TOP_PERC  = 1.0                                  # top‑10 % hardest
IOU_THR   = 0.50

def xywhn_to_xyxy(box, w, h):
    xc, yc, bw, bh = box
    return np.array([(xc - bw/2)*w,
                     (yc - bh/2)*h,
                     (xc + bw/2)*w,
                     (yc + bh/2)*h])

def box_iou_matrix(boxes1, boxes2):
    """
    boxes*: (N,4) or (M,4) arrays, xyxy in absolute pixels.
    Returns IoU matrix shape (N, M).
    """
    if boxes1.size == 0 or boxes2.size == 0:
        return np.zeros((len(boxes1), len(boxes2)), dtype=float)
    tl = np.maximum(boxes1[:, None, :2], boxes2[None, :, :2])  # top‑left
    br = np.minimum(boxes1[:, None, 2:], boxes2[None, :, 2:])  # bottom‑right
    wh = np.clip(br - tl, 0, None)
    inter = wh[:, :, 0] * wh[:, :, 1]
    a1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    a2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    return inter / (a1[:, None] + a2[None, :] - inter + 1e-6)

In [15]:
metrics_s   = evaluate_model_txt(
    "/Users/tristan/Downloads/4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt", 
    DATA_YAML, 
    subset="val", img_size=4096)

evaluating val:   0%|          | 0/469 [00:00<?, ?it/s]

In [16]:
metrics_x   = evaluate_model_txt(
    "/Users/tristan/Downloads/2560-yolo11x-pose-2025-06-02-21_dataset_reordered-1-mAP5095_0.66212-mAP50_0.96698.pt", 
    DATA_YAML, 
    subset="val", img_size=2560)

evaluating val:   0%|          | 0/469 [00:00<?, ?it/s]

In [17]:
metrics_x_old   = evaluate_model_txt(
    "/Users/tristan/Downloads/2560-yolo11x-pose-2025-06-02-09_dataset_reordered-1-mAP5095_0.65425-mAP50_0.97899.pt", 
    DATA_YAML, 
    subset="val", img_size=2560)

evaluating val:   0%|          | 0/469 [00:00<?, ?it/s]

In [18]:
metrics_old   = evaluate_model_txt(
    "/Users/tristan/Downloads/3008-yolo11s-pose-mosaic1.0-2025-05-28-14_wolf-trial-pose-7-mAP5095_0.69371-mAP50_0.98245.pt", 
    DATA_YAML, 
    subset="val", img_size=3008)

evaluating val:   0%|          | 0/469 [00:00<?, ?it/s]

In [19]:
metrics_new_nano   = evaluate_model_txt(
    "/Users/tristan/Downloads/4096-yolo11n-pose-2025-06-07-11_dataset_reordered-1-mAP5095_0.6639-mAP50_0.97791.pt", 
    DATA_YAML, 
    subset="val", img_size=4096)

evaluating val:   0%|          | 0/469 [00:00<?, ?it/s]

In [21]:
metrics_new_nano2   = evaluate_model_txt(
    "/Users/tristan/Downloads/4096-yolo11n-pose-2025-06-07-23_dataset_reordered-1-mAP5095_0.66337-mAP50_0.97718.pt", 
    DATA_YAML, 
    subset="val", img_size=4096)

evaluating val:   0%|          | 0/469 [00:00<?, ?it/s]

In [22]:

import pandas as pd
pd.DataFrame([metrics_s, metrics_x, metrics_x_old, metrics_old, metrics_new_nano, metrics_new_nano2]).set_index("model")

Unnamed: 0_level_0,subset,n_images,n_gt,n_pred,matched_gt,false_pos,duplicates,recall,precision,mAP50,mAP50_95,per_image,data_yaml
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt,val,469,3055,3280,2959,321,250,96.85761,90.213415,95.813048,58.539692,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....
2560-yolo11x-pose-2025-06-02-21_dataset_reordered-1-mAP5095_0.66212-mAP50_0.96698.pt,val,469,3055,3136,2919,217,134,95.548282,93.080357,93.787775,60.057542,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....
2560-yolo11x-pose-2025-06-02-09_dataset_reordered-1-mAP5095_0.65425-mAP50_0.97899.pt,val,469,3055,3146,2907,239,157,95.155483,92.403051,93.686656,57.174525,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....
3008-yolo11s-pose-mosaic1.0-2025-05-28-14_wolf-trial-pose-7-mAP5095_0.69371-mAP50_0.98245.pt,val,469,3055,3141,2934,207,148,96.03928,93.409742,95.642982,65.884866,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....
4096-yolo11n-pose-2025-06-07-11_dataset_reordered-1-mAP5095_0.6639-mAP50_0.97791.pt,val,469,3055,3136,2952,184,120,96.628478,94.132653,95.950824,63.460178,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....
4096-yolo11n-pose-2025-06-07-23_dataset_reordered-1-mAP5095_0.66337-mAP50_0.97718.pt,val,469,3055,3180,2951,229,162,96.595745,92.798742,95.915285,64.0546,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....


In [20]:

import pandas as pd
pd.DataFrame([metrics_s, metrics_x, metrics_x_old, metrics_old]).set_index("model")

Unnamed: 0_level_0,subset,n_images,n_gt,n_pred,matched_gt,false_pos,duplicates,recall,precision,mAP50,mAP50_95,per_image,data_yaml
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt,val,469,3055,3280,2959,321,250,96.85761,90.213415,95.813048,58.539692,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....
2560-yolo11x-pose-2025-06-02-21_dataset_reordered-1-mAP5095_0.66212-mAP50_0.96698.pt,val,469,3055,3136,2919,217,134,95.548282,93.080357,93.787775,60.057542,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....
2560-yolo11x-pose-2025-06-02-09_dataset_reordered-1-mAP5095_0.65425-mAP50_0.97899.pt,val,469,3055,3146,2907,239,157,95.155483,92.403051,93.686656,57.174525,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....
3008-yolo11s-pose-mosaic1.0-2025-05-28-14_wolf-trial-pose-7-mAP5095_0.69371-mAP50_0.98245.pt,val,469,3055,3141,2934,207,148,96.03928,93.409742,95.642982,65.884866,[(/Users/tristan/Downloads/wolf trial pose.v10...,/Users/tristan/Downloads/wolf trial pose.v10i....


In [41]:

metrics_m   = evaluate_model_txt(
                 "/Users/tristan/Downloads/1280-yolo11m-pose-mosaic-2025-04-29-11_yolo11_v04_trainingdata-1-mAP5095_0.61153-mAP50_0.97339.pt"  , DATA_YAML,
                 subset="val",img_size=1280)


evaluating val:   0%|          | 0/450 [00:00<?, ?it/s]

In [42]:

metrics_2560m   = evaluate_model_txt(
                 "/Users/tristan/Downloads/2560-yolo11m-pose-mosaic1-2025-05-03-13_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.50636-mAP50_0.96336.pt"  , DATA_YAML,
                 subset="val",img_size=2560)


evaluating val:   0%|          | 0/450 [00:00<?, ?it/s]

In [43]:

metrics_mAP5095_067422mAP50_098795   = evaluate_model_txt(
                 "/Users/tristan/Downloads/2560-yolo11s-pose-mosaic1-2025-05-02-23_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.67422-mAP50_0.98795.pt"  , DATA_YAML,
                 subset="val",img_size=2560)


evaluating val:   0%|          | 0/450 [00:00<?, ?it/s]

In [44]:

metrics_adwait   = evaluate_model_txt(
                 "/Users/tristan/Downloads/yolo11_wolf_V047.pt"  , DATA_YAML,
                 subset="val",img_size=2048)


evaluating val:   0%|          | 0/450 [00:00<?, ?it/s]

In [45]:

metrics_3008   = evaluate_model_txt(
                 "/Users/tristan/Downloads/3008-yolo11n-pose-mosaic0.5-2025-05-04-14_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.58645-mAP50_0.96939.pt"  , DATA_YAML,
                 subset="val",img_size=3008)


evaluating val:   0%|          | 0/450 [00:00<?, ?it/s]

In [46]:

metrics_3008inter2   = evaluate_model_txt(
                 "/Users/tristan/Downloads/3008-yolo11n-pose-mosaic0.5-2025-05-05-07_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.60856-mAP50_0.97049.pt"  , DATA_YAML,
                 subset="val",img_size=3008)


evaluating val:   0%|          | 0/450 [00:00<?, ?it/s]

In [None]:

metrics_3008inter3   = evaluate_model_txt(
                 "/Users/tristan/Downloads/3008-yolo11n-pose-mosaic0.25-2025-05-05-16_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.62367-mAP50_0.97582.pt"  , "/Users/tristan/trex/docs/notebooks/wolf-trial-pose-5/data.yaml",
                 subset="val",img_size=3008)


evaluating val:   0%|          | 0/9 [00:00<?, ?it/s]

In [69]:
metrics_3008latest   = evaluate_model_txt(
                 "/Users/tristan/Downloads/3008-yolo11n-pose-mosaic0.25-2025-05-06-18_wolf-trial-pose-5-mAP5095_0.61305-mAP50_0.97417.pt"  , "/Users/tristan/trex/docs/notebooks/wolf-trial-pose-5/data.yaml",
                 subset="val",img_size=3008)

evaluating val:   0%|          | 0/459 [00:00<?, ?it/s]

In [70]:
metrics_s_extra   = evaluate_model_txt(
    "/Users/tristan/Downloads/2560-yolo11s-pose-mosaic-2025-05-01-00_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.69337-mAP50_0.98331.pt", 
    "/Users/tristan/trex/docs/notebooks/wolf-trial-pose-5/data.yaml", 
    subset="val", img_size=2560)

metrics_s_extra

evaluating val:   0%|          | 0/459 [00:00<?, ?it/s]

{'model': '2560-yolo11s-pose-mosaic-2025-05-01-00_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.69337-mAP50_0.98331.pt',
 'subset': 'val',
 'n_images': 459,
 'n_gt': 2963,
 'n_pred': 2975,
 'matched_gt': 2862,
 'false_pos': 113,
 'duplicates': 64,
 'recall': 96.5912926088424,
 'precision': 96.2016806722689,
 'mAP50': 95.93205785062626,
 'mAP50_95': 65.0409044345664,
 'per_image': [('/Users/tristan/trex/docs/notebooks/wolf-trial-pose-5/valid/images/0209_106_PM3_PGCP1_AD_DJI_0618_0209_106_PM3_PGCP1_AD_DJI_0618_P01_114_113513_png_jpg.rf.66a075f33f72fa07bd7319dc5d362896.jpg',
   1.0,
   0),
  ('/Users/tristan/trex/docs/notebooks/wolf-trial-pose-5/valid/images/0209_106_PM3_PGCP1_AD_DJI_0618_0209_106_PM3_PGCP1_AD_DJI_0618_P01_120_119779_png_jpg.rf.cdc86da60686e7218fa9663cd3843fc9.jpg',
   1.0,
   0),
  ('/Users/tristan/trex/docs/notebooks/wolf-trial-pose-5/valid/images/0209_106_PM3_PGCP1_AD_DJI_0618_0209_106_PM3_PGCP1_AD_DJI_0618_P01_13_12451_png_jpg.rf.61715e578e5c70fa6ac17cf24a0b2c18.jpg',
 

In [71]:

import pandas as pd
pd.DataFrame([metrics_s_extra, metrics_3008latest]).set_index("model")

Unnamed: 0_level_0,subset,n_images,n_gt,n_pred,matched_gt,false_pos,duplicates,recall,precision,mAP50,mAP50_95,per_image,data_yaml
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2560-yolo11s-pose-mosaic-2025-05-01-00_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.69337-mAP50_0.98331.pt,val,459,2963,2975,2862,113,64,96.591293,96.201681,95.932058,65.040904,[(/Users/tristan/trex/docs/notebooks/wolf-tria...,/Users/tristan/trex/docs/notebooks/wolf-trial-...
3008-yolo11n-pose-mosaic0.25-2025-05-06-18_wolf-trial-pose-5-mAP5095_0.61305-mAP50_0.97417.pt,val,459,2963,3237,2856,381,312,96.388795,88.229842,95.326509,58.769114,[(/Users/tristan/trex/docs/notebooks/wolf-tria...,/Users/tristan/trex/docs/notebooks/wolf-trial-...


In [50]:

import pandas as pd
pd.DataFrame([metrics_s, metrics_m, metrics_2560m, 
              metrics_mAP5095_067422mAP50_098795, metrics_adwait, metrics_3008, metrics_3008inter2, metrics_3008inter3]).set_index("model")

Unnamed: 0_level_0,subset,n_images,n_gt,n_pred,matched_gt,false_pos,duplicates,recall,precision,mAP50,mAP50_95,per_image
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2560-yolo11s-pose-mosaic-2025-05-01-00_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.69337-mAP50_0.98331.pt,val,450,2941,2969,2856,113,64,97.109827,96.194005,96.450279,65.278979,[(/Users/tristan/Downloads/wolf_trial_pose_v4_...
1280-yolo11m-pose-mosaic-2025-04-29-11_yolo11_v04_trainingdata-1-mAP5095_0.61153-mAP50_0.97339.pt,val,450,2941,2844,2618,226,93,89.017341,92.053446,84.279721,47.789746,[(/Users/tristan/Downloads/wolf_trial_pose_v4_...
2560-yolo11m-pose-mosaic1-2025-05-03-13_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.50636-mAP50_0.96336.pt,val,450,2941,3087,2808,279,166,95.477729,90.962099,92.340565,45.01293,[(/Users/tristan/Downloads/wolf_trial_pose_v4_...
2560-yolo11s-pose-mosaic1-2025-05-02-23_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.67422-mAP50_0.98795.pt,val,450,2941,3043,2864,179,126,97.381843,94.117647,96.250639,63.349018,[(/Users/tristan/Downloads/wolf_trial_pose_v4_...
yolo11_wolf_V047.pt,val,450,2941,3190,2767,423,292,94.083645,86.739812,88.269467,52.455937,[(/Users/tristan/Downloads/wolf_trial_pose_v4_...
3008-yolo11n-pose-mosaic0.5-2025-05-04-14_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.58645-mAP50_0.96939.pt,val,450,2941,3261,2823,438,352,95.987759,86.568537,94.52438,55.249025,[(/Users/tristan/Downloads/wolf_trial_pose_v4_...
3008-yolo11n-pose-mosaic0.5-2025-05-05-07_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.60856-mAP50_0.97049.pt,val,450,2941,3261,2850,411,342,96.905814,87.396504,96.109604,57.837505,[(/Users/tristan/Downloads/wolf_trial_pose_v4_...
3008-yolo11n-pose-mosaic0.25-2025-05-05-16_wolf_trial_pose_v4_yolo8_format-1-mAP5095_0.62367-mAP50_0.97582.pt,val,450,2941,3166,2850,316,251,96.905814,90.018951,96.01137,60.111465,[(/Users/tristan/Downloads/wolf_trial_pose_v4_...


In [None]:
import gc
gc.collect()

In [13]:
# ⇢ add this helper just above visualise_hardest_samples_txt ────────────────
def draw_filled_box_alpha(frame, box_xyxy, color_bgr, alpha=0.35):
    """
    Draw a filled box with transparency on `frame`.
    """
    overlay = frame.copy()
    x1, y1, x2, y2 = map(int, box_xyxy)
    cv2.rectangle(overlay, (x1, y1), (x2, y2), color_bgr, thickness=-1)
    cv2.addWeighted(overlay, alpha, frame, 1-alpha, 0, dst=frame)  # in‑place

# ═════════════════════════════════════════════════════════════════════════════
#  Part 2 – overlay GT + predictions for hardest frames
# ═════════════════════════════════════════════════════════════════════════════
def visualise_hardest_samples_txt(
    model_weight: str,
    data_yaml: str,
    save_dir: Path,
    hard_samples: list,
    img_size: int = 640,
    conf_thr: float = 0.25,
    display_inline: bool = True,
    subset: str = "val",
):
    save_dir = Path(save_dir)
    out_dir  = save_dir / Path(subset+"_overlay")
    out_dir.mkdir(parents=True, exist_ok=True)

    # dataset root for resolving paths when val set is a relative list
    meta     = yaml.safe_load(open(data_yaml))
    data_root = Path(data_yaml).resolve().parent
    label_root = (data_root / meta[subset] / ".." / "labels").resolve()
    print(f"subset = {meta[subset]}")
    print(f"Detected root {data_root} and labels in {label_root}")
    dataset_path = Path(meta.get("path", "")) if meta.get("path") else Path(".")

    # drawing colours
    C_GT  = (80, 240, 120)   # green
    C_PR  = (0,0,255)        # red

    def draw_box(img, box, color, thickness=2):
        x1,y1,x2,y2 = map(int, box)
        cv2.rectangle(img, (x1,y1), (x2,y2), color, thickness, cv2.LINE_AA)

    model = YOLO(model_weight)
    model.to('mps')
    overlay_paths = []

    for p_str, recall, mistakes in tqdm(hard_samples, desc="overlaying"):
        img_path = (dataset_path / p_str).resolve()
        img      = cv2.imread(str(img_path))
        h, w     = img.shape[:2]

        # --- draw GT boxes ---------------------------------------------
        lbl_path = os.path.basename(img_path.with_suffix(".txt"))
        lbl_path = label_root / lbl_path
        if lbl_path.exists() and not lbl_path.stat().st_size == 0:
            rows = np.loadtxt(lbl_path, ndmin=2, dtype=float)
            for row in rows:
                box = xywhn_to_xyxy(row[1:5], w, h)
                draw_filled_box_alpha(img, box, C_GT, alpha=0.25)
                draw_box(img, box, C_GT, thickness=1)
        else:
            print(f"Label file {lbl_path} not found.")
            continue

        # --- draw predictions ------------------------------------------
        preds = model.predict(str(img_path),
                              imgsz=img_size,
                              conf=conf_thr,
                              verbose=False)[0]
        for box in preds.boxes.xyxy.cpu().numpy():
            draw_box(img, box, C_PR)

        # annotate hardness
        cv2.putText(img, f"recall={recall:.2f}" + f"  FP+DUP={mistakes}",
                    (5,18), cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (255,255,255), 2, cv2.LINE_AA)

        outfile = out_dir / (str(mistakes)+"_"+img_path.stem + ".jpg")
        cv2.imwrite(str(outfile), img)
        overlay_paths.append(outfile)

        if display_inline:
            display(Image(filename=str(outfile)))

    print(f"✔  {len(overlay_paths)} overlays written to {out_dir.resolve()}")
    return overlay_paths

DISPLAY   = False                                # show in notebook?

metrics = metrics_s.copy()

if not "data_yaml" in metrics:
    raise ValueError("metrics['data_yaml'] not found")
print(metrics)

# sort metrics["per_image"] by mistakes
metrics["per_image"].sort(key=lambda x: -x[2])  # sort by FP+DUP
# top 10 hardest
hardest_samples = metrics["per_image"].copy()

if len(hardest_samples) > 100:
    hardest_samples = hardest_samples[:int(len(metrics["per_image"])*0.1)]

print(metrics["data_yaml"])
print(metrics["subset"])

visualise_hardest_samples_txt(
    model_weight="/Users/tristan/Downloads/"+metrics['model'],
    data_yaml=metrics["data_yaml"],
    save_dir=Path(metrics["data_yaml"]).parent / Path("eval") / Path("hardness_"+metrics["model"]+"_"+metrics["subset"]),
    hard_samples=hardest_samples,  # top 10 hardest
    img_size=IMG_SZ,
    conf_thr=CONF_THR,
    display_inline=DISPLAY,
    subset=metrics["subset"],                            # "train" or "val"
)

{'model': '4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt', 'subset': 'val', 'n_images': 469, 'n_gt': 3055, 'n_pred': 3248, 'matched_gt': 2905, 'false_pos': 343, 'duplicates': 245, 'recall': 95.0900163666121, 'precision': 89.4396551724138, 'mAP50': 93.69485393046763, 'mAP50_95': 54.17902845913174, 'per_image': [('/Users/tristan/Downloads/wolf trial pose.v10i.yolov8/valid/images/0209_106_PM3_PGCP1_AD_DJI_0618_0209_106_PM3_PGCP1_AD_DJI_0618_P01_114_113513_png_jpg.rf.66a075f33f72fa07bd7319dc5d362896.jpg', 1.0, 2), ('/Users/tristan/Downloads/wolf trial pose.v10i.yolov8/valid/images/0209_106_PM3_PGCP1_AD_DJI_0618_0209_106_PM3_PGCP1_AD_DJI_0618_P01_120_119779_png_jpg.rf.cdc86da60686e7218fa9663cd3843fc9.jpg', 1.0, 0), ('/Users/tristan/Downloads/wolf trial pose.v10i.yolov8/valid/images/0209_106_PM3_PGCP1_AD_DJI_0618_0209_106_PM3_PGCP1_AD_DJI_0618_P01_13_12451_png_jpg.rf.61715e578e5c70fa6ac17cf24a0b2c18.jpg', 1.0, 2), ('/Users/tristan/Downloads/wolf trial po

overlaying:   0%|          | 0/46 [00:00<?, ?it/s]

✔  46 overlays written to /Users/tristan/Downloads/wolf trial pose.v10i.yolov8/eval/hardness_4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt_val/val_overlay


[PosixPath('/Users/tristan/Downloads/wolf trial pose.v10i.yolov8/eval/hardness_4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt_val/val_overlay/22_2309_179_PM3_PGCP1_AG_DJI_0782_2309_179_PM3_PGCP1_AG_DJI_0782_P01_145_144286_png.rf.d8dd7659375ae26efc38edf960bc69bc.jpg'),
 PosixPath('/Users/tristan/Downloads/wolf trial pose.v10i.yolov8/eval/hardness_4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt_val/val_overlay/18_thermal-capture_MP4-0000_jpg.rf.26bc73d9bdc63f0c5899e8d5fbaffda6.jpg'),
 PosixPath('/Users/tristan/Downloads/wolf trial pose.v10i.yolov8/eval/hardness_4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt_val/val_overlay/12_0908_005_PM3_Turn_SH_DJI_0407_MP4-0066_jpg.rf.48880d229ebbe84b30aaaedef96400d8.jpg'),
 PosixPath('/Users/tristan/Downloads/wolf trial pose.v10i.yolov8/eval/hardness_4096-yolo11n-pose-2025-06-02-23_dataset_reordered-1-mAP5095_0.60908-mAP50_0.9115.pt_val/val_o

In [53]:
from roboflow import Roboflow
rf = Roboflow(api_key="AEq9S3vvyVts83OiRKvh")
project = rf.workspace("tmp-lavit").project("wolf-trial-pose-pymny")
version = project.version(5)
dataset = version.download("yolov8")
dataset.location    

loading Roboflow workspace...
loading Roboflow project...


'/Users/tristan/trex/docs/notebooks/wolf-trial-pose-5'

In [55]:
%%writefile {dataset.location}/data.yaml
train: train/images
val: valid/images

kpt_shape: [8, 3]
flip_idx: [0, 1, 2, 3, 4, 5, 6, 7]

nc: 1
names: ['wolf']

Overwriting /Users/tristan/trex/docs/notebooks/wolf-trial-pose-5/data.yaml


In [33]:
len(metrics_3008inter3["per_image"][:int(len(metrics_3008inter3["per_image"])*0.1)])

45

In [None]:
from ultralytics.data.converter import convert_coco

# point this at your COCO-style JSON folder,
# set use_keypoints=True to include the 17 COCO keypoints
convert_coco(
    labels_dir="/Users/tristan/Downloads/wolf_trial_pose_v4_yolo8 (2)/train/",
    save_dir="/Users/tristan/Downloads/wolf_trial_pose_v4_yolo8_format/train/",
    use_keypoints=True
)

convert_coco(
    labels_dir="/Users/tristan/Downloads/wolf_trial_pose_v4_yolo8 (2)/valid/",
    save_dir="/Users/tristan/Downloads/wolf_trial_pose_v4_yolo8_format/valid/",
    use_keypoints=True
)