In [1]:
import os
import sys
import torch
import torchvision.transforms as T
import numpy as np
import pandas as pd
import cv2
import json
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm import tqdm
from ultralytics import YOLO

parent_dir = os.path.abspath('../../')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from utils.heatmap import generate_multi_gaussian_heatmaps, decode_heatmaps
from utils.image_handling import crop_image, pad_bbox
from utils.keypoints import crop_and_resize_keypoints
from utils.evaluation import compute_add, compute_adds, compute_mde, compute_pck, compute_reprojection_error, estimate_pose_pnp, estimate_pose_pnp_ransac

In [2]:
BASE_PATH = Path("../../")

In [3]:
# Load YOLO model
def load_yolo_model(model_path):
    model = YOLO(model_path)
    return model

# Run YOLO inference
def run_yolo_inference(model, image_path):
    results = model(image_path, verbose=False)
    return results

In [4]:
model_path = BASE_PATH / "models/yolo/yolo-lm.pt"   
yolo_model = load_yolo_model(model_path)

In [5]:
def crop_and_resize(image, bbox, target_size):
    # Crop the image using the bounding box
    x, y, w, h = pad_bbox(bbox)
    cropped_image = image[y:y+h, x:x+w]
    # Resize the cropped image to the target size
    resized_image = cv2.resize(cropped_image, (target_size[0], target_size[1]))

    return resized_image

In [6]:
def preprocess_image(image_path,obj_id, fallback):
    image = cv2.imread(str(image_path))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Run inference
    results = yolo_model.predict(source=image, classes=[obj_id-1], conf=0.8, save=False, verbose=False)
    boxes = results[0].boxes.xyxy.cpu().numpy()
    if len(boxes) > 0:
        x1, y1, x2, y2 = boxes[0]
        x = int(x1)
        y = int(y1)
        w = int(x2)-x
        h = int(y2)-y
        crop_resized = crop_image(image, (x, y, w, h))
        crop_tensor = torch.from_numpy(crop_resized.astype(np.float32) / 255.0).permute(2, 0, 1).unsqueeze(0)
        bbox = (x, y, w, h)
        return crop_tensor,  bbox
    else:
        cropped_img = crop_image(image, fallback)
        resized_img = cv2.resize(cropped_img, (128,128))
        crop_tensor = torch.from_numpy(resized_img.astype(np.float32) / 255.0).permute(2, 0, 1).unsqueeze(0)
        return crop_tensor, fallback

In [7]:
from training.keypointnet import KeypointNet
from utils.keypoints import map_keypoints_to_original


def evaluate_model_on_dataset(obj_id):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    NUM_KEYPOINTS = 15
    IMAGE_SIZE =(128, 128)
    MODEL_PATH = BASE_PATH / f"models/r6dnet/obj_{obj_id:06d}.pt"
    model = KeypointNet(
        num_keypoints=NUM_KEYPOINTS,
        output_size=IMAGE_SIZE,
    ).to(device)

    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.eval()

    # test_images_dir = Path(f"datasets/test/{obj_id:06d}/rgb")
    annotations_path = BASE_PATH / f"data/annotations/test/{obj_id:06d}.json"
    keypoints_3D_path = BASE_PATH / f"data/keypoints3d/{obj_id:06d}.json"  # Adjust if needed

    with open(annotations_path, 'r') as f:
        annotations = json.load(f)

    # Load 3D keypoints (for PnP)
    with open(keypoints_3D_path, 'r') as f:
        keypoints_3D = np.array(json.load(f)['keypoints_3D'])  # (N, 3)

    results = []

    for ann in tqdm(annotations, desc="Processing images"):
        image_id = ann["image_id"]
        image_path = BASE_PATH / ann["rgb_path"]
        camera_matrix = np.array(ann['K']).reshape(3, 3)  # (3, 3)
        # bbox =  pad_bbox(ann['bbox_obj'])
        R_gt = np.array(ann['rotation'])
        t_gt = np.array(ann['translation'])

        img, bbox = preprocess_image(image_path, obj_id, fallback=pad_bbox(ann['bbox_obj']))
        img = img.to(device)

        gt_keypoints_2D = np.array(ann['keypoints_2D'])
        
        with torch.no_grad():
            output = model(img)

        output = output.squeeze(0).cpu().numpy()  # (N, H, W)
        # MSE between heatmaps (approximate by sum squared diff)
        pred_heatmaps = output
        pred_keypoints = decode_heatmaps(output)
        pred_keypoints_orig = map_keypoints_to_original(pred_keypoints, bbox, IMAGE_SIZE)


        # Assume ground truth heatmaps would be generated separately if needed (skip for now)
        mse = np.mean(pred_heatmaps ** 2)

   
        mde = compute_mde(pred_keypoints_orig, gt_keypoints_2D)
        pck = compute_pck(pred_keypoints_orig, gt_keypoints_2D, threshold=5.0)
    

        try:
            rvec_pred, tvec_pred = estimate_pose_pnp(keypoints_3D, pred_keypoints_orig, camera_matrix)
            R_pred, _ = cv2.Rodrigues(rvec_pred)
            reproj_error = compute_reprojection_error(R_pred, tvec_pred, keypoints_3D, pred_keypoints_orig, camera_matrix)
            add = compute_add(R_pred, tvec_pred, R_gt, t_gt, keypoints_3D)
            adds = compute_adds(R_pred, tvec_pred, R_gt, t_gt, keypoints_3D)
        except RuntimeError:
            reproj_error = np.nan
            add = np.nan
            adds = np.nan


             # Solve PnP
        try:
            rvec_pred_ransac, tvec_pred_ransac = estimate_pose_pnp_ransac(keypoints_3D, pred_keypoints_orig, camera_matrix, iterationsCount=5000, reprojectionError=20)
            R_pred_ransac, _ = cv2.Rodrigues(rvec_pred_ransac)
            reproj_error_ransac = compute_reprojection_error(R_pred_ransac, tvec_pred_ransac, keypoints_3D, pred_keypoints_orig, camera_matrix)
            add_ransac = compute_add(R_pred_ransac, tvec_pred_ransac, R_gt, t_gt, keypoints_3D)
            adds_ransac = compute_adds(R_pred_ransac, tvec_pred_ransac, R_gt, t_gt, keypoints_3D)
        except RuntimeError:
            reproj_error_ransac = np.nan
            add_ransac = np.nan
            adds_ransac = np.nan
            
        results.append({
            "image_id": image_id,
            "mse": mse,
            "mde": mde,
            "pck": pck,
            "reproj_error": reproj_error,
            "reproj_error_ransac": reproj_error_ransac,
            "add": add,
            "add_ransac": add_ransac,
            "adds": adds,
            "adds_ransac": adds_ransac,
        })

    df = pd.DataFrame(results)
    avg_metrics = df.mean(numeric_only=True)

    print(f"Average metrics over test set:\n{avg_metrics}")

    output_csv = Path(f"results/{obj_id:06d}/test_metrics_yolo.csv")
    output_csv.parent.mkdir(exist_ok=True, parents=True)
    df.to_csv(output_csv, index=False)
    print(f"Saved results to {output_csv}")

In [9]:
obj_id = 11
evaluate_model_on_dataset(obj_id)

Processing images: 100%|██████████| 1220/1220 [00:48<00:00, 25.38it/s]

Average metrics over test set:
image_id                  609.500000
mse                         0.027655
mde                        20.125682
pck                        36.907104
reproj_error             2684.756534
reproj_error_ransac        10.322645
add                         1.593421
add_ransac             136655.812803
adds                        1.539570
adds_ransac            136655.775942
dtype: float64
Saved results to results\000011\test_metrics_yolo.csv



