In [None]:
import os
import sys
import torch
import torchvision.transforms as T
import numpy as np
import pandas as pd
import cv2
import json
from pathlib import Path
import matplotlib.pyplot as plt
from tqdm import tqdm

# Make the directory two levels above the current working directory importable
# so the project-local packages imported below (`utils`, ...) can be resolved.
# The membership guard keeps re-running this cell from appending duplicates.
parent_dir = os.path.abspath(os.path.join(os.pardir, os.pardir))
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

from utils.heatmap import generate_multi_gaussian_heatmaps, decode_heatmaps
from utils.image_handling import crop_image, pad_bbox
from utils.keypoints import crop_and_resize_keypoints
from utils.evaluation import compute_add, compute_adds, compute_mde, compute_pck, compute_reprojection_error, estimate_pose_pnp, estimate_pose_pnp_ransac

In [3]:
# Relative root (two directories up from the working directory) that the model,
# annotation and keypoint paths in this notebook are built from.
BASE_PATH = Path("..", "..")

In [None]:
def preprocess_image(image_path, bbox):
    """Load an image, crop it to ``bbox`` and return it as a batched tensor.

    Args:
        image_path: Path (or string) of the image file; it is converted with
            ``str`` before being handed to ``cv2.imread``.
        bbox: Bounding box forwarded unchanged to ``crop_image``. Its layout is
            defined by that helper (TODO confirm: presumably pixel coordinates).

    Returns:
        The ``T.ToTensor()`` conversion of the RGB crop, with an extra leading
        dimension of size 1 added by ``unsqueeze(0)``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load the file.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None instead
        # of raising; fail here with the path rather than with an opaque
        # cv2.error from cvtColor a line later.
        raise FileNotFoundError(
            f"Could not read image (missing or unreadable): {image_path}"
        )

    # OpenCV loads channels as BGR; reorder to RGB before cropping.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    cropped_img = crop_image(img, bbox)
    cropped_img = T.ToTensor()(cropped_img)
    return cropped_img.unsqueeze(0)

In [None]:
# from ultralytics import YOLO


# def preprocess_image_with_yolo(image_path):
#     YOLO_MODEL_PATH = BASE_PATH / "models/yolo/yolo-lm.pt"
#     yolo_model = YOLO(YOLO_MODEL_PATH)

#     img = cv2.imread(str(image_path))
#     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#     cropped_img = crop_image(img, bbox)
#     cropped_img = T.ToTensor()(cropped_img)
#     return cropped_img.unsqueeze(0)

In [None]:
from training.keypointnet import KeypointNet


def _pose_metrics(solve_pnp, keypoints_3D, pred_keypoints, gt_keypoints_2D,
                  camera_matrix, R_gt, t_gt):
    """Run one PnP solver and score the pose it returns.

    Args:
        solve_pnp: Callable taking ``(keypoints_3D, pred_keypoints, camera_matrix)``
            and returning ``(rvec, tvec)``, e.g. ``estimate_pose_pnp`` or
            ``estimate_pose_pnp_ransac``.
        keypoints_3D: 3D model keypoints passed to the solver and the metrics.
        pred_keypoints: Predicted 2D keypoints passed to the solver.
        gt_keypoints_2D: Ground-truth 2D keypoints used for the reprojection error.
        camera_matrix: Camera intrinsics passed to the solver and the
            reprojection metric.
        R_gt: Ground-truth rotation matrix.
        t_gt: Ground-truth translation.

    Returns:
        Tuple ``(reproj_error, add, adds)``. All three are ``np.nan`` when the
        solver or any metric raises ``RuntimeError`` or ``cv2.error``.
    """
    try:
        rvec_pred, tvec_pred = solve_pnp(keypoints_3D, pred_keypoints, camera_matrix)
        R_pred, _ = cv2.Rodrigues(rvec_pred)
        reproj_error = compute_reprojection_error(
            rvec_pred, tvec_pred, keypoints_3D, gt_keypoints_2D, camera_matrix
        )
        add = compute_add(R_pred, tvec_pred, R_gt, t_gt, keypoints_3D)
        adds = compute_adds(R_pred, tvec_pred, R_gt, t_gt, keypoints_3D)
    except (RuntimeError, cv2.error):
        # cv2.error derives from Exception, not RuntimeError, so it has to be
        # listed explicitly for an OpenCV failure to be recorded as NaN instead
        # of aborting the whole evaluation run.
        return np.nan, np.nan, np.nan
    return reproj_error, add, adds


def evaluate_model_on_dataset(obj_id):
    """Evaluate the keypoint model of one object on its test annotations.

    Loads the checkpoint ``models/r6dnet/obj_{obj_id:06d}.pt``, the test
    annotations and the 3D keypoints from below ``BASE_PATH``, runs the network
    on every annotated image, and collects per-image 2D keypoint metrics
    (``mde``, ``pck``) and pose metrics (reprojection error, ADD, ADD-S) for
    both the plain and the RANSAC PnP solver.

    Args:
        obj_id: Integer object id; formatted as a zero-padded six-digit number
            in every path.

    Returns:
        None. The column means are printed and the per-image table is written
        to ``results/{obj_id:06d}/test_metrics_gt.csv`` (relative to the current
        working directory, not to ``BASE_PATH``).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    NUM_KEYPOINTS = 15
    IMAGE_SIZE = (128, 128)
    MODEL_PATH = BASE_PATH / f"models/r6dnet/obj_{obj_id:06d}.pt"

    model = KeypointNet(
        num_keypoints=NUM_KEYPOINTS,
        output_size=IMAGE_SIZE,
    ).to(device)

    # NOTE(security): torch.load unpickles the checkpoint, which can execute
    # arbitrary code; only load checkpoints from a trusted source.
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.eval()

    annotations_path = BASE_PATH / f"data/annotations/test/{obj_id:06d}.json"
    keypoints_3D_path = BASE_PATH / f"data/keypoints3d/{obj_id:06d}.json"  # Adjust if needed

    with open(annotations_path, 'r') as f:
        annotations = json.load(f)

    # 3D keypoints are the object-side correspondences for PnP.
    with open(keypoints_3D_path, 'r') as f:
        keypoints_3D = np.array(json.load(f)['keypoints_3D'])

    results = []

    for ann in tqdm(annotations, desc="Processing images"):
        image_id = ann["image_id"]
        image_path = BASE_PATH / ann["rgb_path"]
        camera_matrix = np.array(ann['K']).reshape(3, 3)
        bbox = pad_bbox(ann['bbox_obj'])

        # The stored rotation is converted with cv2.Rodrigues, i.e. it is
        # treated as a rotation vector and turned into a matrix here.
        R_gt, _ = cv2.Rodrigues(np.array(ann['rotation']))
        t_gt = np.array(ann['translation'])

        img = preprocess_image(image_path, bbox=bbox).to(device)
        gt_keypoints_2D = crop_and_resize_keypoints(np.array(ann['keypoints_2D']), bbox)

        with torch.no_grad():
            output = model(img)

        # Drop the batch dimension and move the prediction to NumPy.
        heatmaps = output.squeeze(0).cpu().numpy()
        pred_keypoints = decode_heatmaps(heatmaps)

        # NOTE(review): no ground-truth heatmaps are generated, so this is the
        # mean squared activation of the prediction, not an error against a
        # target. The column name "mse" is kept for output compatibility.
        mse = np.mean(heatmaps ** 2)

        mde = compute_mde(pred_keypoints, gt_keypoints_2D)
        pck = compute_pck(pred_keypoints, gt_keypoints_2D, threshold=5.0)

        # NOTE(review): pred_keypoints come from the network output and
        # gt_keypoints_2D are passed through crop_and_resize_keypoints, while
        # camera_matrix is the unmodified K from the annotation. Confirm the
        # helpers account for the crop/resize, otherwise PnP is solved in a
        # different pixel frame than K describes.
        reproj_error, add, adds = _pose_metrics(
            estimate_pose_pnp, keypoints_3D, pred_keypoints, gt_keypoints_2D,
            camera_matrix, R_gt, t_gt,
        )
        reproj_error_ransac, add_ransac, adds_ransac = _pose_metrics(
            estimate_pose_pnp_ransac, keypoints_3D, pred_keypoints, gt_keypoints_2D,
            camera_matrix, R_gt, t_gt,
        )

        results.append({
            "image_id": image_id,
            "mse": mse,
            "mde": mde,
            "pck": pck,
            "reproj_error": reproj_error,
            "reproj_error_ransac": reproj_error_ransac,
            "add": add,
            "add_ransac": add_ransac,
            "adds": adds,
            "adds_ransac": adds_ransac,
        })

    df = pd.DataFrame(results)
    # NaN entries from failed PnP solves are skipped by DataFrame.mean.
    avg_metrics = df.mean(numeric_only=True)

    print(f"Average metrics over test set:\n{avg_metrics}")

    output_csv = Path(f"results/{obj_id:06d}/test_metrics_gt.csv")
    output_csv.parent.mkdir(exist_ok=True, parents=True)
    df.to_csv(output_csv, index=False)
    print(f"Saved results to {output_csv}")

In [None]:
# Run the full test-set evaluation for object 1; the function prints the
# averaged metrics and writes the per-image CSV.
obj_id = 1
evaluate_model_on_dataset(obj_id=obj_id)