In [1]:
# Mount Google Drive into the Colab runtime; the dataset archive and the model
# checkpoint used by later cells are read from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
!unzip /content/drive/MyDrive/LineMOD_DATASET/Linemod_preprocessed.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Linemod_preprocessed/segnet_results/11_label/0206_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0207_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0208_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0209_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0210_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0211_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0212_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0213_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0214_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0215_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0216_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0218_label.png  
  inflating: Linemod_preprocessed/s

In [3]:
!pip install pcl
!pip install open3d
!pip install plotly
!pip install pyyaml

Collecting pcl
  Downloading pcl-0.0.0.post1.tar.gz (1.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pcl
  Building wheel for pcl (setup.py) ... [?25l[?25hdone
  Created wheel for pcl: filename=pcl-0.0.0.post1-py3-none-any.whl size=1690 sha256=8fd673ea3336de15dc4f232e7942984bd83b846d8686d8c9e266066bc1262912
  Stored in directory: /root/.cache/pip/wheels/1b/bb/f6/c265bd6290eba943c39fb0181f694c57c2bf0d24f8348f7219
Successfully built pcl
Installing collected packages: pcl
Successfully installed pcl-0.0.0.post1
Collecting open3d
  Downloading open3d-0.19.0-cp311-cp311-manylinux_2_31_x86_64.whl.metadata (4.3 kB)
Collecting dash>=2.6.0 (from open3d)
  Downloading dash-3.0.4-py3-none-any.whl.metadata (10 kB)
Collecting configargparse (from open3d)
  Downloading ConfigArgParse-1.7-py3-none-any.whl.metadata (23 kB)
Collecting ipywidgets>=8.0.4 (from open3d)
  Downloading ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting addict 

In [4]:
import os
import random
import shutil
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F  # Import for F.mse_loss
from torchvision import models
import cv2
import yaml
from tqdm import tqdm
import open3d as o3d
import numpy as np



In [5]:
def load_model_points(obj_id, model_path_root="/content/Linemod_preprocessed/data/models"):
    """Load the vertices of an object's mesh as a float32 array.

    Args:
        obj_id: Integer object id; the file read is ``obj_<id, 2 digits>.ply``.
        model_path_root: Directory containing the ``obj_XX.ply`` files.
            NOTE(review): the evaluation functions in this notebook default to
            ``/content/Linemod_preprocessed/models`` instead -- confirm which
            location is correct for the extracted archive.

    Returns:
        ``(N, 3)`` float32 array of mesh vertices, in the units stored in the file.

    Raises:
        FileNotFoundError: if the mesh file does not exist.
        ValueError: if the file was read but yielded no vertices.
    """
    obj_filename = f"obj_{obj_id:02d}.ply"
    obj_path = os.path.join(model_path_root, obj_filename)

    # Fail loudly here: an empty vertex array would otherwise flow into the ADD
    # computation and surface much later as a NaN score.
    if not os.path.isfile(obj_path):
        raise FileNotFoundError(f"Model file not found: {obj_path}")

    mesh = o3d.io.read_triangle_mesh(obj_path)
    vertices = np.asarray(mesh.vertices).astype(np.float32)  # (N, 3)
    if vertices.shape[0] == 0:
        raise ValueError(f"No vertices could be read from: {obj_path}")
    return vertices


In [6]:
def transform_pts(points, R, t):
    """Apply the rigid transform ``R @ p + t`` to every row of ``points``.

    Args:
        points: ``(N, 3)`` array of points, one per row.
        R: ``(3, 3)`` rotation matrix.
        t: translation added to every rotated point (broadcast over rows).

    Returns:
        ``(N, 3)`` array of transformed points.
    """
    # Rotate in column-vector form, then go back to one point per row.
    rotated = np.matmul(R, points.T).T
    return rotated + t

In [7]:
def compute_add(pred_R, pred_t, gt_R, gt_t, model_points):
    """Mean distance between model points under the predicted and ground-truth pose.

    Both poses are applied to the same ``model_points``; the result is the
    average Euclidean distance between corresponding transformed points.

    Args:
        pred_R, pred_t: predicted rotation matrix and translation.
        gt_R, gt_t: ground-truth rotation matrix and translation.
        model_points: ``(N, 3)`` array of model points.

    Returns:
        Scalar mean point-to-point distance, in the units of the inputs.
    """
    predicted = transform_pts(model_points, pred_R, pred_t)
    reference = transform_pts(model_points, gt_R, gt_t)
    per_point_distance = np.linalg.norm(predicted - reference, axis=1)
    return np.mean(per_point_distance)

In [8]:
from scipy.spatial.transform import Rotation as R

def quat_to_rot_matrix(q):
    """Convert a quaternion to a rotation matrix.

    Args:
        q: quaternion in scalar-last order ``[x, y, z, w]``.

    Returns:
        The corresponding rotation matrix as produced by SciPy's ``Rotation``.
    """
    rotation = R.from_quat(q)  # scalar-last convention: [x, y, z, w]
    return rotation.as_matrix()


In [9]:
from tqdm import tqdm
import open3d as o3d
def evaluate_ADD(model, dataloader, device, model_path_root="/content/Linemod_preprocessed/models"):
    """Compute the mean ADD score of a pose model over a dataloader.

    Args:
        model: network returning ``(quaternion, translation)`` for an image batch;
            quaternions are in ``[x, y, z, w]`` order.
        dataloader: yields dict batches with keys ``'image'``,
            ``'rotation_matrix'``, ``'translation'`` and ``'class_id'``.
        device: device the image batch is moved to before the forward pass.
        model_path_root: directory holding the ``obj_XX.ply`` model files.

    Returns:
        Mean ADD over all evaluated samples (also printed, labelled in meters).
    """
    model.eval()
    add_scores = []
    # obj_id -> model points in meters. Each mesh is read from disk once instead
    # of once per sample; reloading it for every image dominated the run time.
    points_cache = {}

    with torch.no_grad():
        for batch in tqdm(dataloader):
            images = batch['image'].to(device)
            gt_rot = batch['rotation_matrix'].numpy()  # (B, 3, 3)
            gt_trans = batch['translation'].numpy()    # (B, 3)
            class_ids = batch['class_id'].numpy()      # (B,)

            pred_quat, pred_trans = model(images)      # (B, 4), (B, 3)
            pred_quat = F.normalize(pred_quat, dim=1).cpu().numpy()
            pred_trans = pred_trans.cpu().numpy()

            # One batched conversion instead of a Python loop; the float32 cast
            # keeps the same precision the per-sample tensor buffer used to give.
            pred_rot = R.from_quat(pred_quat).as_matrix().astype(np.float32)  # (B, 3, 3)

            for i in range(len(images)):
                obj_id = int(class_ids[i])
                if obj_id not in points_cache:
                    # Convert mm to meters
                    points_cache[obj_id] = load_model_points(obj_id, model_path_root) / 1000.0

                add = compute_add(
                    pred_rot[i], pred_trans[i],
                    gt_rot[i], gt_trans[i],
                    points_cache[obj_id]
                )
                add_scores.append(add)

    mean_add = np.mean(add_scores)
    print(f" Mean ADD: {mean_add:.4f} meters")

    return mean_add

In [10]:

###per class
from tqdm import tqdm
import torch
import torch.nn.functional as F
import numpy as np
from scipy.spatial.transform import Rotation as R
from collections import defaultdict

def evaluate_ADD_2(model, dataloader, device, model_path_root="/content/Linemod_preprocessed/models"):
    """Compute ADD scores per object class and overall for a pose model.

    Args:
        model: network returning ``(quaternion, translation)`` for an image batch;
            quaternions are in ``[x, y, z, w]`` order.
        dataloader: yields dict batches with keys ``'image'``,
            ``'rotation_matrix'``, ``'translation'`` and ``'class_id'``.
        device: device the image batch is moved to before the forward pass.
        model_path_root: directory holding the ``obj_XX.ply`` model files.

    Returns:
        ``(mean_add_per_class, overall_mean_add)`` where the first item maps each
        object id to its mean ADD and the second is the mean over all samples.
        Both are also printed, labelled in meters.
    """
    model.eval()
    add_scores_per_class = defaultdict(list)
    all_add_scores = []
    # obj_id -> model points in meters. Each mesh is read from disk once instead
    # of once per sample; reloading it for every image dominated the run time.
    points_cache = {}

    with torch.no_grad():
        for batch in tqdm(dataloader):
            images = batch['image'].to(device)
            gt_rot = batch['rotation_matrix'].numpy()  # (B, 3, 3)
            gt_trans = batch['translation'].numpy()    # (B, 3)
            class_ids = batch['class_id'].numpy()      # (B,)

            pred_quat, pred_trans = model(images)      # (B, 4), (B, 3)
            pred_quat = F.normalize(pred_quat, dim=1).cpu().numpy()
            pred_trans = pred_trans.cpu().numpy()

            # One batched conversion instead of a Python loop; the float32 cast
            # keeps the same precision the per-sample tensor buffer used to give.
            pred_rot = R.from_quat(pred_quat).as_matrix().astype(np.float32)  # (B, 3, 3)

            for i in range(len(images)):
                obj_id = int(class_ids[i])
                if obj_id not in points_cache:
                    # Convert mm to meters
                    points_cache[obj_id] = load_model_points(obj_id, model_path_root) / 1000.0

                add = compute_add(
                    pred_rot[i], pred_trans[i],
                    gt_rot[i], gt_trans[i],
                    points_cache[obj_id]
                )

                add_scores_per_class[obj_id].append(add)
                all_add_scores.append(add)

    # Compute mean ADD per class
    mean_add_per_class = {obj_id: np.mean(scores) for obj_id, scores in add_scores_per_class.items()}
    overall_mean_add = np.mean(all_add_scores)

    print(" Mean ADD per class:")
    for obj_id, mean_add in sorted(mean_add_per_class.items()):
        print(f"  Class {obj_id:02d}: {mean_add:.4f} meters")

    print(f"\n Overall Mean ADD: {overall_mean_add:.4f} meters")

    return mean_add_per_class, overall_mean_add


In [11]:
import os
import yaml
import torch
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from scipy.spatial.transform import Rotation as R

class LinemodPoseDataset(Dataset):
    """Cropped LineMOD object images paired with their ground-truth 6D pose.

    Image files in ``cropped_dir`` are named ``<class>_<image>.png``; the class
    part selects ``<linemod_root>/<class>/gt.yml`` and the image part selects
    the entry inside it.

    Args:
        cropped_dir: directory containing the cropped ``.png`` images.
        linemod_root: directory containing one sub-folder per class (``01`` ..
            ``15``), each with a ``gt.yml`` file.
        transform: optional callable applied to the PIL image. When ``None``,
            a default resize-to-224 / to-tensor / normalize pipeline is used.
    """

    def __init__(self, cropped_dir, linemod_root, transform=None):
        self.cropped_dir = cropped_dir
        self.linemod_root = linemod_root

        # Honour a caller-supplied transform; previously the argument was
        # accepted but always overwritten by the default pipeline.
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        self.transform = transform

        self.img_filenames = sorted([
            f for f in os.listdir(cropped_dir) if f.endswith(".png")
        ])

        # Preload gt.yml data for all classes (classes without a file are skipped)
        self.gt_data = {}
        for class_id in range(1, 16):
            class_str = f"{class_id:02d}"
            gt_path = os.path.join(linemod_root, class_str, "gt.yml")
            if os.path.exists(gt_path):
                with open(gt_path, 'r') as f:
                    self.gt_data[class_str] = yaml.safe_load(f)

    def __len__(self):
        """Number of cropped images found in ``cropped_dir``."""
        return len(self.img_filenames)

    def __getitem__(self, idx):
        """Return image tensor, pose (quaternion, matrix, translation) and ids.

        Raises:
            KeyError: if the class or image id is absent from the loaded
                ``gt.yml`` data, or no entry matches the object's class id.
        """
        filename = self.img_filenames[idx]
        class_id, img_id_str = filename.split("_")
        img_id = int(os.path.splitext(img_id_str)[0])

        # Resolve the pose before touching the image file so that a missing
        # annotation fails fast. A bare next() would raise StopIteration, which
        # an enclosing for-loop over a DataLoader would silently treat as the
        # normal end of iteration.
        pose_list = self.gt_data[class_id][img_id]
        pose = next(
            (item for item in pose_list if item['obj_id'] == int(class_id)),
            None,
        )
        if pose is None:
            raise KeyError(
                f"No pose with obj_id {int(class_id)} for image {img_id} "
                f"(file {filename})"
            )

        R_mat = np.array(pose['cam_R_m2c']).reshape(3, 3).astype(np.float32)
        quat = R.from_matrix(R_mat).as_quat().astype(np.float32)  # [x, y, z, w]
        quat /= np.linalg.norm(quat)

        t_vec = np.array(pose['cam_t_m2c'], dtype=np.float32) / 1000.0  # mm -> m

        img_path = os.path.join(self.cropped_dir, filename)
        img = Image.open(img_path).convert('RGB')
        img_tensor = self.transform(img)

        return {
            'image': img_tensor,
            'rotation': torch.tensor(quat),
            'rotation_matrix': torch.tensor(R_mat),
            'translation': torch.tensor(t_vec),  # in meters (divided by 1000 above)
            'class_id': int(class_id),
            'filename': filename
        }





In [12]:
import torch.nn as nn
import torchvision.models as models
class PoseNet6D(nn.Module):
    """ResNet-50 backbone with two linear heads regressing a 6D pose.

    Args:
        pretrained: when true, the backbone is initialised with the
            ``IMAGENET1K_V2`` torchvision weights; otherwise it starts untrained.
    """

    def __init__(self, pretrained=True):
        super(PoseNet6D, self).__init__()
        backbone = models.resnet50(
            weights=models.ResNet50_Weights.IMAGENET1K_V2 if pretrained else None
        )
        # Drop the final fc layer; the remaining stack outputs (B, 2048, 1, 1)
        self.backbone = nn.Sequential(*list(backbone.children())[:-1])

        self.fc_rot = nn.Linear(2048, 4)   # Quaternion output (x, y, z, w)
        self.fc_trans = nn.Linear(2048, 3) # Translation output (x, y, z)

    def forward(self, x):
        """Return ``(unit quaternion (B, 4), translation (B, 3))`` for a batch."""
        # flatten(1) keeps the batch dimension. A plain squeeze() also removed
        # it when B == 1, which made the dim=1 normalisation below fail.
        features = self.backbone(x).flatten(1)  # (B, 2048)
        rot = self.fc_rot(features)             # (B, 4)
        trans = self.fc_trans(features)         # (B, 3)
        rot = F.normalize(rot, dim=1)           # Unit quaternion
        return rot, trans


In [None]:
import os
import random
import shutil
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F  # Import for F.mse_loss
from torchvision import models

# --- Paths and split configuration ---
cropped_dir = "/content/drive/MyDrive/yolo_models/linemod_yolo_v8n/RGB_crop/train/train_cropped_objects"  # folder with all cropped images
linemod_root="/content/Linemod_preprocessed/data"
train_dir = "/content/train"
val_dir = "/content/val"
test_dir = "/content/drive/MyDrive/yolo_models/linemod_yolo_v8n/RGB_crop/test/test_cropped_objects"
split_ratio = 0.8  # 80% train, 20% val

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# NOTE(review): os.listdir order is not guaranteed, so the seeded split below is
# only reproducible if the listing order is stable between runs.
image_files = [f for f in os.listdir(cropped_dir) if f.endswith(".png")]
train_files, val_files = train_test_split(image_files, train_size=split_ratio, random_state=42)

# Copy training images
for file in train_files:
    shutil.copy(os.path.join(cropped_dir, file), os.path.join(train_dir, file))

# Copy validation images
for file in val_files:
    shutil.copy(os.path.join(cropped_dir, file), os.path.join(val_dir, file))

print(f"Train: {len(train_files)} images, Val: {len(val_files)} images")
# train_dataset = LinemodPoseDataset(train_dir, linemod_root)
val_dataset = LinemodPoseDataset(val_dir, linemod_root)
# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


# --- Model ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PoseNet6D()
model.to(device)
# Load model weights. map_location remaps the stored tensors onto the active
# device, so a checkpoint saved from a GPU run also loads on a CPU-only runtime.
state_dict = torch.load(
    "/content/drive/MyDrive/yolo_models/linemod_yolo_v8n/poseModel/posenet6d_best_model.pth",
    map_location=device,
)
model.load_state_dict(state_dict)

# Create test DataLoader
test_dataset = LinemodPoseDataset(test_dir, linemod_root)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
# Persist the dataset object (including its parsed gt.yml data) so a later
# session can reload it without rebuilding it.
save_path = "/content/drive/MyDrive/test_dataset_baseline.pt"
torch.save(test_dataset, save_path)
print(f"test_dataset saved at: {save_path}")

# print(" Validation Set ADD Evaluation:")
# mean_add_per_class, overall_mean_add=evaluate_ADD_2(model, val_loader, device)

print("Test Set ADD Evaluation:")
mean_add_per_class, overall_mean_add=evaluate_ADD_2(model, test_loader, device)




In [16]:
# Reload the pickled dataset object written by the previous cell.
# NOTE: weights_only=False unpickles arbitrary Python objects -- only load files
# you created yourself. LinemodPoseDataset must already be defined in this session.
test_dataset = torch.load("/content/drive/MyDrive/test_dataset_baseline.pt", weights_only=False)
print("test_dataset loaded.")
# Evaluation does not benefit from shuffling; a fixed order keeps runs comparable.
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)
print("test loader done")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = PoseNet6D()  #  import PoseNet6D model
model.to(device)
# Load model weights. map_location remaps the stored tensors onto the active
# device, so a checkpoint saved from a GPU run also loads on a CPU-only runtime.
state_dict = torch.load(
    "/content/drive/MyDrive/yolo_models/linemod_yolo_v8n/poseModel/posenet6d_best_model.pth",
    map_location=device,
)
model.load_state_dict(state_dict)
print("model loading done")
mean_add_per_class, overall_mean_add=evaluate_ADD_2(model, test_loader, device)



test_dataset loaded.
test loader done
model loading done


100%|██████████| 838/838 [2:45:10<00:00, 11.83s/it]

📏 Mean ADD per class:
  Class 01: 0.1236 meters
  Class 02: 0.1009 meters
  Class 04: 0.1005 meters
  Class 05: 0.1033 meters
  Class 06: 0.1109 meters
  Class 08: 0.1105 meters
  Class 09: 0.1174 meters
  Class 10: 0.1127 meters
  Class 11: 0.1100 meters
  Class 12: 0.1164 meters
  Class 13: 0.1124 meters
  Class 14: 0.1066 meters
  Class 15: 0.1079 meters

📊 Overall Mean ADD: 0.1103 meters



