In [4]:
from ultralytics import YOLO
import os
import cv2

YOLO_MODEL = 'checkpoints/model_main.pt' 
LINEMOD_ROOT = 'dataset/Linemod_preprocessed/data/' 
OUTPUT_CROP_FOLDER = 'Phase3_RGB_Crops' 
CROP_SIZE = (224, 224)  # Resize all crops to this size
CLASS_IDS = [f"{i:02d}" for i in range(1, 14)]  # '01' to '13'
# ===========================================

# 🔹 Create output directory
os.makedirs(OUTPUT_CROP_FOLDER, exist_ok=True)

# 🔹 Load YOLOv8 model
model = YOLO(YOLO_MODEL)

# 🔁 Loop over each class folder (01 to 13)
for cls_id in CLASS_IDS:
    class_path = os.path.join(LINEMOD_ROOT, cls_id)
    
    # Skip missing folders like '03' and '07'
    if not os.path.isdir(class_path):
        print(f"[SKIPPED] Class folder does not exist: {class_path}")
        continue

    class_path = os.path.join(LINEMOD_ROOT, cls_id)
    rgb_folder = os.path.join(class_path, 'rgb')
    train_file = os.path.join(class_path, 'train.txt')  

    # 🔸 Read training image IDs
    with open(train_file, 'r') as f:
        image_ids = [line.strip() for line in f.readlines() if line.strip()]

    print(f"[{cls_id}] Processing {len(image_ids)} training images.")

    for img_id in image_ids:
        img_path = os.path.join(rgb_folder, f"{img_id}.png")

        # Load image
        img = cv2.imread(img_path)
        if img is None:
            print(f"[WARNING] Could not read image: {img_path}")
            continue

        # 🔍 Run YOLOv8 inference
        results = model(img)

        for det_idx, det in enumerate(results[0].boxes):
            # Get bounding box coordinates
            x1, y1, x2, y2 = map(int, det.xyxy[0].tolist())

            # Clamp to image dimensions
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2)

            # 🖼️ Crop the object
            cropped = img[y1:y2, x1:x2]

            # Resize for pose regressor
            cropped_resized = cv2.resize(cropped, CROP_SIZE)

            # Save crop with class and image ID in filename
            filename = f"cls{cls_id}_img{img_id}_det{det_idx}.png"
            save_path = os.path.join(OUTPUT_CROP_FOLDER, filename)
            cv2.imwrite(save_path, cropped_resized)

            print(f"✅ Saved: {filename}")

print("✅ All YOLOv8 crops saved for all classes.")

[01] Processing 186 training images.

0: 480x640 1 ape, 66.3ms
Speed: 2.4ms preprocess, 66.3ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)
✅ Saved: cls01_img0004_det0.png

0: 480x640 1 ape, 37.1ms
Speed: 2.9ms preprocess, 37.1ms inference, 2.3ms postprocess per image at shape (1, 3, 480, 640)
✅ Saved: cls01_img0009_det0.png

0: 480x640 1 ape, 7.9ms
Speed: 1.7ms preprocess, 7.9ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)
✅ Saved: cls01_img0021_det0.png

0: 480x640 1 ape, 8.6ms
Speed: 1.8ms preprocess, 8.6ms inference, 2.2ms postprocess per image at shape (1, 3, 480, 640)
✅ Saved: cls01_img0030_det0.png

0: 480x640 1 ape, 8.1ms
Speed: 1.2ms preprocess, 8.1ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)
✅ Saved: cls01_img0044_det0.png

0: 480x640 1 ape, 8.0ms
Speed: 1.4ms preprocess, 8.0ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)
✅ Saved: cls01_img0048_det0.png

0: 480x640 1 ape, 18.1ms
Speed: 2.0ms pr

In [5]:
import os
import yaml
import csv

# ========== CONFIGURATION ==========
CROP_FOLDER = 'Phase3_RGB_Crops'
LINEMOD_ROOT = 'dataset/Linemod_preprocessed/data'
OUTPUT_CSV = 'pose_labels.csv'
# ===================================

# 🔹 Helper to read YAML
def load_yaml(path):
    with open(path, 'r') as f:
        return yaml.safe_load(f)

# 🔸 Open CSV file for writing
with open(OUTPUT_CSV, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    # Write header
    writer.writerow(['crop_path', 'class_id', 'image_id', 'x', 'y', 'z'] +
                    [f'R{i}' for i in range(1, 10)])  # R1 to R9 for 3×3 matrix

    for filename in os.listdir(CROP_FOLDER):
        if not filename.endswith('.png'):
            continue

        # Example filename: cls01_img000123_det0.png
        parts = filename.split('_')
        class_id = parts[0][3:]     # '01'
        image_id = parts[1][3:]     # '000123'

        gt_path = os.path.join(LINEMOD_ROOT, class_id, 'gt.yml')
        if not os.path.exists(gt_path):
            print(f"[SKIP] Missing gt.yml for class {class_id}")
            continue

        gt_data = load_yaml(gt_path)

        img_index = int(image_id)
        if img_index not in gt_data:
            print(f"[SKIP] No pose for image {image_id} in {class_id}")
            continue

        # Use the FIRST object instance in that image
        obj = gt_data[img_index][0]
        R = obj['cam_R_m2c']  # list of 9 floats
        t = obj['cam_t_m2c']  # list of 3 floats (usually in mm)

        # Convert to meters (optional)
        t = [val / 1000.0 for val in t]

        # Write a row
        writer.writerow([os.path.join(CROP_FOLDER, filename), class_id, image_id] + t + R)

print("✅ Pose label file saved as:", OUTPUT_CSV)


✅ Pose label file saved as: pose_labels.csv


In [7]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms
import pandas as pd
from PIL import Image

class PoseDataset(Dataset):
    def __init__(self, csv_path, image_size=224):
        self.data = pd.read_csv(csv_path)

        self.transform = transforms.Compose([
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet mean
                                 std=[0.229, 0.224, 0.225])   # ImageNet std
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]

        # Load and transform image
        image = Image.open(row['crop_path']).convert('RGB')
        image = self.transform(image)

        # Load pose (3 translation + 9 rotation values)
        pose = row[['x', 'y', 'z'] + [f'R{i}' for i in range(1, 10)]].values.astype('float32')
        pose = torch.tensor(pose)

        return image, pose

In [8]:
dataset = PoseDataset('pose_labels.csv')
image, pose = dataset[0]

print("Image shape:", image.shape)  # [3, 224, 224]
print("Pose vector:", pose)         # Tensor of shape [12]

Image shape: torch.Size([3, 224, 224])
Pose vector: tensor([ 0.1177, -0.0283,  0.9279, -0.9932, -0.1094,  0.0408, -0.1023,  0.6473, -0.7554,  0.0562, -0.7544, -0.6540])


In [9]:
import torch
import torch.nn as nn
import torchvision.models as models

class PoseRegressionNet(nn.Module):
    def __init__(self, pretrained=True):
        super(PoseRegressionNet, self).__init__()

        # 🔹 Load pretrained ResNet18 and remove its classifier
        resnet = models.resnet18(pretrained=pretrained)
        self.backbone = nn.Sequential(*list(resnet.children())[:-1])  # Remove final FC layer

        # 🔹 Add a new regression head: input 512-d → output 12-d
        self.regressor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 12)  # Output: x, y, z, r11..r33
        )

    def forward(self, x):
        features = self.backbone(x)     # Shape: [B, 512, 1, 1]
        output = self.regressor(features)  # Shape: [B, 12]
        return output

In [10]:
model = PoseRegressionNet()
print(model)



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /home/erythm/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44.7M/44.7M [00:22<00:00, 2.08MB/s]


PoseRegressionNet(
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_

In [12]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim


# Load dataset
dataset = PoseDataset('pose_labels.csv')
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Instantiate model
model = PoseRegressionNet()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Loss & Optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)



In [13]:
EPOCHS = 20

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0

    for images, poses in dataloader:
        images = images.to(device)
        poses = poses.to(device)

        optimizer.zero_grad()

        outputs = model(images)           # [B, 12]
        loss = criterion(outputs, poses)  # MSE loss

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {avg_loss:.6f}")

Epoch 1/20 - Loss: 0.168848
Epoch 2/20 - Loss: 0.044755
Epoch 3/20 - Loss: 0.022297
Epoch 4/20 - Loss: 0.014712
Epoch 5/20 - Loss: 0.013493
Epoch 6/20 - Loss: 0.010694
Epoch 7/20 - Loss: 0.008546
Epoch 8/20 - Loss: 0.008115
Epoch 9/20 - Loss: 0.008142
Epoch 10/20 - Loss: 0.007708
Epoch 11/20 - Loss: 0.006305
Epoch 12/20 - Loss: 0.007238
Epoch 13/20 - Loss: 0.007089
Epoch 14/20 - Loss: 0.006736
Epoch 15/20 - Loss: 0.005771
Epoch 16/20 - Loss: 0.005289
Epoch 17/20 - Loss: 0.005062
Epoch 18/20 - Loss: 0.005174
Epoch 19/20 - Loss: 0.005072
Epoch 20/20 - Loss: 0.004782


In [14]:
torch.save(model.state_dict(), 'pose_regression_model.pth')