# 6D Pose Estimation

## Setup

LineMOD dataset: https://drive.google.com/drive/folders/19ivHpaKm9dOrr12fzC8IDFczWRPFxho7

In [None]:
COMET_ML = False # if True, the notebook connects to comet.ml to log the experiment results; an API key is required.

In [None]:
# List the contents of the current working directory.
!ls

In [None]:
# Run this right after opening the notebook on Colab.
!git clone https://github.com/luigiaceto/6D-pose-estimation.git

In [None]:
# Enter the cloned repository; later cells use paths relative to it.
%cd 6D-pose-estimation

In [None]:
# Run ONLY IF changes were pushed to the repo while this Colab session was open.
!git pull

Install all PyTorch dependencies:

In [None]:
# The PyTorch installation differs depending on the hardware it runs on.
# Listing it in the requirements would install only the CPU/default build.
!pip install torch torchvision torchaudio

Install all packages (you may need to restart the runtime before continuing):

In [None]:
# Install the remaining dependencies listed in the project's requirements file.
!pip install -r ./requirements.txt
# Reminder only: this cell does not restart the runtime by itself.
print("Restart runtime")

In [None]:
# List the contents of the current working directory.
!ls

In [None]:
import torch
import time
from torch.utils.data import DataLoader
import wandb

import comet_ml
from comet_ml import Experiment
from comet_ml.integration.pytorch import watch

from utils.data_exploration import load_image

from data.CustomDatasetPose import IMG_WIDTH, IMG_HEIGHT

Set seed and device:

In [None]:
# Fix the seed (42) and obtain the device through the project's init helpers.
from utils.init import set_device, set_seed

set_seed(42)
device = set_device()

## Download dataset

In [None]:
# Create the dataset folder (no error if it already exists) and move into it.
!mkdir -p datasets/linemod
%cd datasets/linemod

In [None]:
# Download the shared Google Drive folder into the current directory.
!gdown --folder "https://drive.google.com/drive/folders/1xEHOOLrkLD814mA9cJqM2kZ-vn35Zr7s?usp=drive_link"

In [None]:
# Enter the DenseFusion folder (presumably created by the download above — confirm).
%cd DenseFusion

In [None]:
# Extract the preprocessed LineMOD archive, then delete the archives.
# `rm -f` does not complain if trained_checkpoints.zip is absent.
!unzip Linemod_preprocessed.zip
!rm Linemod_preprocessed.zip
!rm -f trained_checkpoints.zip

In [None]:
# Go back to the project root folder.
%cd ../../../

Get working directory:

In [None]:
# Capture (rather than print) the working directory: `!pwd` returns the command
# output as a list of lines, and the first line is kept as a plain string.
path = !pwd
path = path[0]

## Dataset Preprocessing

Copy ground truth files to ```Linemod_preprocessed```:

In [None]:
from utils.data_exploration import get_class_names
from utils.preprocessing import copy_gt_file, change_02gt, quaternion_gt

# `folder_names` is reused later when the YOLO yaml is created.
folder_names = get_class_names()
# Presumably copies each folder's ground truth file into Linemod_preprocessed —
# confirm against utils.preprocessing.copy_gt_file.
copy_gt_file(folder_names)

Change ```02_gt.yml``` to take only one object:

In [None]:
# Presumably rewrites 02_gt.yml so that each frame keeps a single object —
# confirm against utils.preprocessing.change_02gt.
change_02gt("./datasets/linemod/DenseFusion/Linemod_preprocessed/02_gt.yml")

Add quaternions to the ground truth:

In [None]:
# Presumably adds a quaternion rotation to the ground truth files found under
# this folder — confirm against utils.preprocessing.quaternion_gt.
quaternion_gt("./datasets/linemod/DenseFusion/Linemod_preprocessed")

## Data Exploration

Load an image to check that everything is OK:

In [None]:
# Sanity check on a single sample; the exact meaning of `label` and `object`
# is defined by utils.data_exploration.load_image.
load_image(label=1, object=0)

## Define CustomDataset

In [None]:
from data.CustomDatasetPose import CustomDatasetPose
from utils.data_exploration import get_camera_intrinsics

dataset_root = "./datasets/linemod/DenseFusion/Linemod_preprocessed/"
cam_K = get_camera_intrinsics(dataset_root)

# The image mean/std are taken from the training split and handed to the
# validation and test splits.
train_dataset = CustomDatasetPose(
    dataset_root,
    split="train",
    device=device,
    cam_K=cam_K,
)
image_mean, image_std = train_dataset.get_image_mean_std()
print(f"Training samples: {len(train_dataset)}")

val_dataset = CustomDatasetPose(
    dataset_root,
    split="validation",
    device=device,
    cam_K=cam_K,
    img_mean=image_mean,
    img_std=image_std,
)
print(f"Validation samples: {len(val_dataset)}")

test_dataset = CustomDatasetPose(
    dataset_root,
    split="test",
    device=device,
    cam_K=cam_K,
    img_mean=image_mean,
    img_std=image_std,
)
print(f"Testing samples: {len(test_dataset)}")

## Data Preprocessing for YOLO

In [None]:
# Sample ids of every split, queried in train / validation / test order.
# The test ids are optional for training YOLO.
train_samples, validation_samples, test_samples = (
    split_dataset.get_samples_id()
    for split_dataset in (train_dataset, val_dataset, test_dataset)
)

Create a new folder containing all the info needed by YOLO: only the RGB image and a text file with the label and bounding box are required.
The ```Linemod_preprocessed``` folder is not removed, because it contains the translation and rotation info that is needed for pose estimation, though not for the YOLO object detection model.

Create YOLO yaml

In [None]:
from utils.preprocessing import create_YOLO_yaml, create_dataset_YOLO

# Returns the number of classes and the class names; both are reused by later cells.
number_classes, class_names = create_YOLO_yaml(path, folder_names)

While creating the folder structure, we have to replace each class id with its index in the array written in ```data.yaml```.

In [None]:
# Lookup table from class id (as int) to its position in `class_names`.
# Needed because only a subset of the LineMOD folders is used.
index_dict = {int(class_id): position for position, class_id in enumerate(class_names)}

Create the folders

In [None]:
# The returned `counter_df` is the input of the distribution plot in the next cell.
counter_df = create_dataset_YOLO(number_classes, train_samples, validation_samples, test_samples, index_dict, path, train_dataset)

Visualize dataset distribution

In [None]:
from utils.data_exploration import load_dataset_distribution

# Uses the counters and the class-index mapping produced by the YOLO preprocessing cells.
load_dataset_distribution(counter_df, index_dict, number_classes)

## Visualize data

Visualize depth image

In [None]:
from utils.data_exploration import load_depth_image

# Folder and frame identifiers of the depth image to display.
folder, object_name = "01", "0000"
depth_image_path = f"./datasets/linemod/DenseFusion/Linemod_preprocessed/data/{folder}/depth/{object_name}.png"
img = load_depth_image(depth_image_path)

Plot the patch of the first object in the image. It is read from the ground truth file, which may also contain multiple objects for one image.

In [None]:
from utils.data_exploration import load_depth_patch

# Reuses `folder`, `object_name` and `img` from the previous cell.
load_depth_patch(path, folder, object_name, img)

Get data loader

In [None]:
from data.DataLoaderCollating import rgb_collate_fn

# All three loaders share the batch size and collate function; only the
# training loader is shuffled.
rgb_loader_options = {"batch_size": 4, "collate_fn": rgb_collate_fn}
train_loader = DataLoader(train_dataset, shuffle=True, **rgb_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **rgb_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **rgb_loader_options)

# len() of a DataLoader is the number of batches.
print(f"Training loader: {len(train_loader)}")
print(f"Validation loader: {len(val_loader)}")
print(f"Test loader: {len(test_loader)}")

Plot one batch of data

In [None]:
from utils.data_exploration import plot_batch_data

# Receives the three RGB loaders built in the previous cell.
plot_batch_data(train_loader, val_loader, test_loader)

## Training YOLO (Object Detection model)

In [None]:
from train_YOLO import train_YOLO

# Training settings; the same values are passed to the evaluation cell that follows.
epochs = 20
batch_size = 64
IMG_SIZE = 640

train_YOLO(path, epochs, batch_size, device, IMG_SIZE) # train model and save it to checkpoints

Validate model on test set

In [None]:
from evaluate_YOLO import evaluate_YOLO

# NOTE(review): IMG_SIZE and device are passed in the opposite order compared
# with the train_YOLO call — confirm this matches evaluate_YOLO's signature.
evaluate_YOLO(path, epochs, batch_size, IMG_SIZE, device)

## Pose Estimator Module

In [None]:
from models.PosePredictorModel import PosePredictorModel
from models.PosePredictorModelAlternative import PosePredictorModelAlternative
from PoseEstimationTrainer import PoseEstimationTrainer
from models.ADDMetric import ADDMetric
from utils.pose_plot import plotPose

In [None]:
from data.DataLoaderCollating import pointcloud_collate_fn_baseline

# Hyper-parameters and bookkeeping names of the baseline experiment.
config = {
    "project_name": "baseline_quaternion",
    "experiment_name": "mse_loss_2_cosine_mlp_complete",
    "batch_size": 32,
    "num_epochs": 52,
    "learning_rate": 1e-4,
    "weight_decay": 1e-5,
    "backbone": "resnet18",
    "hidden_dim": 512,
    "img_size": 224,
    "alpha": 1.0,
    "beta": 1.0,
    "add_threshold": 0.1,
    "symmetric_objects": ["10","11"],
    "name_saved_file": "mse_loss_2_cosine_mlp_complete"
}

MODELS_DIR = "./datasets/linemod/DenseFusion/Linemod_preprocessed/models"
# Reuse the device chosen by set_device() instead of detecting it a second
# time, so the model, the trainer and the ADD metric all agree on one device.
# DEVICE is kept as an alias because later cells still reference it.
DEVICE = device

print(f"Using device: {DEVICE}")
print(f"Configuration: {config}")

# Dataloader (only the training loader is shuffled)
train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True, collate_fn=pointcloud_collate_fn_baseline)
val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False, collate_fn=pointcloud_collate_fn_baseline)
test_loader = DataLoader(test_dataset, batch_size=config["batch_size"], shuffle=False, collate_fn=pointcloud_collate_fn_baseline)

# Model
model = PosePredictorModel(
    backbone=config["backbone"],
    hidden_dim=config["hidden_dim"]
).to(device)

# model = PosePredictorModelAlternative(backbone=config["backbone"],hidden_dim=config["hidden_dim"]).to(device)

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# Experiment tracking is optional: `experiment` stays None when COMET_ML is off.
if COMET_ML:
    experiment = comet_ml.start(
        api_key="<YOUR_API>",  # placeholder: replace with your comet.ml API key
        project_name=config['project_name'],
        experiment_config=comet_ml.ExperimentConfig(
            name=config["experiment_name"],
            parse_args=False)
    )

    experiment.log_parameters(config)
else:
    experiment = None

Train

In [None]:
RESUME = True  # set to False to start without loading a checkpoint

# When resuming, the model weights are restored here and the whole checkpoint
# is handed to the trainer.
checkpoint = None
if RESUME:
    checkpoint = torch.load("./checkpoints/baseline_best_result.pth", map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

trainer = PoseEstimationTrainer(
    model,
    train_loader,
    val_loader,
    device=device,
    config=config,
    experiment=experiment,
    # Only request the optimizer state when a checkpoint was actually loaded;
    # previously True was passed even with checkpoint=None.
    resume_optimizer=RESUME,
    checkpoint=checkpoint,
)
trainer.train(num_epochs=config["num_epochs"])

Evaluate model on test set

In [None]:
# Reload the saved weights and switch the model to evaluation mode.
checkpoint = torch.load(f"{path}/checkpoints/baseline_best_result.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

add_metric = ADDMetric(
    model=model,
    class_names=class_names,
    test_loader=test_loader,
    models_3D_dir=MODELS_DIR,
    symmetric_objects=config["symmetric_objects"],
    # Use the same device the model was moved to, rather than the separately
    # detected DEVICE, so the metric and the model cannot disagree.
    device=device,
    experiment=experiment,
    config=config
)

print("Evaluating with ADD metric...")
add_score, accuracy, detailed_results = add_metric.evaluate_model_with_add()

print(f"\nFinal Results:\nADD Score: {add_score:.4f}\nAccuracy: {accuracy:.4f}")

Visualize inference of test set

In [None]:
from utils.pose_plot import plotPose

# Plot ground truth vs. predicted pose for the first sample of every test batch.
for batch in test_loader:
    images = batch['rgb'].to(device)
    gt_trans = batch['translation']
    gt_rot = batch['rotation']
    sample_id = batch["sample_id"]

    with torch.no_grad():
        pred_trans, pred_rot = model(images)

        # Only the first element of the batch is plotted; skip empty batches.
        if len(images) > 0:
            # sample_id[0] holds (folder number, frame number) of the sample.
            folder_id, frame_id = sample_id[0][0], sample_id[0][1]
            img_path = f"{path}/datasets/linemod/DenseFusion/Linemod_preprocessed/data/{folder_id:02d}/rgb/{frame_id:04d}.png"

            plotPose(img_path, gt_trans[0], gt_rot[0], pred_trans[0], pred_rot[0], experiment, cam_K)

# The "saved on comet_ml" message is only true when an experiment is active.
if COMET_ML:
    print(f"Plot saved on comet_ml in project: {config['project_name']}, experiment: {config['experiment_name']}")
    experiment.end()

## Inference Baseline

Run inference on the test set using ```YOLO```, one image at a time. The ```ADD``` metric is also computed, so the ground truth pose is required. It creates training, validation and test sets.

Only the best baseline model is used for inference

In [None]:
from inferenceBaseline import inference_baseline

# Entry point of the project's inferenceBaseline module; it receives the class
# names, camera intrinsics, device and project root defined in earlier cells.
inference_baseline(class_names=class_names, cam_K=cam_K, device=device, path=path)

## Extension

Compare the images in ```rgb``` and ```mask``` and check whether any image is present in only one of the two folders.

In [None]:
from utils.data_exploration import compare_rgb_mask_in_data

# Runs the check on the `data` folder of the preprocessed dataset.
compare_rgb_mask_in_data("./datasets/linemod/DenseFusion/Linemod_preprocessed/data/")

Create dataset

In [None]:
from data.CustomDataset import CustomDataset

dataset_root = "./datasets/linemod/DenseFusion/Linemod_preprocessed/"

# The image mean/std are taken from the training split and handed to the
# validation and test splits.
train_dataset = CustomDataset(
    dataset_root,
    split='train',
    device=device,
    cam_K=cam_K,
)
image_mean, image_std = train_dataset.get_image_mean_std()
print(f'Training samples: {len(train_dataset)}')

val_dataset = CustomDataset(
    dataset_root,
    split='validation',
    device=device,
    cam_K=cam_K,
    img_mean=image_mean,
    img_std=image_std,
)
print(f'Validation samples: {len(val_dataset)}')

test_dataset = CustomDataset(
    dataset_root,
    split='test',
    device=device,
    cam_K=cam_K,
    img_mean=image_mean,
    img_std=image_std,
)
print(f'Testing samples: {len(test_dataset)}')

Get dataloader

In [None]:
from data.DataLoaderCollating import pointcloud_collate_fn

# All three loaders share the batch size and collate function; only the
# training loader is shuffled.
pointcloud_loader_options = {"batch_size": 16, "collate_fn": pointcloud_collate_fn}
train_loader = DataLoader(train_dataset, shuffle=True, **pointcloud_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **pointcloud_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **pointcloud_loader_options)

In [None]:
from models.PoseEstimationPipeline import PoseEstimationPipeline
from PoseTrainer import PoseTrainer
from models.PoseLossExtension import PoseLossExtension
from models.ADDMetricExtension import ADDMetricExtension

In [None]:
# Hyper-parameters and bookkeeping names of the extension experiment.
config = {
    "project_name": "pointnet",
    "experiment_name": "HiG",
    "batch_size": 16,
    "num_epochs": 90,
    "learning_rate": 1.0e-04,
    "weight_decay": 1e-5,
    "backbone": "resnet18",
    "hidden_dim": 512,
    "img_size": 224,
    "alpha": 1.0,
    "beta": 1.0,
    "add_threshold": 0.1,
    "symmetric_objects": ["10", "11"],
    "name_saved_file": "HiG",
    "geometric_dims": [64, 128, 256],
    "fusion_dim": 128,
    "num_run_plotPose": 1,
}

MODELS_DIR = "./datasets/linemod/DenseFusion/Linemod_preprocessed/models"

print(f"Using device: {device}")
print(f"Configuration: {config}")

# model: the intrinsics are read from cam_K at indices 0 (fx), 4 (fy), 2 (cx), 5 (cy)
model = PoseEstimationPipeline(
    fx=cam_K[0],
    fy=cam_K[4],
    cx=cam_K[2],
    cy=cam_K[5],
).to(device)

# Count all parameters, and separately those that require gradients.
param_sizes = [(param.numel(), param.requires_grad) for param in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, needs_grad in param_sizes if needs_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# Experiment tracking is optional: `experiment` stays None when COMET_ML is off.
if not COMET_ML:
    experiment = None
else:
    experiment_settings = comet_ml.ExperimentConfig(
        name=config["experiment_name"],
        parse_args=False,
    )
    experiment = comet_ml.start(
        api_key="<YOUR_API>",
        project_name=config['project_name'],
        experiment_config=experiment_settings,
    )

    experiment.log_parameters(config)

Train

In [None]:
RESUME = True  # set to False to start without loading a checkpoint

# When resuming, the model weights are restored here and the whole checkpoint
# is handed to the trainer.
checkpoint = None
if RESUME:
    checkpoint = torch.load("./checkpoints/HiG_Resnet18_bs16.pth", map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

trainer = PoseTrainer(
    model,
    class_names,
    train_loader,
    val_loader,
    device=device,
    config=config,
    experiment=experiment,
    # Only request the optimizer state when a checkpoint was actually loaded;
    # previously True was passed even with checkpoint=None.
    resume_optimizer=RESUME,
    checkpoint=checkpoint,
)
trainer.train(num_epochs=config["num_epochs"])

Evaluate model on test set

In [None]:
# Reload the saved weights and switch the model to evaluation mode.
checkpoint_path = "./checkpoints/HiG_Resnet18_bs16.pth"
checkpoint = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

add_metric = ADDMetricExtension(
    model=model,
    class_names=class_names,
    test_loader=test_loader,
    models_3D_dir=MODELS_DIR,
    symmetric_objects=config["symmetric_objects"],
    device=device,
    experiment=experiment,
    config=config,
)

print("Evaluating with ADD metric...")
evaluation_output = add_metric.evaluate_model_with_add()
add_score, accuracy, detailed_results = evaluation_output

print(f"\nFinal Results:\nADD Score: {add_score:.4f}\nAccuracy: {accuracy:.4f}")

Visualize inference on test set

In [None]:
from utils.pose_plot import plotPose

criterion = PoseLossExtension(class_names=class_names,device=device)

# Plot ground truth vs. predicted pose for the first sample of every test batch.
for batch in test_loader:
    gt_trans = batch['translation']
    gt_rot = batch['rotation']
    object_ids = batch['obj_id']
    sample_id = batch["sample_id"]

    with torch.no_grad():
        pixel_rotations_norm, pixel_translations, pixel_confidences = model(batch)
        # Besides the loss, the criterion returns the rotation `r` and the
        # translation `t` that are plotted below.
        loss, r, t = criterion(pixel_rotations_norm, pixel_translations, pixel_confidences, gt_trans, gt_rot, object_ids)

        # Only the first element of the batch is plotted; skip empty batches.
        if len(object_ids) > 0:
            # sample_id[0] holds (folder number, frame number) of the sample.
            folder_id, frame_id = sample_id[0][0], sample_id[0][1]
            img_path = f"./datasets/linemod/DenseFusion/Linemod_preprocessed/data/{folder_id:02d}/rgb/{frame_id:04d}.png"
            plotPose(img_path, gt_trans[0], gt_rot[0], t[0], r[0], experiment=experiment, camera_intrinsics=cam_K)

# The "saved on comet_ml" message is only true when an experiment is active.
if COMET_ML:
    print(f"Plot saved on comet_ml in project: {config['project_name']}, experiment: {config['experiment_name']}")
    experiment.end()

## Inference Extension

Perform inference on the test set using ```YOLO```: the object detection model is used instead of ground truth info. The ```ADD``` metric is also computed,
so the ground truth pose is required. It creates training, validation and test sets.

In [None]:
from inferenceExtension import inference_extension

# Entry point of the project's inferenceExtension module; it receives the class
# names, camera intrinsics, device and project root defined in earlier cells.
inference_extension(class_names=class_names, cam_K=cam_K, device=device, path=path)