In [None]:
# Make Google Drive available inside the Colab runtime; the dataset archive,
# cropped images and model checkpoints referenced later all live under it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/LineMOD_DATASET/Linemod_preprocessed.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Linemod_preprocessed/segnet_results/11_label/0206_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0207_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0208_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0209_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0210_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0211_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0212_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0213_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0214_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0215_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0216_label.png  
  inflating: Linemod_preprocessed/segnet_results/11_label/0218_label.png  
  inflating: Linemod_preprocessed/s

In [None]:
import os
import random
import shutil
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F  # Import for F.mse_loss
from torchvision import models
import cv2
import yaml
from tqdm import tqdm

In [None]:
import os
import yaml
import torch
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from scipy.spatial.transform import Rotation as R

class LinemodPoseDataset(Dataset):
    """Cropped object images paired with their LineMOD ground-truth pose.

    Every ``*.png`` file in ``cropped_dir`` is expected to be named
    ``<class_id>_<image_id>.png`` where ``class_id`` is the two-digit folder
    name under ``linemod_root`` (e.g. ``01_0004.png``). Poses are read from
    ``<linemod_root>/<class_id>/gt.yml``, which is indexed as
    ``gt[image_id] -> list of dicts`` carrying ``obj_id``, ``cam_R_m2c`` and
    ``cam_t_m2c``.

    Args:
        cropped_dir: Directory containing the cropped ``.png`` images.
        linemod_root: Directory containing the per-class ``NN/gt.yml`` files.
        transform: Optional callable applied to the PIL image. When ``None``
            the default pipeline (resize to 224x224, ``ToTensor``, ImageNet
            normalisation) is used.
    """

    def __init__(self, cropped_dir, linemod_root, transform=None):
        self.cropped_dir = cropped_dir
        self.linemod_root = linemod_root

        # Honour a caller-supplied transform; only fall back to the default
        # pipeline when none is given (the argument used to be discarded).
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        self.transform = transform

        # Sorted so that index -> file is stable across runs.
        self.img_filenames = sorted(
            f for f in os.listdir(cropped_dir) if f.endswith(".png")
        )

        # Preload gt.yml data for all classes (folders "01" .. "15");
        # classes without a gt.yml are simply absent from the dict.
        self.gt_data = {}
        for class_id in range(1, 16):
            class_str = f"{class_id:02d}"
            gt_path = os.path.join(linemod_root, class_str, "gt.yml")
            if os.path.exists(gt_path):
                with open(gt_path, 'r') as f:
                    self.gt_data[class_str] = yaml.safe_load(f)

    def __len__(self):
        """Number of cropped images found in ``cropped_dir``."""
        return len(self.img_filenames)

    def __getitem__(self, idx):
        """Load one image and its pose.

        Returns:
            dict with keys ``image`` (transformed image), ``rotation``
            (unit quaternion, ``[x, y, z, w]``, float32), ``rotation_matrix``
            (3x3, float32), ``translation`` (3-vector, float32, ground-truth
            value divided by 1000), ``class_id`` (int) and ``filename``.

        Raises:
            KeyError: if no ground-truth entry exists for this class / image /
                object id.
        """
        filename = self.img_filenames[idx]
        class_id, img_id_str = filename.split("_")
        img_id = int(os.path.splitext(img_id_str)[0])
        obj_id = int(class_id)

        img_path = os.path.join(self.cropped_dir, filename)
        img = Image.open(img_path).convert('RGB')
        img_tensor = self.transform(img)

        # A frame may list several objects; pick the entry for this class.
        pose_list = self.gt_data[class_id][img_id]
        pose = next((item for item in pose_list if item['obj_id'] == obj_id), None)
        if pose is None:
            # Raise a descriptive error instead of letting a bare StopIteration
            # escape: iteration machinery can mistake StopIteration for
            # "end of data" and silently cut an epoch short.
            raise KeyError(
                f"No ground-truth pose for obj_id={obj_id} in image {img_id} "
                f"of class {class_id} ({filename})"
            )

        R_mat = np.array(pose['cam_R_m2c']).reshape(3, 3).astype(np.float32)
        quat = R.from_matrix(R_mat).as_quat().astype(np.float32)  # [x, y, z, w]
        # Re-normalise after the float32 cast so the target is a unit quaternion.
        quat /= np.linalg.norm(quat)

        # gt.yml translation is presumably in millimetres (per the original
        # author's note - confirm against the dataset); dividing by 1000 then
        # yields metres.
        t_vec = np.array(pose['cam_t_m2c'], dtype=np.float32) / 1000.0

        return {
            'image': img_tensor,
            'rotation': torch.tensor(quat),
            'rotation_matrix': torch.tensor(R_mat),
            'translation': torch.tensor(t_vec),  # ground-truth value / 1000
            'class_id': obj_id,
            'filename': filename
        }





In [None]:
# train_dataset = LinemodPoseDataset("/content/train", "/content/Linemod_preprocessed/data")

In [None]:
# train_dataset.gt_data

In [None]:
import torch.nn as nn
import torchvision.models as models
class PoseNet6D(nn.Module):
    """ResNet-50 feature extractor with two linear heads for 6D pose.

    One head regresses a quaternion ``(x, y, z, w)`` that is L2-normalised in
    ``forward``; the other regresses a 3-vector translation.

    Args:
        pretrained: If True, initialise the backbone with the
            ``IMAGENET1K_V2`` ResNet-50 weights; otherwise use random init.
    """

    def __init__(self, pretrained=True):
        super(PoseNet6D, self).__init__()
        backbone = models.resnet50(
            weights=models.ResNet50_Weights.IMAGENET1K_V2 if pretrained else None
        )
        # Width of the pooled feature vector (2048 for ResNet-50), read from
        # the classifier that is dropped below rather than hard-coded.
        feature_dim = backbone.fc.in_features
        self.backbone = nn.Sequential(*list(backbone.children())[:-1])  # (B, 2048, 1, 1) # Remove final fc

        self.fc_rot = nn.Linear(feature_dim, 4)    # Quaternion output (x, y, z, w)
        self.fc_trans = nn.Linear(feature_dim, 3)  # Translation output (x, y, z)

    def forward(self, x):
        """Return ``(rot, trans)`` with shapes ``(B, 4)`` and ``(B, 3)``."""
        # flatten(1) keeps the batch dimension. A plain .squeeze() would also
        # drop it when B == 1, making the dim=1 normalisation below fail.
        features = self.backbone(x).flatten(1)  # (B, 2048)
        rot = self.fc_rot(features)             # (B, 4)
        trans = self.fc_trans(features)         # (B, 3)
        rot = F.normalize(rot, dim=1)           # Unit quaternion
        return rot, trans


In [None]:
def pose_loss(pred_rot, pred_trans, gt_rot, gt_trans, alpha=10.0):
    """Combine rotation and translation errors into a single scalar.

    Both terms are mean-squared errors; the rotation term is scaled by
    ``alpha`` before the translation term is added.

    Args:
        pred_rot: Predicted rotation tensor.
        pred_trans: Predicted translation tensor.
        gt_rot: Ground-truth rotation tensor (same shape as ``pred_rot``).
        gt_trans: Ground-truth translation tensor (same shape as ``pred_trans``).
        alpha: Weight applied to the rotation term.

    Returns:
        Scalar tensor ``alpha * mse(rot) + mse(trans)``.
    """
    rotation_term = F.mse_loss(input=pred_rot, target=gt_rot)
    translation_term = F.mse_loss(input=pred_trans, target=gt_trans)
    weighted_total = alpha * rotation_term + translation_term
    return weighted_total


In [None]:
def train(model, train_loader, optimizer, device, epochs=20):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Each batch is a dict providing ``'image'``, ``'rotation'`` and
    ``'translation'`` tensors. The loss is the unweighted sum of the MSE
    between predicted and target rotation and the MSE between predicted and
    target translation.

    Args:
        model: Module whose forward returns ``(rotation, translation)``.
        train_loader: Iterable of batch dicts.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device the batch tensors are moved to.
        epochs: Number of passes over ``train_loader``.

    Returns:
        Mean per-batch loss of the last epoch as a float, or ``None`` when
        ``epochs`` is 0.

    Raises:
        ValueError: if ``train_loader`` yields no batches.
    """
    model.train()  # Set model to training mode
    avg_loss = None
    for epoch in range(epochs):
        running_loss = 0.0
        # Count batches while iterating so an empty loader is reported
        # clearly instead of surfacing as a ZeroDivisionError below.
        num_batches = 0
        for data in train_loader:
            inputs = data['image'].to(device)
            rotations = data['rotation'].to(device)
            translations = data['translation'].to(device)

            optimizer.zero_grad()

            # Forward pass
            predicted_rot, predicted_trans = model(inputs)

            # Compute losses
            rot_loss = F.mse_loss(predicted_rot, rotations)
            trans_loss = F.mse_loss(predicted_trans, translations)

            # Total loss
            loss = rot_loss + trans_loss

            # Backpropagation and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; nothing to train on")

        avg_loss = running_loss / num_batches
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

    print("Training complete.")
    return avg_loss


In [None]:
def validate(model, val_loader, device):
    """Evaluate ``model`` on ``val_loader`` without gradient tracking.

    Uses the same loss as training: rotation MSE plus translation MSE,
    averaged over batches. The model is left in evaluation mode on return.

    Args:
        model: Module whose forward returns ``(rotation, translation)``.
        val_loader: Iterable of batch dicts with ``'image'``, ``'rotation'``
            and ``'translation'`` tensors.
        device: Device the batch tensors are moved to.

    Returns:
        Mean per-batch validation loss as a float.

    Raises:
        ValueError: if ``val_loader`` yields no batches.
    """
    model.eval()  # Set model to evaluation mode
    running_loss = 0.0
    # Count batches while iterating so an empty loader is reported clearly
    # instead of surfacing as a ZeroDivisionError below.
    num_batches = 0
    with torch.no_grad():  # Disable gradients for validation
        for data in val_loader:
            inputs = data['image'].to(device)
            rotations = data['rotation'].to(device)
            translations = data['translation'].to(device)

            # Forward pass
            predicted_rot, predicted_trans = model(inputs)

            # Compute losses
            rot_loss = F.mse_loss(predicted_rot, rotations)
            trans_loss = F.mse_loss(predicted_trans, translations)

            # Total loss
            loss = rot_loss + trans_loss

            running_loss += loss.item()
            num_batches += 1

    if num_batches == 0:
        raise ValueError("val_loader yielded no batches; cannot compute a validation loss")

    avg_loss = running_loss / num_batches
    print(f"Validation Loss: {avg_loss:.4f}")
    return avg_loss


In [None]:
import os
import random
import shutil
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F  # Import for F.mse_loss
from torchvision import models

# --- Paths and split configuration ------------------------------------------
cropped_dir = "/content/drive/MyDrive/yolo_models/linemod_yolo_v8n/train/train_cropped_objects"  # folder with all cropped images
linemod_root="/content/Linemod_preprocessed/data"
train_dir = "/content/train"
val_dir = "/content/val"
split_ratio = 0.8  # 80% train, 20% val


def _populate_split_dir(file_names, src_dir, dst_dir):
    """Make ``dst_dir`` hold exactly the ``.png`` files listed in ``file_names``.

    Stale ``.png`` files left by an earlier run with a different split are
    removed first, so the same image cannot sit in both the train and the
    validation directory.
    """
    os.makedirs(dst_dir, exist_ok=True)
    wanted = set(file_names)
    for existing in os.listdir(dst_dir):
        if existing.endswith(".png") and existing not in wanted:
            os.remove(os.path.join(dst_dir, existing))
    for name in file_names:
        shutil.copy(os.path.join(src_dir, name), os.path.join(dst_dir, name))


# os.listdir returns entries in arbitrary order; sort so the seeded split is
# the same on every run (which matters when resuming from a checkpoint).
image_files = sorted(f for f in os.listdir(cropped_dir) if f.endswith(".png"))
train_files, val_files = train_test_split(image_files, train_size=split_ratio, random_state=42)

# Materialise the split on local disk
_populate_split_dir(train_files, cropped_dir, train_dir)
_populate_split_dir(val_files, cropped_dir, val_dir)

print(f"Train: {len(train_files)} images, Val: {len(val_files)} images")
train_dataset = LinemodPoseDataset(train_dir, linemod_root)
val_dataset = LinemodPoseDataset(val_dir, linemod_root)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = PoseNet6D().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

best_val_loss = float('inf')  # Initialize with infinity
save_path = "/content/drive/MyDrive/yolo_models/linemod_yolo_v8n/poseModel/posenet6d_best_model.pth"
checkpoint_path = "/content/drive/MyDrive/yolo_models/linemod_yolo_v8n/poseModel/posenet6d_checkpoint.pth"
# Load checkpoint if exists
if os.path.exists(checkpoint_path):
    # map_location lets a checkpoint written on a GPU runtime be restored on
    # whatever device this session has (including CPU-only).
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    best_val_loss = checkpoint['best_val_loss']
    start_epoch = checkpoint['epoch'] + 1  # Start from the next epoch
    print(f"Resumed training from epoch {start_epoch}")
else:
    start_epoch = 0  # If no checkpoint, start from the beginning
    print("No checkpoint found, starting from epoch 0")


os.makedirs(os.path.dirname(save_path), exist_ok=True)
epoch_num=180
# Continue from start_epoch so a resumed run does not repeat the epochs that
# were already completed before the checkpoint was written.
for epoch in range(start_epoch, epoch_num):
    print(f"\nEpoch {epoch + 1}/{epoch_num}")
    train(model, train_loader, optimizer, device, epochs=1)  # One epoch at a time

    val_loss = validate(model, val_loader, device)

    # Keep a separate copy of the weights with the lowest validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), save_path)
        print(f"Best model saved at epoch {epoch+1} with val_loss = {val_loss:.4f}")
    # Save checkpoint after each epoch
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'best_val_loss': best_val_loss
    }
    torch.save(checkpoint, checkpoint_path)
    print(f"Checkpoint saved at epoch {epoch+1}")




Train: 1898 images, Val: 475 images


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 201MB/s]


No checkpoint found, starting from epoch 0

Epoch 1/180
rotation loss=0.378292441368103
trans loss=0.2474527806043625
total loss=0.6257452368736267
running_loss=0.6257452368736267
rotation loss=0.3147270083427429
trans loss=0.2344573587179184
total loss=0.5491843819618225
running_loss=1.1749296188354492
rotation loss=0.30812007188796997
trans loss=0.2146761417388916
total loss=0.5227962136268616
running_loss=1.6977258324623108
rotation loss=0.257983922958374
trans loss=0.1923447549343109
total loss=0.45032867789268494
running_loss=2.1480545103549957
rotation loss=0.2287067174911499
trans loss=0.18866638839244843
total loss=0.4173731207847595
running_loss=2.5654276311397552
rotation loss=0.22424018383026123
trans loss=0.15368133783340454
total loss=0.37792152166366577
running_loss=2.943349152803421
rotation loss=0.25519269704818726
trans loss=0.17264798283576965
total loss=0.4278406798839569
running_loss=3.371189832687378
rotation loss=0.2025376260280609
trans loss=0.12769097089767456
t

KeyboardInterrupt: 

In [None]:
# import torch
# torch.cuda.empty_cache()


In [None]:
# # Create a model instance
# model = PoseNet6D().to(device)
# # model path
# save_path = "/content/drive/MyDrive/yolo_models/linemod_yolo_v8n/poseModel/posenet6d_best_model.pth"
# # Load the saved weights
# model.load_state_dict(torch.load(save_path))


