In [3]:
import os
import re
import cv2
import pickle
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from helper import save_obj, load_obj

def get_rot_tra(rot_adr, tra_adr):
    """
    Load an object's pose from disk as one combined [R | t] matrix.

    Args:
        rot_adr (str): path to the rotation file (its first line is skipped)
        tra_adr (str): path to the translation file (its first line is skipped)
    Returns:
        np.ndarray: the rotation matrix with the translation appended as an
        extra right-hand column, e.g. shape (3, 4) for a 3x3 rotation
    """
    rotation = np.loadtxt(rot_adr, skiprows=1)
    # The translation is forced into a (3, 1) column so it can be placed
    # next to the rotation along axis 1.
    translation = np.loadtxt(tra_adr, skiprows=1).reshape(3, 1)
    return np.concatenate((rotation, translation), axis=1)

# Load the pose stored in the index-0 rotation/translation files of the "ape" object.
# Forward slashes are used (instead of Windows-only "\\" separators) so the paths
# resolve on every platform and match the path style used in the rest of the notebook.
tra_adr = "LineMOD_Dataset/ape/data/tra0.tra"
rot_adr = "LineMOD_Dataset/ape/data/rot0.rot"
rigid_transformation = get_rot_tra(rot_adr, tra_adr)

In [13]:
# Sanity check: the rotation with the translation appended as a column should be 3x4.
print(rigid_transformation.shape)

(3, 4)


In [15]:
# Camera intrinsics: focal lengths (fx, fy) and principal point (px, py),
# assembled into the 3x3 intrinsic matrix used for the projection below.
fx = 572.41140
px = 325.26110
fy = 573.57043
py = 242.04899
intrinsic_matrix = np.array([[fx, 0, px], [0, fy, py], [0, 0, 1]])
# Integer ID written into the ID mask for each object class.
classes = {'ape': 1, 'benchviseblue': 2, 'cam': 3, 'can': 4, 'cat': 5, 'driller': 6,
           'duck': 7, 'eggbox': 8, 'glue': 9, 'holepuncher': 10, 'iron': 11, 'lamp': 12, 'phone': 13}

# Forward slashes keep these paths portable and consistent with the other
# dataset paths built in this cell.
list_all_images = load_obj("LineMOD_Dataset/all_images_adr")
training_images_idx = load_obj("LineMOD_Dataset/train_images_indices")

# Compiled once, outside the loop: pulls the numeric index out of an image file name.
regex = re.compile(r'\d+')

for i in range(1):  # only the first training image is processed in this cell
    img_adr = list_all_images[training_images_idx[i]]
    # The label is the name of the directory one level above the image's own folder.
    label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
    idx = regex.findall(os.path.split(img_adr)[1])[0]

    if i % 1000 == 0:
        print(str(i) + "/" + str(len(training_images_idx)) + " finished!")

    image = cv2.imread(img_adr)
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot be read;
        # fail here with a clear message rather than an AttributeError on .shape.
        raise FileNotFoundError("Could not read image: " + str(img_adr))
    height, width = image.shape[:2]

    ID_mask = np.zeros((height, width))
    # U_mask / V_mask and the *_mask_file paths are prepared but not filled or
    # written in this cell.
    U_mask = np.zeros((height, width))
    V_mask = np.zeros((height, width))

    ID_mask_file = "LineMOD_Dataset/" + label + \
        "/ground_truth/IDmasks/color" + str(idx) + ".png"
    U_mask_file = "LineMOD_Dataset/" + label + \
        "/ground_truth/Umasks/color" + str(idx) + ".png"
    V_mask_file = "LineMOD_Dataset/" + label + \
        "/ground_truth/Vmasks/color" + str(idx) + ".png"

    tra_adr = "LineMOD_Dataset/" + label + "/data/tra" + str(idx) + ".tra"
    rot_adr = "LineMOD_Dataset/" + label + "/data/rot" + str(idx) + ".rot"
    rigid_transformation = get_rot_tra(rot_adr, tra_adr)

    # Read the point cloud (first three columns, header line skipped)
    ptcld_file = "LineMOD_Dataset/" + label + "/object.xyz"
    pt_cld_data = np.loadtxt(ptcld_file, skiprows=1, usecols=(0, 1, 2))
    # Append a column of ones so each point becomes a 4-vector that can be
    # multiplied by the [R | t] matrix.
    ones = np.ones((pt_cld_data.shape[0], 1))
    homogenous_coordinate = np.append(pt_cld_data[:, :3], ones, axis=1)

    # Perspective projection to obtain 2D coordinates for masks
    homogenous_2D = intrinsic_matrix @ (
        rigid_transformation @ homogenous_coordinate.T)
    # Divide by the third row to go from homogeneous to pixel coordinates.
    coord_2D = homogenous_2D[:2, :] / homogenous_2D[2, :]
    coord_2D = ((np.floor(coord_2D)).T).astype(int)
    # Clamp to the actual image size (previously hard-coded as 639 / 479).
    # Points projecting outside the frame land on the border pixels.
    x_2d = np.clip(coord_2D[:, 0], 0, width - 1)
    y_2d = np.clip(coord_2D[:, 1], 0, height - 1)
    ID_mask[y_2d, x_2d] = classes[label]
    # Debug output: the point cloud after applying the rigid transformation.
    print(rigid_transformation @ homogenous_coordinate.T)

0/2370 finished!
[[ -9.07477831  -8.97066587  -9.0123123  ... -10.08988799 -10.13153361
   -9.97536373]
 [  6.79336213   6.87701669   6.70796074 ...   0.45024232   0.28118702
    0.40666984]
 [109.26116115 109.04317172 109.408136   ... 102.44449177 102.80945435
  102.48246765]]


In [17]:
# Same projection pipeline as the preceding cell; this one prints the
# homogeneous 2D coordinates (before the perspective divide) for inspection.

# Camera intrinsics: focal lengths (fx, fy) and principal point (px, py),
# assembled into the 3x3 intrinsic matrix used for the projection below.
fx = 572.41140
px = 325.26110
fy = 573.57043
py = 242.04899
intrinsic_matrix = np.array([[fx, 0, px], [0, fy, py], [0, 0, 1]])
# Integer ID written into the ID mask for each object class.
classes = {'ape': 1, 'benchviseblue': 2, 'cam': 3, 'can': 4, 'cat': 5, 'driller': 6,
           'duck': 7, 'eggbox': 8, 'glue': 9, 'holepuncher': 10, 'iron': 11, 'lamp': 12, 'phone': 13}

# Forward slashes keep these paths portable and consistent with the other
# dataset paths built in this cell.
list_all_images = load_obj("LineMOD_Dataset/all_images_adr")
training_images_idx = load_obj("LineMOD_Dataset/train_images_indices")

# Compiled once, outside the loop: pulls the numeric index out of an image file name.
regex = re.compile(r'\d+')

for i in range(1):  # only the first training image is processed in this cell
    img_adr = list_all_images[training_images_idx[i]]
    # The label is the name of the directory one level above the image's own folder.
    label = os.path.split(os.path.split(os.path.dirname(img_adr))[0])[1]
    idx = regex.findall(os.path.split(img_adr)[1])[0]

    if i % 1000 == 0:
        print(str(i) + "/" + str(len(training_images_idx)) + " finished!")

    image = cv2.imread(img_adr)
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot be read;
        # fail here with a clear message rather than an AttributeError on .shape.
        raise FileNotFoundError("Could not read image: " + str(img_adr))
    height, width = image.shape[:2]

    ID_mask = np.zeros((height, width))
    # U_mask / V_mask and the *_mask_file paths are prepared but not filled or
    # written in this cell.
    U_mask = np.zeros((height, width))
    V_mask = np.zeros((height, width))

    ID_mask_file = "LineMOD_Dataset/" + label + \
        "/ground_truth/IDmasks/color" + str(idx) + ".png"
    U_mask_file = "LineMOD_Dataset/" + label + \
        "/ground_truth/Umasks/color" + str(idx) + ".png"
    V_mask_file = "LineMOD_Dataset/" + label + \
        "/ground_truth/Vmasks/color" + str(idx) + ".png"

    tra_adr = "LineMOD_Dataset/" + label + "/data/tra" + str(idx) + ".tra"
    rot_adr = "LineMOD_Dataset/" + label + "/data/rot" + str(idx) + ".rot"
    rigid_transformation = get_rot_tra(rot_adr, tra_adr)

    # Read the point cloud (first three columns, header line skipped)
    ptcld_file = "LineMOD_Dataset/" + label + "/object.xyz"
    pt_cld_data = np.loadtxt(ptcld_file, skiprows=1, usecols=(0, 1, 2))
    # Append a column of ones so each point becomes a 4-vector that fits the
    # matrix multiplication with the [R | t] matrix.
    ones = np.ones((pt_cld_data.shape[0], 1))
    homogenous_coordinate = np.append(pt_cld_data[:, :3], ones, axis=1)

    # Perspective projection to obtain 2D coordinates for masks
    homogenous_2D = intrinsic_matrix @ (
        rigid_transformation @ homogenous_coordinate.T)
    # Divide by the third row to go from homogeneous to pixel coordinates.
    coord_2D = homogenous_2D[:2, :] / homogenous_2D[2, :]
    coord_2D = ((np.floor(coord_2D)).T).astype(int)
    # Clamp to the actual image size (previously hard-coded as 639 / 479).
    # Points projecting outside the frame land on the border pixels.
    x_2d = np.clip(coord_2D[:, 0], 0, width - 1)
    y_2d = np.clip(coord_2D[:, 1], 0, height - 1)
    ID_mask[y_2d, x_2d] = classes[label]
    # Debug output: projected coordinates before division by the third row.
    print(homogenous_2D)

0/2370 finished!
[[30343.89890714 30332.59057233 30427.46036391 ... 27545.64117091
  27640.51087403 27623.54823911]
 [30343.02533957 30338.24299976 30329.61674122 ... 25054.83144471
  25046.20514876 25039.03158292]
 [  109.26116115   109.04317172   109.408136   ...   102.44449177
    102.80945435   102.48246765]]


In [19]:
# Scratch check: matrix product of the intrinsic matrix with itself.
# NOTE(review): this value is not used by any other visible cell — confirm it is still needed.
print(intrinsic_matrix @ intrinsic_matrix)

[[3.27654811e+05 0.00000000e+00 1.86508423e+05]
 [0.00000000e+00 3.28983038e+05 1.39074192e+05]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00]]


In [20]:
# Confirm the intrinsic matrix is 3x3.
intrinsic_matrix.shape

(3, 3)

In [21]:
# Display the intrinsic matrix built from fx, fy, px and py.
intrinsic_matrix

array([[572.4114 ,   0.     , 325.2611 ],
       [  0.     , 573.57043, 242.04899],
       [  0.     ,   0.     ,   1.     ]])

In [24]:
# Display the point cloud with the column of ones appended (one row per point).
homogenous_coordinate

array([[-2.36478 , -0.192424, -0.128283,  1.      ],
       [-2.10913 , -0.192424, -0.128283,  1.      ],
       [-2.49261 , -0.320707, -0.128283,  1.      ],
       ...,
       [ 0.958695,  0.577273, -8.85151 ,  1.      ],
       [ 0.575217,  0.44899 , -8.85151 ,  1.      ],
       [ 0.958695,  0.44899 , -8.85151 ,  1.      ]])

In [25]:
# Display the column of ones (one row per point) that is appended to the point cloud.
np.ones((pt_cld_data.shape[0], 1))

array([[1.],
       [1.],
       [1.],
       ...,
       [1.],
       [1.],
       [1.]])

In [26]:
# Display the combined matrix: rotation in the first three columns, translation in the last.
rigid_transformation

array([[  0.407246,  -0.892747,   0.192754,  -8.25879 ],
       [  0.327223,   0.339659,   0.881792,   7.74565 ],
       [ -0.852687,  -0.296032,   0.430452, 107.243   ]])

In [None]:
# Step-by-step version of the pose loading for the index-0 "ape" files, keeping
# the intermediate rotation and translation arrays available for inspection.
rot_matrix = np.loadtxt("LineMOD_Dataset/ape/data/rot0.rot", skiprows=1)
# Translation is loaded and immediately shaped into a (3, 1) column.
trans_matrix = np.loadtxt("LineMOD_Dataset/ape/data/tra0.tra", skiprows=1).reshape(3, 1)
# Place the translation column to the right of the rotation matrix.
rigid_transformation = np.concatenate((rot_matrix, trans_matrix), axis=1)
