# Notebook to read CONG data and save grasps, voxel grids and respective constrained masks

Model input - voxel grid (sdf + mask), grasp, scaling parameter

For visualization - mesh, query points

In [2]:
import os
import json
import h5py
import trimesh
import trimesh.path
import trimesh.transformations as tra
import numpy as np
# from acronym_tools import load_mesh, load_grasps, create_gripper_marker

import os, json
import trimesh
import mesh2sdf
import numpy as np
from scipy.spatial.transform import Rotation as R
from tqdm import tqdm
import pickle

def makedirs(dirname):
    """
    Create the directory `dirname`, including missing parents, if needed.

    `exist_ok=True` makes the call a no-op when the directory is already
    there and avoids the race in a separate "exists, then create" check,
    where another process can create the directory in between.

    Note: if `dirname` exists but is a regular file, `os.makedirs` raises
    FileExistsError instead of silently doing nothing.
    """
    os.makedirs(dirname, exist_ok=True)

def create_voxel_grid(mesh, n=32):
    """
    Normalize a mesh and voxelize it with the mesh2sdf library.

    The vertices are centred on the midpoint of their axis-aligned bounding
    box and scaled so that the longest bounding-box side has length
    2 * 0.8 = 1.6. The normalized vertices are then passed to
    mesh2sdf.compute with grid size n.

    Input -
        1. mesh --> object exposing `vertices` and `faces`
                    (NOTE: its `vertices` are overwritten in place with the
                    normalized ones)
        2. n --> size of voxel grid
    Output -
        1. sdf --> result of mesh2sdf.compute
                   (presumably an (n x n x n) array of signed distances - TODO confirm)
        2. scale --> scalar factor applied to the centred vertices
        3. center --> bounding-box midpoint subtracted from the vertices
        4. mesh --> the input mesh object, now carrying the normalized vertices
    """
    fill_ratio = 0.8
    iso_level = 2 / n

    # Centre on the bounding-box midpoint, then scale by the longest side.
    verts = mesh.vertices
    lower = verts.min(0)
    upper = verts.max(0)
    center = (lower + upper) * 0.5
    scale = 2.0 * fill_ratio / (upper - lower).max()
    verts = (verts - center) * scale

    sdf = mesh2sdf.compute(
        verts, mesh.faces, n, fix=True, level=iso_level, return_mesh=False)

    mesh.vertices = verts
    return sdf, scale, center, mesh

def point_to_voxel(point, grid_size):
    """
    Map coordinates in [-1, 1] onto integer voxel indices in [0, grid_size - 1].

    Values outside [-1, 1] are clamped to the nearest valid index; the
    fractional part is dropped by the integer cast (no rounding).
    """
    last_index = grid_size - 1
    continuous_index = ((point + 1) / 2) * last_index
    clamped = np.clip(continuous_index, 0, last_index)
    return clamped.astype(int)

def update_mask(mask, points):
    """
    Set to 1 every voxel of `mask` that contains one of `points`.

    `mask` is modified in place and also returned. Its first dimension is
    used as the grid size for all three axes, so a cubic grid is assumed.
    """
    side = mask.shape[0]
    for pt in points:
        idx = point_to_voxel(pt, side)
        mask[idx[0], idx[1], idx[2]] = 1
    return mask

def center_grasps(grasps, center):
    """
    Shift the translation part of 4x4 grasp transforms by -center.

    A 4x4 offset matrix that is zero everywhere except the first three
    entries of the last column (set to -center[0..2]) is added to `grasps`;
    NumPy broadcasting applies it to every matrix when `grasps` is a stack.
    The input array is not modified.
    """
    offset = np.zeros((4, 4))
    for axis in range(3):
        offset[axis, 3] = -center[axis]
    return grasps + offset

def get_n_query_points_and_grasps(data, T, center_scale, norm_scale, grasp_success_idxs, n=4):
    """
    Collect up to `n` (mask, query points, constrained grasps) samples.

    At most 4 * n rendered point clouds are visited in random order. For
    each one, a single set of query points is drawn at random, mapped through
    the inverse of that rendering's camera pose, and kept only if it contains
    more than 10 points.

    Input -
        1. data --> mapping providing 'rendering/point_clouds',
                    'rendering/camera_poses' and the two 'query_points/...'
                    entries indexed below
        2. T --> grasp transforms indexed by grasp id
                 (presumably an (N, 4, 4) array - TODO confirm)
        3. center_scale --> offset subtracted from the query points and from
                            the grasp translations
        4. norm_scale --> factor applied to the centred query points
        5. grasp_success_idxs --> indices of the successful grasps
        6. n --> maximum number of samples to return
    Output -
        list (possibly empty, never None) of dicts with keys
        'mask', 'query_points_normalized' and 'constrained_grasps'
    """
    grid_size = 32          # same resolution as the n=32 default of create_voxel_grid
    min_query_points = 10   # sets with this many points or fewer are discarded

    point_clouds = data['rendering/point_clouds']
    num_pc = len(point_clouds)
    rand_pc_ix = np.random.choice(num_pc, size=min(n*4, num_pc), replace=False)

    output = []
    for pc_ix in rand_pc_ix:
        if len(output) >= n:
            break

        pc = point_clouds[pc_ix]
        grasp_ix = data['query_points/grasp_indices_for_each_point_with_grasp_on_each_rendered_point_cloud'][pc_ix]
        qp_ixes = data['query_points/points_with_grasps_on_each_rendered_point_cloud'][pc_ix]
        cam_pose = data['rendering/camera_poses'][pc_ix]

        if len(qp_ixes) == 0:
            continue

        # Randomly pick one set of query points. randint's upper bound is
        # exclusive, so passing len(qp_ixes) covers every index; the previous
        # `len(qp_ixes) - 1` raised for a single set and never chose the last.
        k = np.random.randint(len(qp_ixes))
        qp_ixes_rand = qp_ixes[k]
        grasp_ix_rand = grasp_ix[k]
        # Keep only the grasps of this set that are also successful.
        intersection_ix = np.intersect1d(grasp_ix_rand, grasp_success_idxs)

        # Apply the inverse camera pose to the query points (homogeneous coords).
        cam_pose_inv = np.linalg.inv(cam_pose)
        query_point_arr = pc[qp_ixes_rand]
        query_point_arr_added_dim = np.concatenate(
            [query_point_arr, np.ones_like(query_point_arr[:, :1])], axis=1)
        new_qp = (cam_pose_inv @ query_point_arr_added_dim.T).T[:, :3]

        if new_qp.shape[0] <= min_query_points:
            continue

        # Same normalization as the mesh: subtract the centre, then scale.
        new_qp_norm = (new_qp - center_scale) * norm_scale
        mask = update_mask(np.zeros((grid_size, grid_size, grid_size)), new_qp_norm)

        output.append({
            'mask': mask,
            'query_points_normalized': new_qp_norm,
            # We center the grasps but we don't scale them, instead we will save
            # this scale value and provide it as a input to the model
            'constrained_grasps': center_grasps(T[intersection_ix], center_scale),
        })

    # Always hand back the list; previously the function returned None
    # whenever fewer than n + 1 valid samples were encountered.
    return output

In [27]:
# Directory that holds the CONG data files to convert.
cong_dir = "/home/username/data/cong"
cong_files = os.listdir(cong_dir)

# os.listdir returns bare names; turn them into full paths and report the count.
cong_files = [os.path.join(cong_dir, i) for i in cong_files]
print(len(cong_files))

7896


In [28]:
# Output locations, read as module-level globals by cong_data_prepare:
# binary constraint masks (.npz), grasp files (.h5) and SDF voxel grids (.npz).
masks_output_dirname = "/home/username/data/constrained/constrain_masks"
grasps_output_dirname = "/home/username/data/constrained/grasps"
voxels_output_dirname = "/home/username/data/constrained/voxel_grids"

Single-process setup

In [29]:
def cong_data_prepare(cong_path):
    """
    Convert one CONG pickle into a constraint mask, an SDF voxel grid and a grasp file.

    Steps:
        1. Load the pickle and the mesh it refers to, and apply the mesh scale.
        2. Normalize the mesh and compute its 32^3 voxel grid (create_voxel_grid).
        3. Visit the rendered point clouds in random order until one yields a
           randomly chosen query-point set with more than 10 points.
        4. Write the mask (.npz), the voxel grid (.npz) and the centred
           successful grasps plus metadata (.h5).

    The output directories are taken from the module-level names
    `masks_output_dirname`, `voxels_output_dirname` and `grasps_output_dirname`
    and are created if missing. Output file names are the input file name with
    its first 12 characters and its extension removed.

    Input -
        1. cong_path --> path to a pickled CONG file; the second "_"-separated
                         token of its basename selects the mesh sub-directory
    Output -
        0 on success, 1 if no usable query-point set was found or saving failed
        (the caller sums these values to count failures)
    """
    cong_fname = cong_path

    mesh_root = "/home/username/data/meshes"
    obj_type = os.path.basename(cong_fname).split("_")[1]

    # Read cong file
    # NOTE(security): pickle.load can execute arbitrary code - only use it on
    # trusted CONG files.
    with open(cong_fname, 'rb') as f:
        cong_data = pickle.load(f)

    # Get grasps and success from cong data (same as acronym)
    T = cong_data['grasps/transformations']
    success = cong_data['grasps/successes']

    mesh_scale = cong_data['mesh/scale'] # Scale from cong data (same as acronym)

    # Loading and applying initial scale to mesh
    mesh_fname = os.path.join(mesh_root, obj_type, os.path.basename(cong_data['mesh/file']))
    mesh = trimesh.load(mesh_fname, force='mesh')
    mesh = mesh.apply_scale(mesh_scale)

    # Getting indices for all successful grasps
    good_idxs = np.argwhere(success==1)[:,0]

    # Normalizing mesh between -1 and 1, creating voxel grid
    sdf, norm_scale, center_scale, mesh = create_voxel_grid(mesh, n=32)

    point_clouds = cong_data['rendering/point_clouds']
    grasp_indices = cong_data['query_points/grasp_indices_for_each_point_with_grasp_on_each_rendered_point_cloud']
    query_indices = cong_data['query_points/points_with_grasps_on_each_rendered_point_cloud']
    camera_poses = cong_data['rendering/camera_poses']

    # Only the first valid sample is ever saved, so stop as soon as it is found.
    sample = None
    for pc_ix in np.random.permutation(len(point_clouds)):
        pc = point_clouds[pc_ix]
        grasp_ix = grasp_indices[pc_ix]
        qp_ixes = query_indices[pc_ix]
        cam_pose = camera_poses[pc_ix]

        if len(qp_ixes) == 0:
            continue

        # Randomly picking one set of query points. randint's upper bound is
        # exclusive, so len(qp_ixes) covers every index (the former
        # `len(qp_ixes) - 1` could never select the last set and forced point
        # clouds with a single set to be skipped).
        k = np.random.randint(len(qp_ixes))
        qp_ixes_rand = qp_ixes[k]
        grasp_ix_rand = grasp_ix[k]
        intersection_ix = np.intersect1d(grasp_ix_rand, good_idxs)

        cam_pose_inv = np.linalg.inv(cam_pose)

        # Apply the inverse camera pose to the query points (homogeneous coords).
        query_point_arr = pc[qp_ixes_rand]
        query_point_arr_added_dim = np.concatenate(
            [query_point_arr, np.ones_like(query_point_arr[:, :1])], axis=1)
        new_qp = (cam_pose_inv @ query_point_arr_added_dim.T).T[:, :3]

        if new_qp.shape[0] <= 10:
            continue

        # Same normalization as the mesh: subtract the centre, then scale.
        new_qp_norm = (new_qp - center_scale) * norm_scale
        sample = {
            'mask': update_mask(np.zeros((32, 32, 32)), new_qp_norm),
            'cam_pose': cam_pose,
            'cam_pose_inv': cam_pose_inv,
            # We center the grasps but we don't scale them, instead we will save
            # this scale value and provide it as a input to the model
            'constrained_grasps': center_grasps(T[intersection_ix], center_scale),
            'new_qp': new_qp,
        }
        break

    if sample is None:
        # No point cloud offered a query-point set with more than 10 points.
        return 1

    stem = os.path.splitext(os.path.basename(cong_fname)[12:])[0]
    mask_output_fname = os.path.join(masks_output_dirname, stem + ".npz")
    voxel_grid_output_fname = os.path.join(voxels_output_dirname, stem + ".npz")
    grasp_output_fname = os.path.join(grasps_output_dirname, stem + ".h5")

    try:
        for out_dir in (masks_output_dirname, voxels_output_dirname, grasps_output_dirname):
            os.makedirs(out_dir, exist_ok=True)

        np.savez_compressed(mask_output_fname, sample['mask'])
        np.savez_compressed(voxel_grid_output_fname, sdf)

        with h5py.File(grasp_output_fname, 'w') as new_data:
            new_data.create_dataset("grasps/transforms", data=sample['constrained_grasps'])
            new_data.create_dataset("object/file", data=mesh_fname)
            new_data.create_dataset("object/scale", data=mesh_scale)
            new_data.create_dataset("object/norm_scale", data=norm_scale)
            new_data.create_dataset("object/center_scale", data=center_scale)
            # These query points are already inverted by cam_pose_inv
            new_data.create_dataset("object/query_points", data=sample['new_qp'])
            new_data.create_dataset("camera_pose", data=sample['cam_pose'])
            new_data.create_dataset("camera_pose_inv", data=sample['cam_pose_inv'])
    except Exception as e:
        # Best-effort batch job: keep going, but report why this file failed
        # instead of swallowing the error. Files written before the failure
        # are left in place.
        print(f"cong_data_prepare: failed to save outputs for {cong_fname}: {e!r}")
        return 1

    return 0

# Convert every CONG file sequentially. cong_data_prepare returns 1 on failure
# and 0 on success, so the running sum is the number of failed files.
failed_counts = 0
for i in tqdm(range(len(cong_files))):
    # print(i)
    s = cong_data_prepare(cong_files[i])
    failed_counts += s

Multiprocessing setup

In [30]:
import multiprocessing

# Same conversion as the sequential loop, spread over 4 worker processes.
# `results` holds one cong_data_prepare return value (0 or 1) per input file.
# NOTE(review): cong_data_prepare draws from NumPy's global RNG; confirm that
# the worker processes are seeded as intended.
with multiprocessing.Pool(processes=4) as pool:
        results = pool.map(cong_data_prepare, cong_files)

# voxel_grid_maker(grasp_paths[0])

  stacked = np.column_stack(stacked).round().astype(np.int64)


### Creating new masks with L2 distance values instead of binary 1s and 0s

In [3]:
# Full paths of the grasp files (.h5) written by cong_data_prepare.
grasps_output_dirname = "/home/username/data/constrained/grasps"
grasp_files = [os.path.join(grasps_output_dirname, i) for i in os.listdir(grasps_output_dirname)]
len(grasp_files)

7610

In [4]:
# Read back the first grasp file to inspect what was stored.
# `dataset[()]` loads the full dataset value into memory.
grasp_fname = grasp_files[0]
with h5py.File(grasp_fname, 'r') as data:
    # The mesh path comes back as bytes and is decoded to str.
    mesh_fname = data["object/file"][()].decode('utf-8')
    # mesh_type = mesh_fname.split('/')[1]
    # mesh_id = mesh_fname.split('/')[-1].split('.')[0]
    mesh_scale = data["object/scale"][()]
    mesh_norm_scale = data["object/norm_scale"][()]

    g = data['grasps/transforms'][()]
    mesh_center_scale = data["object/center_scale"][()]
    # Query points as saved by cong_data_prepare: inverse camera pose applied,
    # but not yet centred or scaled.
    qp_unscaled = data['object/query_points'][()]
    
# Notebook display of the normalization scale and centre.
mesh_norm_scale, mesh_center_scale

(6.673825982930406, array([-4.65041885e-06,  2.13333187e-05,  1.19870689e-01]))

In [8]:
import numpy as np

def create_coordinate_grid(grid_size):
    """
    Build a (grid_size, grid_size, grid_size, 3) array of xyz coordinates.

    Each axis is sampled at `grid_size` evenly spaced values from -1 to 1
    (both ends included). With 'ij' indexing, entry [i, j, k] holds
    (axis[i], axis[j], axis[k]).
    """
    axis = np.linspace(-1, 1, grid_size)
    per_axis_grids = np.meshgrid(axis, axis, axis, indexing='ij')
    return np.stack(per_axis_grids, axis=-1)

def update_distance_mask(grid_size, points):
    """
    Return a grid holding, per voxel, the L2 distance to the nearest point.

    The grid coordinates come from create_coordinate_grid(grid_size). The
    mask starts at +inf everywhere and is lowered point by point, so with no
    points the result stays +inf.
    """
    grid_coords = create_coordinate_grid(grid_size)
    nearest = np.full((grid_size, grid_size, grid_size), np.inf)

    for pt in points:
        # Distance from every voxel coordinate to this point; keep the minimum so far.
        dist_to_pt = np.linalg.norm(grid_coords - pt, axis=-1)
        np.minimum(nearest, dist_to_pt, out=nearest)

    return nearest

# Example
grid_size = 32 
points = (qp_unscaled - mesh_center_scale) * mesh_norm_scale
mask = update_distance_mask(grid_size, points)
mask

array([[[1.37445774, 1.3523952 , 1.32976944, ..., 1.53247937,
         1.56700394, 1.60296197],
        [1.33886927, 1.31621033, 1.29326455, ..., 1.4991519 ,
         1.53414744, 1.57085776],
        [1.30550083, 1.28225237, 1.25900926, ..., 1.4677803 ,
         1.50334297, 1.5407874 ],
        ...,
        [1.30271986, 1.29065786, 1.28173361, ..., 1.45826005,
         1.48890866, 1.52167681],
        [1.33700505, 1.32525514, 1.31656543, ..., 1.49167831,
         1.52165398, 1.55373169],
        [1.37346666, 1.36203133, 1.35357773, ..., 1.52709223,
         1.55638599, 1.58776215]],

       [[1.32750268, 1.30464635, 1.28271254, ..., 1.49044872,
         1.52548303, 1.56239694],
        [1.29062009, 1.26709863, 1.24482847, ..., 1.45586553,
         1.49171227, 1.52944148],
        [1.25597043, 1.2317875 , 1.20920181, ..., 1.42336789,
         1.46001274, 1.49854027],
        ...,
        [1.24957629, 1.23699619, 1.22768193, ..., 1.41159684,
         1.44323674, 1.47701848],
        [1.2

In [25]:
# Directory receiving one distance mask (.npz) per grasp file.
# NOTE(review): nothing in this cell creates the directory - confirm it exists
# before running.
dist_mask_output_dirname = "/home/username/data/constrained/constrain_masks_distance"

# For all grasps
for i in tqdm(range(len(grasp_files))):
    grasp_fname = grasp_files[i]
    temp = os.path.basename(grasp_fname)
    with h5py.File(grasp_fname, 'r') as data:
        # NOTE(review): mesh_fname, mesh_scale and g are read but not used in
        # this loop; only the scale, centre and query points feed the mask.
        mesh_fname = data["object/file"][()].decode('utf-8')
        # mesh_type = mesh_fname.split('/')[1]
        # mesh_id = mesh_fname.split('/')[-1].split('.')[0]
        mesh_scale = data["object/scale"][()]
        mesh_norm_scale = data["object/norm_scale"][()]

        g = data['grasps/transforms'][()]
        mesh_center_scale = data["object/center_scale"][()]
        qp_unscaled = data['object/query_points'][()]
    
    grid_size = 32
    # Centre and scale the stored query points, then compute per-voxel distances.
    points = (qp_unscaled - mesh_center_scale) * mesh_norm_scale
    mask = update_distance_mask(grid_size, points)
    # print(mask.shape)

    # Output keeps the grasp file's base name, with the extension swapped to .npz.
    dist_mask_output_fname = os.path.join(dist_mask_output_dirname, os.path.splitext(temp)[0]+".npz")
    np.savez_compressed(dist_mask_output_fname, mask)
    # break;

100%|██████████| 7610/7610 [1:03:28<00:00,  2.00it/s]


### Checking if everything is running correctly

In [1]:
# Same three output directories that the preparation step writes to,
# re-declared here so the check can run in a fresh session.
masks_output_dirname = "/home/username/data/constrained/constrain_masks"
grasps_output_dirname = "/home/username/data/constrained/grasps"
voxels_output_dirname = "/home/username/data/constrained/voxel_grids"

In [3]:
import os
# cong_data_prepare writes one mask, one voxel grid and one grasp file per
# successfully converted input, so the three directory counts should match.
voxels = os.listdir(voxels_output_dirname)
grasps = os.listdir(grasps_output_dirname)
masks = os.listdir(masks_output_dirname)

len(voxels), len(grasps), len(masks)

(7610, 7610, 7610)