In [1]:
import os
import io
import trimesh
import b3d
import genjax
import jax
import jax.numpy as jnp
import numpy as np
import rerun as rr
from PIL import Image
from b3d import Pose
from b3d import Mesh, Pose
from b3d.pose import Pose, camera_from_position_and_target
from scipy.spatial import transform
import h5py
import b3d.utils as utils
# NOTE(review): presumably enables genjax's pretty-printed output for the rest
# of the session — confirm against the genjax documentation.
genjax.pretty()

In [2]:
# Where the Physion metadata lives: the asset directory under b3d's root path,
# and the single HDF5 trial file this notebook reads.
physion_assets_path = os.path.join(b3d.get_root_path(), "assets/physion/")
hdf5_file_path = os.path.join(
    physion_assets_path,
    "hdf5s/pilot_dominoes_0mid_d3chairs_o1plants_tdwroom_0012.hdf5",
)

In [3]:
# Camera constants for this trial: vertical field of view (in degrees) and the
# near/far clipping planes that are later passed to the renderer.
vfov_deg = 54.43222
near_plane = 0.1
far_plane = 100

with h5py.File(hdf5_file_path, "r") as f:
    frames = f['frames']

    # Extract the per-frame depth maps and RGB images of camera 0.
    # Frames are collected as NumPy arrays and converted to JAX once per stack,
    # rather than converting every frame individually and re-stacking them.
    depth_frames = []
    image_frames = []
    for key in frames.keys():
        frame_images = frames[key]['images']
        depth_frames.append(np.array(frame_images['_depth_cam0']))
        # The RGB frame is read as a byte buffer and decoded with PIL.
        image_frames.append(np.array(Image.open(io.BytesIO(frame_images['_img_cam0'][:]))))
    depth_arr = jnp.asarray(np.stack(depth_frames))
    image_arr = jnp.asarray(np.stack(image_frames))
    FINAL_T, height, width = image_arr.shape[:3]

    # Extract camera info.
    # NOTE(review): the camera matrix is read from frame '0000' but the
    # projection matrix from frame '0010' — confirm this is intentional.
    camera_azimuth = np.array(f['azimuth']['cam_0'])
    camera_matrix = np.array(frames['0000']['camera_matrices']['camera_matrix_cam0']).reshape((4, 4))
    projection_matrix = np.array(frames['0010']['camera_matrices']['projection_matrix_cam0']).reshape((4, 4))

    # Extract object info (one entry per object id).
    static = f['static']
    object_ids = np.array(static['object_ids'])
    model_names = np.array(static['model_names'])
    distractors = np.array(static['distractors'])
    occluders = np.array(static['occluders'])
    initial_position = np.array(static['initial_position'])
    initial_rotation = np.array(static['initial_rotation'])
    scales = np.array(static['scale'])
    mesh_group = static['mesh']
    meshes_faces = [np.array(mesh_group[f'faces_{idx}']) for idx in range(len(object_ids))]
    meshes_vertices = [np.array(mesh_group[f'vertices_{idx}']) for idx in range(len(object_ids))]

# Calculate the intrinsic matrix entries from the vertical field of view.
# Notice that hfov and vfov are different if height != width.
# We can also get the intrinsic matrix from OpenGL's perspective matrix:
# http://kgeorge.github.io/2014/03/08/calculating-opengl-perspective-matrix-from-opencv-intrinsic-matrix
vfov = vfov_deg / 180.0 * np.pi  # radians from here on
tan_half_vfov = np.tan(vfov / 2.0)
tan_half_hfov = tan_half_vfov * width / float(height)
fx = width / 2.0 / tan_half_hfov  # focal length in pixel space
fy = height / 2.0 / tan_half_vfov

In [7]:
def scale_mesh(vertices, scale_factor):
    """
    Scale mesh vertices per axis and swap their second and third columns.

    The input array is left untouched; a scaled copy is returned. (Mutating
    the caller's array made re-running the calling cell apply the scale and
    the column swap a second time.)

    :param vertices: 2-D array of vertex coordinates with at least three columns.
    :param scale_factor: Indexable with three scale values, one per column 0..2.

    :return: A new array of the same shape and dtype as ``vertices``, scaled
        and with columns 1 and 2 exchanged.
    """
    scaled = np.array(vertices, copy=True)
    for axis in range(3):
        scaled[:, axis] *= scale_factor[axis]
    # Exchange columns 1 and 2; the right-hand side is a copy, so the swap is safe.
    scaled[:, [2, 1]] = scaled[:, [1, 2]]
    return scaled

def euler_angles_to_quaternion(euler: np.ndarray) -> np.ndarray:
    """
    Convert Euler angles (in degrees) to a quaternion.

    Entries 1 and 2 of the input are exchanged before the conversion. The
    exchange is done on a private copy, so the caller's array is not modified
    and repeated calls with the same array give the same result.

    Source: https://pastebin.com/riRLRvch

    :param euler: The Euler angles vector (three angles, in degrees).

    :return: The quaternion as ``[x, y, z, w]``, with the absolute value taken
        of every component.
    """
    angles = np.array(euler, copy=True)
    angles[[2, 1]] = angles[[1, 2]]

    pitch = np.radians(angles[0] * 0.5)
    cp = np.cos(pitch)
    sp = np.sin(pitch)

    yaw = np.radians(angles[1] * 0.5)
    cy = np.cos(yaw)
    sy = np.sin(yaw)

    roll = np.radians(angles[2] * 0.5)
    cr = np.cos(roll)
    sr = np.sin(roll)

    x = sy * cp * sr + cy * sp * cr
    y = sy * cp * cr - cy * sp * sr
    z = cy * cp * sr - sy * sp * cr
    w = cy * cp * cr + sy * sp * sr
    # NOTE(review): taking the absolute value of each component discards sign
    # information, so rotations with negative components are not preserved.
    # Kept as-is because existing results depend on it — confirm it is intended.
    return np.abs(np.array([x, y, z, w]))

In [8]:
# Objects whose model name is listed among the distractors or the occluders are
# left out of the scene.
# A membership mask is used instead of concatenating `np.where` results of
# `model_names == ...`: that form only works when the name arrays broadcast
# against each other and both lookups return the same number of matches.
is_excluded = np.isin(model_names, distractors) | np.isin(model_names, occluders)
excluded_model_ids = np.flatnonzero(is_excluded)
included_model_names = [model_names[idx] for idx in range(len(object_ids)) if idx not in excluded_model_ids]
# Object ids are shifted by one to obtain the indices used by the later cells.
included_model_ids = [object_ids[idx]-1 for idx in range(len(object_ids)) if idx not in excluded_model_ids]


In [9]:
# Keep only the entries of the included objects.
# Positions have their second and third components exchanged.
object_initial_positions = [
    np.array([pos[0], pos[2], pos[1]])
    for idx, pos in enumerate(initial_position)
    if idx in included_model_ids
]
# Rotations and vertices are copied so that helpers which modify their argument
# cannot alter the arrays loaded from the file; this keeps the cell re-runnable.
object_initial_rotations = [
    np.array(rot, copy=True)
    for idx, rot in enumerate(initial_rotation)
    if idx in included_model_ids
]
object_scales = [scale for idx, scale in enumerate(scales) if idx in included_model_ids]
# `idx` indexes the unfiltered per-object arrays, so the scale must be looked up
# in `scales`; the filtered `object_scales` list is shorter and differently
# indexed as soon as any object is excluded.
object_meshes = [
    (scale_mesh(np.array(vertex, copy=True), scales[idx]), face)
    for idx, (face, vertex) in enumerate(zip(meshes_faces, meshes_vertices))
    if idx in included_model_ids
]


In [10]:
# Name under which this notebook's rerun session is initialised.
recording_name = "demo_physion"
b3d.rr_init(recording_name)

In [11]:
# Declare the coordinate convention of the root entity (right-handed, Y down).
root_view_coordinates = rr.ViewCoordinates.RIGHT_HAND_Y_DOWN
rr.log("/", root_view_coordinates, static=True)


In [12]:
# Build one pose and one mesh per included object and log both to rerun.
all_object_poses = []
all_meshes = []
for idx in range(len(included_model_ids)):
    # A copy of the stored rotation is converted, so the stored Euler angles
    # stay unchanged and re-running this cell yields the same poses.
    rotation_euler = np.array(object_initial_rotations[idx], copy=True)
    object_pose = Pose(
        jnp.asarray(object_initial_positions[idx]),
        jnp.asarray(euler_angles_to_quaternion(rotation_euler)),
    )
    print(object_pose)
    b3d.rr_log_pose(f"{idx}", object_pose)
    all_object_poses.append(object_pose)

    # object_meshes holds (vertices, faces) pairs.
    vertices, faces = object_meshes[idx][0], object_meshes[idx][1]
    tri_mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
    mesh = b3d.Mesh.from_trimesh(tri_mesh)
    all_meshes.append(mesh)
    mesh.transform(object_pose).rr_visualize(f"mesh_{idx}")

Pose(position=Array([0.85, 0.  , 0.  ], dtype=float32), quaternion=Array([0., 0., 0., 1.], dtype=float32))
Pose(position=Array([0.25, 0.  , 0.  ], dtype=float32), quaternion=Array([0., 0., 0., 1.], dtype=float32))
Pose(position=Array([-0.25,  0.  ,  0.  ], dtype=float32), quaternion=Array([0.        , 0.        , 0.10117607, 0.9948685 ], dtype=float32))


In [13]:
# Split the 4x4 camera matrix into its upper-left 3x3 block and the first three
# entries of its last column.
R = camera_matrix[:3, :3]
T = camera_matrix[:3, 3]

# Solve (-R) x = T for x.
a = -R
b = T.copy()
solved_position = np.linalg.solve(a, b)

# Exchange entries 1 and 2 of the solution, as is done for the object positions.
camera_position_from_matrix = solved_position[[0, 2, 1]]
# swap = np.array([[1,0,0],[0,0,1],[0,1,0]])
# cam_r = np.transpose(R @ jnp.linalg.inv(swap))
# cam_r_new = np.transpose(np.array([cam_r[:,0],-cam_r[:,1],-cam_r[:,2]]))
# camera_pose = Pose(
#     camera_position_from_matrix,
#     b3d.Rot.from_matrix(cam_r_new).as_quat()
# )

In [14]:
# Permutation that exchanges the second and third columns of a matrix when
# multiplied from the right.
swap_y_z = np.array([
    [1, 0, 0],
    [0, 0, 1],
    [0, 1, 0],
])
# Diagonal matrix that negates the second and third columns.
rot = np.diag([1, -1, -1])

# Swap the columns of R, transpose, then flip the sign of the last two columns.
swapped_rotation = R @ swap_y_z
camera_rotation_from_matrix = swapped_rotation.T @ rot
print(camera_rotation_from_matrix)


[[-0.87257326 -0.329855    0.36029425]
 [-0.48848364  0.58921647 -0.64358997]
 [ 0.         -0.7375769  -0.67526317]]


In [15]:
# Assemble the camera pose from the position and rotation derived from the
# camera matrix, then log it to rerun.
camera_quaternion = b3d.Rot.from_matrix(camera_rotation_from_matrix).as_quat()
camera_pose = Pose(camera_position_from_matrix, camera_quaternion)
utils.rr_log_pose("camera_pose", camera_pose)

In [16]:
# camera_look_at = jnp.zeros(3)
# camera_pose = Pose.from_position_and_target(
#     camera_position_from_matrix,
#     camera_look_at
# )
# utils.rr_log_pose("camera_pose", camera_pose)
# print(camera_pose.as_matrix())

In [17]:
# Renderer configured with the image size, the focal lengths computed from the
# field of view, a principal point at the image centre, and the clipping planes.
renderer_settings = dict(
    width=width,
    height=height,
    fx=fx,
    fy=fy,
    cx=width / 2,
    cy=height / 2,
    near=near_plane,
    far=far_plane,
)
renderer = b3d.RendererOriginal(**renderer_settings)

If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


In [18]:
# Merge every object mesh (placed at its pose) into a single scene mesh, then
# apply the inverse of the camera pose to it.
scene_mesh = Mesh.transform_and_merge_meshes(all_meshes, all_object_poses)
inverse_camera_pose = camera_pose.inv()
scene_mesh_in_camera_frame = scene_mesh.transform(inverse_camera_pose)
# scene_mesh_in_camera_frame.rr_visualize("scene_mesh_in_camera_frame")

In [19]:
# Render the scene mesh and log channel 3 of the result as the rendered depth.
rgbd = renderer.render_rgbd_from_mesh(scene_mesh_in_camera_frame)
rendered_depth = rgbd[..., 3]
b3d.rr_log_depth(rendered_depth, "depth")

# Log the depth map loaded from the file at frame index 10 for comparison.
observed_depth = depth_arr[10]
b3d.rr_log_depth(observed_depth, "depth/observed")