In [1]:
import os
import io
import trimesh
import b3d
import genjax
import jax
import jax.numpy as jnp
import numpy as np
import rerun as rr
from PIL import Image
from b3d import Pose
from b3d import Mesh, Pose
from b3d.pose import Pose, camera_from_position_and_target
import matplotlib.pyplot as plt
import h5py
import b3d.utils as utils
from scipy.spatial.transform import Rotation as scipyR

In [2]:
def scale_mesh(vertices, scale_factor):
    """
    Scale mesh vertices independently along the x, y and z axes.

    The input is NOT modified: a scaled copy is returned, so re-running a
    cell that calls this function cannot scale the same mesh twice.

    :param vertices: 2-D array-like whose first three columns are the x, y, z
        coordinates of each vertex.
    :param scale_factor: Sequence with at least three entries; entry ``i``
        multiplies column ``i`` of ``vertices``.

    :return: A new array with the first three columns scaled. Floating-point
        inputs keep their dtype; integer inputs are promoted to float so that
        fractional scale factors work.
    """
    source = np.asarray(vertices)
    # Promote integer vertex arrays to a float dtype; keep float32/float64 as-is.
    scaled = source.astype(np.result_type(source.dtype, np.float32), copy=True)
    factors = np.asarray(scale_factor, dtype=float)[:3]
    # Broadcast the three per-axis factors across every vertex row.
    scaled[:, :3] *= factors
    return scaled

def euler_angles_to_quaternion(euler: np.ndarray) -> np.ndarray:
    """
    Build a quaternion from a vector of Euler angles given in degrees.

    Source: https://pastebin.com/riRLRvch

    :param euler: The Euler angles vector; entries 0, 1 and 2 are read as
        pitch, yaw and roll respectively.

    :return: The quaternion as an array ordered ``[x, y, z, w]``.
    """
    # Half-angles in radians, one per axis.
    half_pitch = np.radians(euler[0] * 0.5)
    half_yaw = np.radians(euler[1] * 0.5)
    half_roll = np.radians(euler[2] * 0.5)

    cp, sp = np.cos(half_pitch), np.sin(half_pitch)
    cy, sy = np.cos(half_yaw), np.sin(half_yaw)
    cr, sr = np.cos(half_roll), np.sin(half_roll)

    return np.array(
        [
            sy * cp * sr + cy * sp * cr,  # x
            sy * cp * cr - cy * sp * sr,  # y
            cy * cp * sr - sy * sp * cr,  # z
            cy * cp * cr + sy * sp * sr,  # w
        ]
    )

In [None]:
# Stimulus to load and the nominal image size.
stim_name = 'pilot_dominoes_0mid_d3chairs_o1plants_tdwroom_0001'
im_width = 350
im_height = 350

# Directories (relative to the b3d root) holding the physion assets and the
# resnet inference results.
physion_assets_path = os.path.join(b3d.get_root_path(), "assets/physion/")
resnet_inference_path = os.path.join(b3d.get_root_path(), "resnet_results/")

# Per-stimulus files and the shared mesh directory.
hdf5_file_path = os.path.join(physion_assets_path, f"{stim_name}.hdf5")
mesh_file_path = os.path.join(physion_assets_path, "all_flex_meshes/core")
json_file_path = os.path.join(resnet_inference_path, f"{stim_name}.json")

In [4]:
# Camera constants. `vfov` starts in degrees and is converted to radians below.
vfov = 54.43222
near_plane = 0.1
far_plane = 100

depth_frames = []
image_frames = []
with h5py.File(hdf5_file_path, "r") as f:
    frames = f['frames']
    static = f['static']

    # Per-frame depth maps and RGB images; the image dataset holds encoded
    # bytes, so it is decoded through PIL.
    for key in frames.keys():
        frame_images = frames[key]['images']
        depth = jnp.array(frame_images['_depth_cam0'])
        depth_frames.append(depth)
        image = jnp.array(Image.open(io.BytesIO(frame_images['_img_cam0'][:])))
        image_frames.append(image)

    # Camera info.
    # NOTE(review): the projection matrix is read from frame '0010' while the
    # camera matrix comes from frame '0000' — confirm this is intentional.
    camera_azimuth = np.array(f['azimuth']['cam_0'])
    camera_matrix = np.array(
        frames['0000']['camera_matrices']['camera_matrix_cam0']
    ).reshape((4, 4))
    projection_matrix = np.array(
        frames['0010']['camera_matrices']['projection_matrix_cam0']
    ).reshape((4, 4))

    # Static object info.
    object_ids = np.array(static['object_ids'])
    model_names = np.array(static['model_names'])
    assert len(object_ids) == len(model_names)

    # Empty distractor/occluder lists are represented as None downstream.
    distractors = np.array(static['distractors'])
    if distractors.size == 0:
        distractors = None
    occluders = np.array(static['occluders'])
    if occluders.size == 0:
        occluders = None

    initial_position = np.array(static['initial_position'])
    initial_rotation = np.array(static['initial_rotation'])
    scales = np.array(static['scale'])

# Stack the per-frame lists into arrays and read the frame count / resolution.
depth_arr = jnp.asarray(depth_frames)
image_arr = jnp.asarray(image_frames)
FINAL_T, height, width = image_arr.shape[:3]

# Calculate the focal lengths from the vertical field of view.
# Notice that hfov and vfov are different if height != width.
# We can also get the intrinsic matrix from opengl's perspective matrix.
# http://kgeorge.github.io/2014/03/08/calculating-opengl-perspective-matrix-from-opencv-intrinsic-matrix
vfov = vfov / 180.0 * np.pi
tan_half_vfov = np.tan(vfov / 2.0)
tan_half_hfov = tan_half_vfov * width / float(height)
fx = width / 2.0 / tan_half_hfov  # focal length in pixel space
fy = height / 2.0 / tan_half_vfov

In [5]:
def _matching_model_ids(names):
    """Return the indices in ``model_names`` that equal any entry of ``names``.

    ``names`` may be None or empty, in which case no index matches. The check
    is explicit because truth-testing a NumPy array with more than one element
    raises ValueError.
    """
    if names is None or np.size(names) == 0:
        return []
    return np.concatenate(
        [np.where(model_names == name)[0] for name in names], axis=0
    ).tolist()


# Indices of models that must be left out of the scene.
distractor_ids = _matching_model_ids(distractors)
occluder_ids = _matching_model_ids(occluders)
excluded_model_ids = distractor_ids + occluder_ids

# Everything that is neither a distractor nor an occluder, in file order.
included_model_ids = [idx for idx in range(len(object_ids)) if idx not in excluded_model_ids]
included_model_names = [model_names[idx] for idx in included_model_ids]


In [6]:
# Keep only the static attributes of the included models.
# included_model_ids is built in ascending order, so direct indexing yields
# the same ordering as filtering the full arrays.
object_initial_positions = [initial_position[model_idx] for model_idx in included_model_ids]
object_initial_rotations = [initial_rotation[model_idx] for model_idx in included_model_ids]
object_scales = [scales[model_idx] for model_idx in included_model_ids]

# Load each model's .obj file and store (scaled vertices, faces) per object.
object_meshes = []
for idx, model_name in enumerate(included_model_names):
    obj_path = os.path.join(mesh_file_path, f"{model_name.decode('UTF-8')}.obj")
    trim = trimesh.load(obj_path)
    scaled_vertices = scale_mesh(trim.vertices, object_scales[idx])
    object_meshes.append((scaled_vertices, trim.faces))


In [7]:
# Initialise rerun logging through b3d before any rr_log_* / rr_visualize call.
# NOTE(review): "demo_physion" is presumably the recording/application name —
# confirm against b3d.rr_init.
b3d.rr_init("demo_physion")

In [8]:
# rr.log("/", rr.ViewCoordinates.RIGHT_HAND_Y_DOWN, static=True)  # Set an up-axis
# rr.log("/", rr.ViewCoordinates.LEFT_HAND_Y_UP, static=True)  # Set an up-axis

In [9]:
# Build one Pose and one b3d Mesh per included object, logging both to rerun.
all_object_poses = []
all_meshes = []
for idx in range(len(included_model_ids)):
    # Initial rotations are Euler angles; convert them to a quaternion.
    position = jnp.asarray(object_initial_positions[idx])
    quaternion = jnp.asarray(euler_angles_to_quaternion(object_initial_rotations[idx]))
    object_pose = Pose(position, quaternion)
    print(object_pose)
    b3d.rr_log_pose(f"{idx}", object_pose)
    all_object_poses.append(object_pose)

    vertices, faces = object_meshes[idx][0], object_meshes[idx][1]
    mesh = b3d.Mesh.from_trimesh(trimesh.Trimesh(vertices=vertices, faces=faces))
    all_meshes.append(mesh)
    # Visualise the mesh placed at its initial pose.
    mesh.transform(object_pose).rr_visualize(f"mesh_{idx}")

Pose(position=Array([0.85, 0.  , 0.  ], dtype=float32), quaternion=Array([0., 0., 0., 1.], dtype=float32))
Pose(position=Array([0.25, 0.  , 0.  ], dtype=float32), quaternion=Array([0., 0., 0., 1.], dtype=float32))
Pose(position=Array([-0.25,  0.  ,  0.  ], dtype=float32), quaternion=Array([0.        , 0.13353197, 0.        , 0.9910445 ], dtype=float32))


In [10]:
# Upper-left 3x3 block and the first three entries of the last column of the
# 4x4 camera matrix.
R = camera_matrix[:3, :3]
T = camera_matrix[:3, 3]

# Camera position p solves (-R) p = T; the rotation used is the negated transpose of R.
camera_position_from_matrix = np.linalg.solve(-R, T)
camera_rotation_from_matrix = -R.T

camera_quaternion = b3d.Rot.from_matrix(camera_rotation_from_matrix).as_quat()
camera_pose = Pose(camera_position_from_matrix, camera_quaternion)
utils.rr_log_pose("camera_pose", camera_pose)


In [12]:
# Renderer configuration: image size and focal lengths come from the loaded
# stimulus, and the principal point is placed at the image centre.
renderer_kwargs = dict(
    width=width,
    height=height,
    fx=fx,
    fy=fy,
    cx=width / 2,
    cy=height / 2,
    near=near_plane,
    far=far_plane,
)
renderer = b3d.RendererOriginal(**renderer_kwargs)

If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


In [13]:
# Show the list of initial object poses as this cell's output.
all_object_poses

[Pose(position=Array([0.85, 0.  , 0.  ], dtype=float32), quaternion=Array([0., 0., 0., 1.], dtype=float32)),
 Pose(position=Array([0.25, 0.  , 0.  ], dtype=float32), quaternion=Array([0., 0., 0., 1.], dtype=float32)),
 Pose(position=Array([-0.25,  0.  ,  0.  ], dtype=float32), quaternion=Array([0.        , 0.13353197, 0.        , 0.9910445 ], dtype=float32))]

In [14]:
# Merge every object mesh, each placed at its own pose, into a single scene mesh.
scene_mesh = Mesh.transform_and_merge_meshes(all_meshes, all_object_poses)

# Re-express the scene relative to the camera by applying the inverse camera pose.
inverse_camera_pose = camera_pose.inv()
scene_mesh_in_camera_frame = scene_mesh.transform(inverse_camera_pose)
# Optional debug view:
# scene_mesh_in_camera_frame.rr_visualize("scene_mesh_in_camera_frame")

In [16]:
# Render the camera-frame scene; channel index 3 of the result is logged as depth.
rgbd = renderer.render_rgbd_from_mesh(scene_mesh_in_camera_frame)
rendered_depth = rgbd[..., 3]

# First observed depth frame, flipped along axis 1 before logging next to the render.
observed_depth = np.flip(depth_arr[0], 1)

b3d.rr_log_depth(rendered_depth, "depth/")
b3d.rr_log_depth(observed_depth, "depth/observed")

In [28]:
# with h5py.File(hdf5_file_path, "r") as f:
#     for key in f['frames'].keys():
#         ang_vel = jnp.array(f['frames'][key]['objects']['rotations_cam0'])
#         print(ang_vel)
#         break