In [1]:
import os
import io
import trimesh
import b3d
import genjax
import jax
import jax.numpy as jnp
import numpy as np
import rerun as rr
from PIL import Image
from b3d import Pose
from b3d import Mesh, Pose
from b3d.pose import Pose, camera_from_position_and_target
import h5py
import b3d.utils as utils
from scipy.spatial.transform import Rotation as scipyR
# genjax.pretty()

In [2]:
# Stimulus to load, and where its Physion files live under the path returned by
# b3d.get_root_path().
stim_name = 'pilot_it2_rollingSliding_simple_ramp_box_0024'

physion_assets_path = os.path.join(b3d.get_root_path(), "assets/physion/")
# Per-stimulus HDF5 metadata file and the matching rendered video.
hdf5_file_path = os.path.join(physion_assets_path, f"hdf5s/{stim_name}.hdf5")
video_file_path = os.path.join(physion_assets_path, f"videos/{stim_name}_img.mp4")

In [3]:
# Camera parameters that are hard-coded here rather than read from the HDF5 file.
# NOTE(review): presumably these match the settings the stimulus was rendered with — confirm.
vfov_deg = 54.43222  # vertical field of view in degrees; `vfov` below holds the radian value
near_plane = 0.1
far_plane = 100


def _array_or_none(dataset):
    """Read `dataset` into a NumPy array once; return None when it holds no elements."""
    values = np.array(dataset)
    return values if values.size != 0 else None


with h5py.File(hdf5_file_path, "r") as f:
    frames = f['frames']
    static = f['static']

    # extract depth and image info
    # Frames are decoded on the host and each stack is converted to a JAX array once,
    # instead of creating one JAX array per frame and re-stacking the list afterwards.
    depth_frames = []
    image_frames = []
    for key in frames.keys():
        frame_images = frames[key]['images']
        depth_frames.append(np.asarray(frame_images['_depth_cam0']))
        # The colour image is stored as an encoded byte buffer that PIL decodes.
        image_frames.append(np.asarray(Image.open(io.BytesIO(frame_images['_img_cam0'][:]))))
    depth_arr = jnp.asarray(np.stack(depth_frames))
    image_arr = jnp.asarray(np.stack(image_frames))
    FINAL_T, height, width = image_arr.shape[0], image_arr.shape[1], image_arr.shape[2]

    # extract camera info
    # NOTE(review): camera_matrix is read from frame '0000' but projection_matrix from
    # frame '0010' — confirm this is intended (e.g. the camera is static).
    camera_azimuth = np.array(f['azimuth']['cam_0'])
    camera_matrix = np.array(frames['0000']['camera_matrices']['camera_matrix_cam0']).reshape((4, 4))
    projection_matrix = np.array(frames['0010']['camera_matrices']['projection_matrix_cam0']).reshape((4, 4))

    # extract object info
    object_ids = np.array(static['object_ids'])
    model_names = np.array(static['model_names'])
    # Empty distractor / occluder lists are represented as None.
    distractors = _array_or_none(static['distractors'])
    occluders = _array_or_none(static['occluders'])
    initial_position = np.array(static['initial_position'])
    initial_rotation = np.array(static['initial_rotation'])
    scales = np.array(static['scale'])
    meshes_faces = [np.array(static['mesh'][f'faces_{idx}']) for idx in range(len(object_ids))]
    meshes_vertices = [np.array(static['mesh'][f'vertices_{idx}']) for idx in range(len(object_ids))]

# Calculate the intrinsic matrix from vertical_fov.
# Notice that hfov and vfov are different if height != width
# We can also get the intrinsic matrix from opengl's perspective matrix.
# http://kgeorge.github.io/2014/03/08/calculating-opengl-perspective-matrix-from-opencv-intrinsic-matrix
vfov = vfov_deg / 180.0 * np.pi  # radians
tan_half_vfov = np.tan(vfov / 2.0)
tan_half_hfov = tan_half_vfov * width / float(height)
fx = width / 2.0 / tan_half_hfov  # focal length in pixel space
fy = height / 2.0 / tan_half_vfov

In [4]:
def scale_mesh(vertices, scale_factor):
    """
    Return a scaled copy of a vertex array.

    Columns 0, 1 and 2 of `vertices` are multiplied by `scale_factor[0]`,
    `scale_factor[1]` and `scale_factor[2]` respectively.

    The input array is left untouched, so calling this repeatedly on the same
    vertices (e.g. when a notebook cell is re-run) does not compound the scale.

    :param vertices: Array-like of shape (N, >=3) holding one vertex per row.
    :param scale_factor: Indexable with at least three per-axis scale values.

    :return: A new array with the scaled vertices. Floating-point inputs keep
        their dtype; other dtypes are promoted to float64 so fractional scale
        factors are not rejected or truncated.
    """
    scaled = np.array(vertices, copy=True)
    if not np.issubdtype(scaled.dtype, np.floating):
        scaled = scaled.astype(np.float64)
    for axis in range(3):
        scaled[:, axis] *= scale_factor[axis]
    # vertices[:,[2,1]] = vertices[:,[1,2]]
    return scaled

def euler_angles_to_quaternion(euler: np.ndarray) -> np.ndarray:
    """
    Turn a triple of Euler angles, given in degrees, into a quaternion.

    `euler[0]` is treated as pitch, `euler[1]` as yaw and `euler[2]` as roll.

    Source: https://pastebin.com/riRLRvch

    :param euler: The Euler angles vector (degrees).

    :return: The quaternion as an array ordered [x, y, z, w].
    """
    def half_angle_cos_sin(angle_deg):
        # Cosine and sine of half the angle, after converting degrees to radians.
        half = np.radians(angle_deg * 0.5)
        return np.cos(half), np.sin(half)

    cos_p, sin_p = half_angle_cos_sin(euler[0])
    cos_y, sin_y = half_angle_cos_sin(euler[1])
    cos_r, sin_r = half_angle_cos_sin(euler[2])

    quat_x = sin_y * cos_p * sin_r + cos_y * sin_p * cos_r
    quat_y = sin_y * cos_p * cos_r - cos_y * sin_p * sin_r
    quat_z = cos_y * cos_p * sin_r - sin_y * sin_p * cos_r
    quat_w = cos_y * cos_p * cos_r + sin_y * sin_p * sin_r
    # Alternative axis convention that was tried: np.array([x, -z, -y, w])
    return np.array([quat_x, quat_y, quat_z, quat_w])

# def swap_yz_axes_quaternion(q):
#     """
#     Perform a transformation on a quaternion `q` to account for swapping the y and z axes.
    
#     Parameters:
#         q (array-like): Quaternion [w, x, y, z] in reference frame A.

#     Returns:
#         q_transformed (array-like): Quaternion [w, x, y, z] in reference frame B.
#     """
#     # Convert to [w, x, y, z] format
#     q[[1,3]] = q[[3,1]]

#     # Define the quaternion for a 90-degree rotation about the x-axis
#     r_x = scipyR.from_euler('x', 90, degrees=True).as_quat()  # This gives [x, y, z, w] format

#     # Convert to [w, x, y, z] format
#     r_x = np.array([r_x[3], r_x[0], r_x[1], r_x[2]])
    
#     # Convert input quaternion q to [x, y, z, w] format
#     q_input = np.array([q[1], q[2], q[3], q[0]])
    
#     # Convert to rotation objects
#     q_rot = scipyR.from_quat(q_input)  # Create a rotation object from input quaternion
#     r_rot = scipyR.from_quat(r_x)      # Create a rotation object for the swap rotation

#     # Perform the transformation q' = r_x * q * r_x_inverse
#     q_transformed = r_rot * q_rot * r_rot.inv()

#     # Convert the result back to [w, x, y, z] format
#     q_transformed = q_transformed.as_quat()
#     return [q_transformed[0], q_transformed[1], q_transformed[2], q_transformed[3]]

In [5]:
# Objects flagged as distractors or occluders are left out of the reconstructed scene.
# `distractors` / `occluders` are either None or arrays of model names, so gather every
# excluded name first and test membership once. Comparing `model_names` against each
# array with `==` and concatenating the two `np.where` results fails as soon as the two
# groups match a different number of objects.
excluded_model_names = [
    np.atleast_1d(names) for names in (distractors, occluders) if names is not None
]
if excluded_model_names:
    is_excluded_model = np.isin(model_names, np.concatenate(excluded_model_names))
else:
    is_excluded_model = np.zeros(len(model_names), dtype=bool)
# Positions (into `model_names`) of the excluded objects.
excluded_model_ids = np.flatnonzero(is_excluded_model)
excluded_index_set = set(excluded_model_ids.tolist())

included_model_names = [model_names[idx] for idx in range(len(object_ids)) if idx not in excluded_index_set]
# NOTE(review): `object_ids[idx] - 1` is used downstream as a positional index into the
# per-object arrays; this presumably assumes ids run 1..N in storage order — confirm.
included_model_ids = [object_ids[idx] - 1 for idx in range(len(object_ids)) if idx not in excluded_index_set]


In [6]:
# Keep only the per-object entries whose positional index is listed in `included_model_ids`.
included_index_set = {int(model_id) for model_id in included_model_ids}

object_initial_positions = [pos for idx, pos in enumerate(initial_position) if idx in included_index_set]
object_initial_rotations = [rot for idx, rot in enumerate(initial_rotation) if idx in included_index_set]
object_scales = [scale for idx, scale in enumerate(scales) if idx in included_index_set]
# `idx` here indexes the unfiltered arrays, so the scale must come from `scales`, not from
# the already-filtered `object_scales` (whose positions shift once any object is excluded).
# The vertices are copied before scaling so re-running this cell never scales them twice.
object_meshes = [
    (scale_mesh(vertex.copy(), scales[idx]), face)
    for idx, (face, vertex) in enumerate(zip(meshes_faces, meshes_vertices))
    if idx in included_index_set
]


In [7]:
# Initialise Rerun logging through b3d's helper, using "demo_physion" as the name.
# NOTE(review): presumably this opens/connects to a Rerun viewer — confirm in b3d.
b3d.rr_init("demo_physion")

In [8]:
# Declare the coordinate convention of the root entity for the Rerun viewer:
# left-handed with Y pointing up. The right-handed, Y-down alternative is kept
# below for reference.
# rr.log("/", rr.ViewCoordinates.RIGHT_HAND_Y_DOWN, static=True)  # Set an up-axis
rr.log("/", rr.ViewCoordinates.LEFT_HAND_Y_UP, static=True)  # Set an up-axis

In [9]:
# For every included object: build its initial pose, wrap its scaled geometry in a
# b3d mesh, and log both the pose and the posed mesh to Rerun.
all_object_poses = []
all_meshes = []
for idx in range(len(included_model_ids)):
    position = jnp.asarray(object_initial_positions[idx])
    # Initial rotations are converted to a quaternion by euler_angles_to_quaternion.
    quaternion = jnp.asarray(euler_angles_to_quaternion(object_initial_rotations[idx]))
    object_pose = Pose(position, quaternion)
    print(object_pose)
    b3d.rr_log_pose(f"{idx}", object_pose)
    all_object_poses.append(object_pose)

    # object_meshes holds (vertices, faces) pairs.
    vertices, faces = object_meshes[idx]
    mesh = b3d.Mesh.from_trimesh(trimesh.Trimesh(vertices=vertices, faces=faces))
    all_meshes.append(mesh)
    mesh.transform(object_pose).rr_visualize(f"mesh_{idx}")

Pose(position=Array([1.25, 0.  , 0.  ], dtype=float32), quaternion=Array([0., 0., 0., 1.], dtype=float32))
Pose(position=Array([-0.625,  0.01 ,  0.   ], dtype=float32), quaternion=Array([0.000000e+00, 1.000000e+00, 0.000000e+00, 6.123234e-17], dtype=float32))
Pose(position=Array([-0.8202851,  1.1506727,  0.       ], dtype=float32), quaternion=Array([ 0.04738792, -0.8796334 ,  0.4724209 ,  0.02859946], dtype=float32))


In [10]:
# Split the 4x4 camera matrix into its upper-left 3x3 block R and the first three
# entries of its last column T, then recover the camera position p from -R @ p = T.
R, T = camera_matrix[:3, :3], camera_matrix[:3, 3]
a = -R
b = T.copy()
camera_position_from_matrix = np.linalg.solve(a, b)
# camera_position_from_matrix[[2,1]] = camera_position_from_matrix[[1,2]]
print(camera_position_from_matrix)

[ 0.9407364  2.983262  -1.6307147]


In [11]:
# Camera orientation is taken as the negated transpose of the camera matrix's 3x3 block.
# NOTE(review): negating a 3x3 matrix flips the sign of its determinant, so this is only a
# proper rotation if R itself has determinant -1 (e.g. a left-handed source convention) —
# confirm that Rot.from_matrix receives a valid rotation here.
camera_rotation_from_matrix = -R.T
camera_quaternion = b3d.Rot.from_matrix(camera_rotation_from_matrix).as_quat()
camera_pose = Pose(camera_position_from_matrix, camera_quaternion)
utils.rr_log_pose("camera_pose", camera_pose)


In [12]:
# camera_look_at = jnp.array([0, 0, 0])
# camera_pose = Pose.from_position_and_target(
#     camera_position_from_matrix,
#     camera_look_at,
#     up=jnp.array([0.0, 1.0, 0.0]),
# )
# utils.rr_log_pose("camera_pose", camera_pose)
# # print(camera_pose.as_matrix())

In [13]:
# Build the b3d renderer with the image size and intrinsics derived from the stimulus:
# width/height come from the loaded frames, fx/fy from the vertical field of view, and
# the principal point is placed at the image centre.
renderer = b3d.RendererOriginal(
    width=width,
    height=height,
    fx=fx,
    fy=fy,
    cx=width/2,
    cy=height/2,
    near=near_plane,
    far=far_plane,
)

If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


In [14]:
# Combine all object meshes, each placed at its own pose, into one scene mesh, then
# express that scene in the camera frame by applying the inverse of the camera pose.
scene_mesh = Mesh.transform_and_merge_meshes(all_meshes, all_object_poses)
scene_mesh_in_camera_frame = scene_mesh.transform(camera_pose.inv())
# Optional: visualise the camera-frame scene mesh in Rerun.
# scene_mesh_in_camera_frame.rr_visualize("scene_mesh_in_camera_frame")

In [15]:
# Render the camera-frame scene and log the result next to the recorded data.
rgbd = renderer.render_rgbd_from_mesh(
    scene_mesh_in_camera_frame
)
# The last channel (index 3) of the render is logged as the rendered depth.
b3d.rr_log_depth(rgbd[...,3], "depth/")
# The first observed depth frame is flipped along axis 1 before logging.
# NOTE(review): presumably this mirrors the stored image horizontally to match the
# renderer's image convention — confirm.
b3d.rr_log_depth(np.flip(depth_arr[0],1), "depth/observed")

In [16]:
# with h5py.File(hdf5_file_path, "r") as f:
#     for key in f['frames'].keys():
#         ang_vel = jnp.array(f['frames'][key]['objects']['rotations_cam0'])
#         print(ang_vel)
#         break