# Display The Data


In [None]:
import matplotlib.pyplot as plt
import torch
import numpy as np
from pathlib import Path
from lib.data.utils import (
    load_color,
    load_depth,
    load_mask,
    load_color_masked,
    load_depth_masked,
)

# Frame index to inspect and the root folder of the recorded sequence.
image_idx = 0
data_dir = Path("/Users/robinborth/Code/GuidedResearch/data/dphm_christoph_mouthmove")


def _show_array(array):
    """Draw ``array`` with ``plt.imshow`` and flush the figure immediately."""
    plt.imshow(array)
    plt.show()


# Color frame, rendered through the notebook's rich display.
color = load_color(data_dir, image_idx, return_tensor="img")
display(color)

# Depth map and mask are requested with return_tensor="np" and plotted.
depth = load_depth(data_dir, image_idx, return_tensor="np")
_show_array(depth)

mask = load_mask(data_dir, image_idx, return_tensor="np")
_show_array(mask)

# Masked variants of the color frame and of the depth map.
color_masked = load_color_masked(data_dir, image_idx, return_tensor="img")
display(color_masked)

depth_masked = load_depth_masked(data_dir, image_idx, return_tensor="np")
_show_array(depth_masked)

# Pipnet Landmarks


In [None]:
from lib.data.utils import (
    load_pipnet_image,
    load_pipnet_landmark_2d,
    load_pipnet_landmark_3d,
)

# 2D landmarks: report the array shape and the first five rows.
pipnet_landmarks_2d = load_pipnet_landmark_2d(data_dir, idx=image_idx)
print(f"{pipnet_landmarks_2d.shape=}")
print(pipnet_landmarks_2d[:5, :])

# 3D landmarks: same summary, then inspect column 2 (labelled "depth" below).
pipnet_landmarks_3d = load_pipnet_landmark_3d(data_dir, idx=image_idx)
print(f"{pipnet_landmarks_3d.shape=}")
print(pipnet_landmarks_3d[:5, :])

pipnet_depth_column = pipnet_landmarks_3d[:, 2]
print("depth", pipnet_depth_column)
plt.hist(pipnet_depth_column)
plt.show()

# Image loaded by load_pipnet_image, shown via rich display.
pipnet_image = load_pipnet_image(data_dir, idx=image_idx)
display(pipnet_image)

# Mediapipe Landmarks


In [None]:
from lib.data.utils import (
    load_mediapipe_image,
    load_mediapipe_landmark_2d,
    load_mediapipe_landmark_3d,
)

# 2D landmarks: report the array shape and the first five rows.
mediapipe_landmarks_2d = load_mediapipe_landmark_2d(data_dir, idx=image_idx)
print(f"{mediapipe_landmarks_2d.shape=}")
print(mediapipe_landmarks_2d[:5, :])

# 3D landmarks: same summary, then inspect column 2 (labelled "depth" below).
mediapipe_landmarks_3d = load_mediapipe_landmark_3d(data_dir, idx=image_idx)
print(f"{mediapipe_landmarks_3d.shape=}")
print(mediapipe_landmarks_3d[:5, :])

# Only the first 50 values are printed; the histogram covers every landmark.
mediapipe_depth_column = mediapipe_landmarks_3d[:, 2]
print("depth", mediapipe_depth_column[:50])
plt.hist(mediapipe_depth_column)
plt.show()

# Image loaded by load_mediapipe_image, shown via rich display.
mediapip_image = load_mediapipe_image(data_dir, idx=image_idx)
display(mediapip_image)

# Depth

From https://cvg.cit.tum.de/data/datasets/rgbd-dataset/file_formats

The color and depth images are already pre-registered using the OpenNI driver from PrimeSense, i.e., the pixels in the color and depth images correspond already 1:1.

The depth images are scaled by a factor of 1000, i.e., a pixel value of 1000 in the depth image corresponds to a distance of 1 meter from the camera. A pixel value of 0 means missing value/no data.


In [None]:
from lib.data.utils import (
    load_pipnet_landmark_2d,
    load_pipnet_landmark_3d,
    load_depth_masked,
)

# Masked depth map used as the background image.
# NOTE(review): depth_factor=1000 presumably converts raw depth values to meters
# (a raw value of 1000 is one meter per the dataset description) — confirm in the loader.
depth = load_depth_masked(data_dir, image_idx, return_tensor="np", depth_factor=1000)
plt.imshow(depth)

# Overlay all PIPNet 2D landmarks with a single scatter call instead of one
# call per point. Coordinates are truncated to integer pixel positions, exactly
# as the per-point version did.
lm = load_pipnet_landmark_2d(data_dir, idx=image_idx)
lm_px = lm.astype(int)
plt.scatter(lm_px[:, 0], lm_px[:, 1], c="red", s=10)  # red dot per landmark
plt.show()

In [None]:
# Index of the single landmark to inspect.
lm_idx = 0

# The same landmark in 3D and in 2D.
lm3d = load_pipnet_landmark_3d(data_dir, idx=image_idx)[lm_idx]
print(lm3d)

lm2d = load_pipnet_landmark_2d(data_dir, idx=image_idx)[lm_idx]
print(lm2d)
x, y = lm2d.astype(int)  # integer pixel position of the landmark

# Look up the depth map at that pixel; arrays are indexed [row, column] = [y, x].
depth = load_depth_masked(data_dir, image_idx, return_tensor="np", depth_factor=1000)
print(depth[y, x])

# Mark the landmark on top of the depth map.
plt.imshow(depth)
plt.scatter(x, y, c="red", s=10)
plt.show()

We can see that we have a point in 3D which is:

[-0.051, -0.042, 0.575] (x, y, z)

The corresponding pixel value is:

[878, 480] (x, y)

How do we get from 3D to 2D screen coordinates? With the pinhole camera model: $u = f_x \cdot x / z_c + c_x$ and $v = f_y \cdot y / z_c + c_y$.

Input:
fx = 914.415
fy = 914.03
cx = 959.598
cy = 547.202
xyz_camera = [-0.051, -0.042, 0.575] (x, y, z_c)

Output:
uvz_pixel = [878.0, 480.0, 0.575] (u, v, z_c)


In [None]:
from lib.data.utils import load_pipnet_landmark_2d, load_pipnet_landmark_3d
from lib.camera import load_intrinsics, camera2pixel

# 3D landmarks to project, plus the 2D landmarks to compare the projection with.
lm3d = load_pipnet_landmark_3d(data_dir, idx=image_idx)
# Fixed: lm2d was previously loaded with the 3D loader, so it never held 2D landmarks.
lm2d = load_pipnet_landmark_2d(data_dir, idx=image_idx)

# Camera intrinsics; unpacked as keyword arguments for camera2pixel.
K = load_intrinsics(data_dir=data_dir)

# Print the first two 2D landmarks as a reference, then show the projection of
# the first two 3D landmarks as the cell output.
print("2D landmarks (reference):", lm2d[:2])
camera2pixel(lm3d[:2], **K)

# Normals and Points in 3D


In [None]:
from lib.data.utils import load_normals_3d, load_points_3d
import open3d as o3d

# Load both arrays for frame 0 first, then summarise each one.
normals = load_normals_3d(data_dir=data_dir, idx=0)
points = load_points_3d(data_dir=data_dir, idx=0)

print(f"{normals.shape=}")
print(normals[:5, :])
print(f"{points.shape=}")
print(points[:5, :])

# Build the point cloud directly from the points and render it with plotly.
pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(points))
o3d.visualization.draw_plotly([pcd])

In [None]:
import open3d as o3d
from lib.data.utils import load_points_3d
from pathlib import Path

# Root folder of the recorded sequence (redefined so this cell can run on its own).
data_dir = Path("/Users/robinborth/Code/GuidedResearch/data/dphm_christoph_mouthmove")

# Points of frame 0: print the shape and the first five rows.
points = load_points_3d(data_dir=data_dir, idx=0)
print(f"{points.shape=}")
print(points[:5, :])

# Wrap the points for Open3D, attach them to a fresh cloud, and render it.
point_vectors = o3d.utility.Vector3dVector(points)
pcd = o3d.geometry.PointCloud()
pcd.points = point_vectors
o3d.visualization.draw_plotly([pcd])

In [None]:
import numpy as np
# Extrinsic of the color camera, read from a text file holding 12 values (3x4).
# The path is derived from data_dir instead of repeating the absolute location.
path = data_dir / "camera" / "c00_color_extrinsic.txt"

# Embed the 3x4 extrinsic into a 4x4 homogeneous matrix (last row stays 0, 0, 0, 1).
E = np.eye(4)
E[:3, :] = np.loadtxt(path).reshape(3, 4)  # extrinsic, hence world -> camera

# Author's notes on conventions:
# - The "pose" is camera -> world: when FLAME calls something a pose, it maps
#   camera coordinates to world coordinates, so the final mesh vertices live in
#   the world coordinate system. This is important!
# - The point cloud is in camera coordinates, so it must be mapped camera -> world.
#   This shows in its axes: right-handed, z pointing into the scene and y pointing
#   down, whereas usually z points towards the camera and y up (see cv2 reference).
pose = np.linalg.inv(E)  # 4x4, camera -> world

# Homogeneous camera-space points: xyz in the first three columns, w = 1.
points_c_homo = np.ones((points.shape[0], 4))
points_c_homo[:, :3] = points

# Fixed: E (world -> camera) was applied here before, although the points are in
# camera space; the camera -> world pose is the transform the notes call for.
points_w_homo = (pose @ points_c_homo.T).T

In [None]:
# Transform the points with the upper-left 3x3 block of pose only; the last
# column of pose is not applied here.
# NOTE(review): the result has three columns, so it is not homogeneous despite
# the variable name — confirm whether dropping the translation is intended.
points_w_homo = (pose[:3, :3] @ points.T).T
pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(points_w_homo))
o3d.visualization.draw_plotly([pcd])

In [None]:
# Show the matrix E as this cell's output.
E

In [None]:
# Keep at most the first three columns so the cloud receives plain xyz values.
xyz_world = points_w_homo[:, :3]
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(xyz_world)
o3d.visualization.draw_plotly([pcd])