In [2]:
from pose import draw_pose, yolo_pose_inference
import torch
from depth_anything_3.api import DepthAnything3
import cv2
import numpy as np
from fov_estimator import FOVEstimator
import open3d as o3d
import os
import smplx

[93m[WARN ] Dependency `gsplat` is required for rendering 3DGS. Install via: pip install git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70[0m
Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DepthAnything3.from_pretrained("depth-anything/DA3METRIC-LARGE")
model = model.to(device=device)


## run infernce on tom cruise image



images = ["/home/khater/pose-check/tom.jpg"]

prediction = model.inference(
    images,
    export_dir="output",
    # export_format="colmap"  # Options: glb, npz, ply, mini_npz, gs_ply, gs_video
)
depth_prediction = prediction.depth[0]
print(prediction.processed_images.shape)  # Processed images: [N, H, W, 3] uint8

[97m[INFO ] using MLP layer as FFN[0m
[97m[INFO ] Processed Images Done taking 0.024598360061645508 seconds. Shape:  torch.Size([1, 3, 336, 504])[0m
[97m[INFO ] Model Forward Pass Done. Time: 0.3353147506713867 seconds[0m
[97m[INFO ] Conversion to Prediction Done. Time: 0.001485586166381836 seconds[0m
[97m[INFO ] Export Results Done. Time: 0.0002181529998779297 seconds[0m
(1, 336, 504, 3)


In [4]:
fov_estimator = FOVEstimator(name="moge2", device=device)
cam_intrinsics = fov_estimator.get_cam_intrinsics(img=prediction.processed_images[0])
# create point cloud

########### Using fov estimator: MoGe2...


In [7]:
results = yolo_pose_inference(images)


image 1/1 /home/khater/pose-check/tom.jpg: 448x640 1 person, 63.8ms
Speed: 1.6ms preprocess, 63.8ms inference, 1.2ms postprocess per image at shape (1, 3, 448, 640)


In [8]:
# create_point_cloud(depth_map, image, cam_intrinsics):
def create_point_cloud(depth_map, image, cam_intrinsics):
    width, height = depth_map.shape[1], depth_map.shape[0]
    fx = cam_intrinsics[0, 0, 0]
    fy = cam_intrinsics[0, 1, 1]
    # Generate mesh grid and calculate point cloud coordinates
    x, y = np.meshgrid(np.arange(width), np.arange(height))
    x = (x - width / 2) / fx
    y = (y - height / 2) / fy
    z = np.array(depth_map)
    points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
    colors = np.array(image).reshape(-1, 3) / 255.0

    # Create the point cloud and save it to the output directory
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)
    o3d.io.write_point_cloud(os.path.join("output", "point_cloud.ply"), pcd)
    return pcd

def handle_yolo_results(image, results, ratio_w=1.0, ratio_h=1.0):
 # Access the results
    key_points = []     

    print(f" image shape : {image.shape}")
    for result in results:
        if result.keypoints is not None:
            for person_kpts in result.keypoints.xy.cpu().numpy():
                # Add confidence if needed:
                conf = result.keypoints.conf.cpu().numpy()[0]
                keypoints = np.concatenate(
                    [person_kpts * [ratio_w, ratio_h], conf[:, None]], axis=1
                )
                key_points.append(keypoints)
                output_image = draw_pose(image, keypoints)
    # key_points_all.append(key_points)
    # cv2.imshow("Pose", img)
    # save image with poses
    # output_path = image_path.replace(".jpg", "_pose.jpg")
    # cv2.imwrite("overlayed_image.png", output_image)
    return output_image, key_points

def lift2d_keypoints_to_3d(keypoints_2d, depth_map, cam_intrinsics):
    fx = cam_intrinsics[0, 0, 0]
    fy = cam_intrinsics[0, 1, 1]
    cx = cam_intrinsics[0, 0, 2]
    cy = cam_intrinsics[0, 1, 2]

    keypoints_3d = []
    for keypoint in keypoints_2d:
        x_2d, y_2d = int(keypoint[0]), int(keypoint[1])
        z = depth_map[y_2d, x_2d]
        x = (x_2d - cx) * z / fx
        y = (y_2d - cy) * z / fy
        keypoints_3d.append([x, y, z])
    return np.array(keypoints_3d)



In [10]:
image_shape = cv2.imread(images[0]).shape
ratio_h = prediction.processed_images.shape[1] / image_shape[0]
ratio_w = prediction.processed_images.shape[2] / image_shape[1]
key_points_2d_all = []
# ratio_h = prediction.processed_images.shape[1] / image.shape[0]
for result in results:
    if result.keypoints is not None:
        for person_kpts in result.keypoints.xy.cpu().numpy():
            # Add confidence if needed:
            conf = result.keypoints.conf.cpu().numpy()[0]
            # keypoints = np.concatenate(
            #     [person_kpts, conf[:, None]], axis=1
            # )
            keypoints = np.concatenate(
                [person_kpts * [ratio_w, ratio_h], conf[:, None]], axis=1
            )
            key_points_2d_all.append(keypoints)
key_points_3d_all = []
for keypoints_2d in key_points_2d_all:
    keypoints_3d = lift2d_keypoints_to_3d(keypoints_2d, depth_prediction, cam_intrinsics)
    key_points_3d_all.append(keypoints_3d)

print("2D Keypoints:", key_points_2d_all)
print("3D Keypoints:", key_points_3d_all)

2D Keypoints: [array([[     255.29,      45.832,     0.99702],
       [     259.77,      40.182,     0.97757],
       [     249.82,       40.55,     0.99229],
       [     265.64,      42.892,     0.74154],
       [     240.49,      43.572,      0.9309],
       [     277.34,      77.169,     0.99794],
       [     226.75,      77.191,      0.9996],
       [     287.23,      120.68,     0.98147],
       [      214.2,      119.46,     0.99728],
       [      290.9,      158.94,      0.9797],
       [     211.48,      160.01,     0.99412],
       [     268.55,      161.61,     0.99966],
       [     235.71,      161.56,     0.99984],
       [     266.55,      225.64,     0.99856],
       [     229.69,      226.72,     0.99927],
       [     260.62,      286.35,     0.98472],
       [     229.45,      289.71,      0.9903]])]
3D Keypoints: [array([[   0.011416,    -0.46807,      1.9825],
       [   0.026624,    -0.48684,      1.9815],
       [  -0.011304,    -0.48232,      1.9631],
       [

In [25]:
# def a function that gets the angle between 3D connected keypoints
def calculate_joint_angles(keypoints_3d):
    # Define connections for joints (example for elbow and knee)
    connections = [
        (5, 7, 9),  # Left arm: shoulder-elbow-wrist
        (6, 8, 10), # Right arm: shoulder-elbow-wrist
        (11, 13, 15), # Left leg: hip-knee-ankle
        (12, 14, 16)  # Right leg: hip-knee-ankle
    ]
    angles = {}
    for (a, b, c) in connections:
        ba = keypoints_3d[a] - keypoints_3d[b]
        bc = keypoints_3d[c] - keypoints_3d[b]
        cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
        angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))
        # angles[f"angle_{a}_{b}_{c}"] = np.degrees(angle)
        angles[b] = np.degrees(angle)
    return angles

In [31]:
import numpy as np

def normalize(v):
    n = np.linalg.norm(v)
    if n < 1e-8:
        return v
    return v / n

def skew(v):
    return np.array([
        [ 0,   -v[2],  v[1]],
        [ v[2],  0,   -v[0]],
        [-v[1], v[0],   0 ]
    ])

def rotation_from_vectors(a, b):
    """
    Compute rotation matrix that rotates vector a to vector b
    (no twist)
    """
    a = normalize(a)
    b = normalize(b)

    v = np.cross(a, b)
    c = np.dot(a, b)

    if np.linalg.norm(v) < 1e-8:
        return np.eye(3)

    vx = skew(v)
    R = np.eye(3) + vx + vx @ vx * ((1 - c) / (np.linalg.norm(v) ** 2))
    return R

def rotmat_to_axis_angle(R):
    angle = np.arccos(np.clip((np.trace(R) - 1) / 2, -1, 1))
    if angle < 1e-8:
        return np.zeros(3)

    axis = np.array([
        R[2,1] - R[1,2],
        R[0,2] - R[2,0],
        R[1,0] - R[0,1]
    ]) / (2 * np.sin(angle))

    return axis * angle


In [33]:
def initialize_smplx_pose_from_keypoints(
    keypoints_3d,
    rest_joints,
    parents
):
    """
    Args:
        keypoints_3d: (N,3) observed joints
        rest_joints:  (N,3) SMPL-X rest pose joints
        parents:      list of length N, parents[i] = parent index of joint i

    Returns:
        pose_axis_angle: (N,3) axis-angle rotations
    """

    N = keypoints_3d.shape[0]

    R_global = [np.eye(3) for _ in range(N)]
    pose_axis_angle = np.zeros((N, 3))

    for j in range(N):
        p = parents[j]
        if p < 0:
            continue

        d_rest = rest_joints[j] - rest_joints[p]
        d_obs  = keypoints_3d[j] - keypoints_3d[p]

        R = rotation_from_vectors(d_rest, d_obs)
        R_global[j] = R_global[p] @ R

    # Convert global â†’ local
    for j in range(N):
        p = parents[j]
        if p < 0:
            continue

        R_local = R_global[p].T @ R_global[j]
        pose_axis_angle[j] = rotmat_to_axis_angle(R_local)

    return pose_axis_angle


In [None]:
# --- Configuration ---
# 1. Path to your models folder (the directory containing 'smpl', 'smplx', etc.)
MODEL_FOLDER = 'data'
# 2. Choose the specific model
MODEL_TYPE = 'smplx' # Options: 'smpl', 'smplh', 'smplx'
GENDER = 'neutral' # Options: 'neutral', 'male', 'female'
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu' # Use 'cuda' if you have a GPU

# 3. Create the SMPL layer (a PyTorch module)
smpl_layer = smplx.create(
    model_path=MODEL_FOLDER,
    model_type=MODEL_TYPE,
    gender=GENDER,
    # This is important for batch processing
    num_betas=10,        # Number of shape coefficients
    num_expression_coeffs=10, # Only for SMPL-X
    ext='pkl',           # File extension of the model
).to(DEVICE)

print(f"Loaded {MODEL_TYPE} model on {DEVICE}")

print("SMPL Layer:", smpl_layer)

num_betas = smpl_layer.num_betas

# Initialize a batch of 1 person with an average shape (zeros)
betas = torch.zeros(1, num_betas, device=DEVICE)

# Example: Make the person taller (often beta[0] controls height)
# A value of +3.0 is a 3-standard-deviation change from the mean.
betas[0, 0] = 3.0

# 1. Global Orientation (No rotation - facing forward)
global_orient = torch.zeros(1, 1, 3, device=DEVICE)

# 2. Body Pose (T-pose/A-pose - all joints straight)
body_pose = torch.zeros(1, smpl_layer.NUM_BODY_JOINTS, 3, device=DEVICE)





Loaded smplx model on cuda
SMPL Layer: SMPLX(
  Gender: NEUTRAL
  Number of joints: 55
  Betas: 10
  Number of PCA components: 6
  Flat hand mean: False
  Number of Expression Coefficients: 10
  (vertex_joint_selector): VertexJointSelector()
)


In [None]:
# COCO to SMPL-X joint mapping (approximate)
coco_to_smplx = {
    0: 15,   # nose -> head
    5: 17,   # left_shoulder -> left_shoulder
    6: 16,   # right_shoulder -> right_shoulder
    7: 19,   # left_elbow -> left_elbow
    8: 18,   # right_elbow -> right_elbow
    9: 21,   # left_wrist -> left_wrist
    10: 20,  # right_wrist -> right_wrist
    11: 2,   # left_hip -> left_hip
    12: 1,   # right_hip -> right_hip
    13: 5,   # left_knee -> left_knee
    14: 4,   # right_knee -> right_knee
    15: 8,   # left_ankle -> left_ankle
    16: 7,   # right_ankle -> right_ankle
}

angles = calculate_joint_angles(key_points_3d_all[0])
print("Joint Angles:", angles)
for joint_idx, angle in angles.items():
    if joint_idx in coco_to_smplx:
        smplx_joint_idx = coco_to_smplx[joint_idx]
        # Convert angle from degrees to radians
        angle_rad = np.radians(angle)
        # Apply rotation around the X-axis for simplicity
        body_pose[0, smplx_joint_idx, 0] = angle_rad




Joint Angles: {7: 168.8202903481144, 8: 166.0538219448474, 13: 162.31717837253413, 14: 168.16348723013186}


In [29]:

# Place the root joint at 3D coordinate (0, 0, 1.5)
transl = torch.tensor([[0., 0., 1.5]], device=DEVICE)

# --- 4. Forward Pass ---
output = smpl_layer(
    betas=betas,
    body_pose=body_pose,
    global_orient=global_orient,
    transl=transl,
    return_verts=True  # Tells the model to calculate the vertex positions
)

# --- 5. Extract Results ---

# 3D Coordinates of the mesh vertices (N x 6890 x 3)
vertices = output.vertices.detach().cpu().numpy().squeeze() 

# 3D Coordinates of the joint locations (N x 24 x 3)
joints = output.joints.detach().cpu().numpy().squeeze() 

# The Triangulation Faces (always the same for a given SMPL model)
faces = smpl_layer.faces

print(f"Generated {vertices.shape[0]} vertices and {joints.shape[0]} joints.")

# The generated mesh can now be visualized using tools like Open3D or trimesh.
# You can save the mesh to an OBJ file:
import trimesh
trimesh.Trimesh(vertices=vertices, faces=faces).export('posed_mesh.obj')

Generated 10475 vertices and 127 joints.


'# https://github.com/mikedh/trimesh\nv 0.06677661 -0.55409294 1.59240842\nv 0.07089236 -0.55353767 1.59112120\nv 0.07138129 -0.55344868 1.59115505\nv 0.06703579 -0.55404741 1.59230185\nv 0.07882193 -0.55004197 1.60224152\nv 0.07826320 -0.55498093 1.59984744\nv 0.07776324 -0.55703580 1.60059774\nv 0.07762359 -0.55615294 1.60353410\nv 0.02931978 -0.48810232 1.64500070\nv 0.01372701 -0.48150975 1.64865041\nv 0.01506474 -0.47075537 1.64459121\nv 0.03118179 -0.47378051 1.63873649\nv 0.05154337 -0.46450791 1.60830522\nv 0.04588283 -0.47047070 1.61880314\nv 0.04514540 -0.45836473 1.61745048\nv 0.05565505 -0.45728344 1.60139143\nv 0.04245951 -0.64769888 1.52226257\nv 0.05016971 -0.64392394 1.52759480\nv 0.04896436 -0.63913167 1.52170169\nv -0.06635106 -0.55396855 1.59338045\nv -0.07051183 -0.55336481 1.59219253\nv -0.07091605 -0.55358005 1.59247410\nv -0.06661537 -0.55633515 1.59408557\nv -0.07850591 -0.55254143 1.60321283\nv -0.07809221 -0.55731726 1.60077715\nv -0.07724880 -0.55797535 1.601