In [1]:

import torch
from main_pipeline import BodyReconstructionPipeline
import config



if config.FORCE_CPU:
    device = torch.device("cpu")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using device: {device}")

# Initialize pipeline
pipeline = BodyReconstructionPipeline(
    device=device,
    output_dir=config.OUTPUT_DIR
)


[93m[WARN ] Dependency `gsplat` is required for rendering 3DGS. Install via: pip install git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70[0m
Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


 pose results keys : dict_keys(['keypoints', 'scores', 'labels', 'bbox']) 
 labels = tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16])
Using device: cuda


In [2]:

# Step 1: Depth Estimation
depth_map, processed_image, cam_intrinsics = pipeline._estimate_depth_and_fov("mohamed.jpg")
pipeline.cam_intrinsics = cam_intrinsics

# Step 2: 2D Pose Detection
# keypoints_2d = self._detect_pose_2d(image_path, processed_image)
# keypoints_2d = pipeline.infer_pose(processed_image)

# Step 3: Segmentation (SAM3)
masks, boxes, scores = pipeline._generate_segmentation_masks(processed_image)

# Step 4: Create 3D Point Cloud
point_cloud_array, pcd = pipeline._create_point_cloud(depth_map, processed_image, sam_mask=masks[0])

# Clean up GPU memory before fitting
pipeline._cleanup_gpu()
    



[1/5] Estimating depth and camera FOV...
[97m[INFO ] using MLP layer as FFN[0m
[97m[INFO ] Processed Images Done taking 0.0370023250579834 seconds. Shape:  torch.Size([1, 3, 504, 378])[0m
[97m[INFO ] Model Forward Pass Done. Time: 0.17250394821166992 seconds[0m
[97m[INFO ] Conversion to Prediction Done. Time: 0.0006089210510253906 seconds[0m
[97m[INFO ] Export Results Done. Time: 0.0003056526184082031 seconds[0m
########### Using fov estimator: MoGe2...
✓ Depth map shape: (504, 378)
✓ Camera intrinsics estimated

[3/5] Generating segmentation masks...
✓ Generated 0 feet mask(s)
✓ Generated 1 mask(s)

[4/5] Creating 3D point cloud...
  Applying DBSCAN clustering to clean point cloud...
  ✓ Removed 20 outlier points, kept 65916 points
✓ Point cloud created: 65916 points

Cleaning up GPU memory...
✓ GPU memory freed


In [4]:

"""
NLF-based SMPL-X fitter
=======================
Wraps the TorchScript NLF model so it can be used with the same interface as
`MetricSMPLFitter` inside `main_pipeline`. The NLF model already predicts
SMPL-X vertices, so this class focuses on loading the SMPL-X topology for
visualization, projecting to images, and providing the z-buffer / chamfer
loss against an observed point cloud.
"""

import torch
import torchvision  # Required for TorchScript model deps
import numpy as np
import cv2
import smplx
from typing import Dict, Optional, Tuple
from pytorch3d.structures import Pointclouds
from pytorch3d.loss import chamfer_distance
from smplfitter.pt import BodyModel, BodyFitter
from typing import List, Tuple
import random


class NLFSMPLFitter:
    """Drop-in replacement for `MetricSMPLFitter` using the NLF TorchScript model."""

    def __init__(
        self,
        model_path: str = "checkpoints/nlf_l_multi.torchscript",
        smplx_model_path: str = "./data/smplx",
        gender: str = "neutral",
        device: str = "cuda",
        image: Optional[np.ndarray] = None,
    ) -> None:
        self.device = torch.device(device)
        self.model = torch.jit.load(model_path).to(self.device).eval()
        self.image = image

        # Load SMPL-X just to obtain faces/topology and enable exports/visuals.
        self.smplx_model = smplx.create(
            model_path=smplx_model_path,
            model_type="smplx",
            gender=gender,
            use_face_contour=False,
            num_betas=10,
            num_expression_coeffs=10,
            ext="npz",
        ).to(self.device)

        # Optional refinement fitter if we want to re-fit parametric pose/shape.
        self.body_model = BodyModel(
            "smplx",
            gender,
            num_betas=10,
            model_root=f"{smplx_model_path}/smplx",
        ).to(self.device)
        self.body_fitter = BodyFitter(self.body_model).to(self.device)
        self.body_fitter = torch.jit.script(self.body_fitter)

        self.prediction = None
        self.fitted_params: Optional[Dict[str, torch.Tensor]] = None

    def _prepare_image_tensor(self, image: np.ndarray) -> torch.Tensor:
        if isinstance(image, torch.Tensor):
            return image.to(self.device)
        tensor = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
        return tensor.to(self.device)

    def _infer_nlf(self, image: np.ndarray) -> Dict[str, torch.Tensor]:
        image_tensor = self._prepare_image_tensor(image)
        frame_batch = image_tensor.unsqueeze(0)
        with torch.inference_mode():
            pred = self.model.detect_smpl_batched(frame_batch, model_name="smplx")
        return pred

    def _project_points(self, points_3d: torch.Tensor, cam_intrinsics: torch.Tensor) -> torch.Tensor:
        fx = cam_intrinsics[0, 0]
        fy = cam_intrinsics[1, 1]
        cx = cam_intrinsics[0, 2]
        cy = cam_intrinsics[1, 2]
        x_2d = fx * points_3d[:, 0] / (points_3d[:, 2] + 1e-6) + cx
        y_2d = fy * points_3d[:, 1] / (points_3d[:, 2] + 1e-6) + cy
        return torch.stack([x_2d, y_2d], dim=1)

    def fit(
        self,
        keypoints_2d: Optional[np.ndarray] = None,
        cam_intrinsics: Optional[np.ndarray] = None,
        depth_map: Optional[np.ndarray] = None,
        mask: Optional[np.ndarray] = None,
        feet_mask: Optional[np.ndarray] = None,
        point_cloud: Optional[np.ndarray] = None,
        **_: Dict,
    ) -> Dict[str, torch.Tensor]:
        """
        Run the NLF model and package outputs to mimic MetricSMPLFitter.
        Keypoint arguments are accepted for API compatibility but unused.
        """
        if self.image is None:
            raise ValueError("Image not provided to NLFSMPLFitter.")

        pred = self._infer_nlf(self.image)
        self.prediction = pred

        # Extract primary outputs
        pose = pred["pose"][0].to(self.device)
        betas = pred["betas"][0].to(self.device)
        transl = pred["trans"][0].to(self.device)
        vertices = pred["vertices3d"][0].to(self.device)
        joints = pred["joints3d"][0].to(self.device)

        # Optional parametric refinement to align with SMPL-X topology
        try:
            with torch.inference_mode():
                print(f" shapes of vertices: {vertices.shape}, joints: {joints.shape} ")
                fit_res = self.body_fitter.fit(vertices, joints, num_iter=3, beta_regularizer=1)
            pose_rotvecs = fit_res.get("pose_rotvecs", pose)
            shape_betas = fit_res.get("shape_betas", betas)
            trans_fitted = fit_res.get("trans", transl)
            print("NLF BodyFitter refinement succeeded.")
        except Exception as exc:  # pragma: no cover - defensive fallback
            print(f"NLF BodyFitter refinement failed, using raw predictions: {exc}")
            pose_rotvecs, shape_betas, trans_fitted = pose.unsqueeze(0), betas.unsqueeze(0), transl.unsqueeze(0)

        # Store params compatible with smplx forward
        self.fitted_params = {
            "betas": shape_betas,
            "global_orient": pose_rotvecs[:, :3],
            "body_pose": pose_rotvecs[:, 3:],
            "transl": trans_fitted,
            "left_hand_pose": torch.zeros((1, 6), device=self.device),
            "right_hand_pose": torch.zeros((1, 6), device=self.device),
            "expression": torch.zeros((1, 10), device=self.device),
            "vertices": vertices,
            "joints": joints,
        }

        # Compute optional z-buffer chamfer loss for logging
        if cam_intrinsics is not None and point_cloud is not None:
            cam_intrinsics_torch = torch.from_numpy(cam_intrinsics[0] if cam_intrinsics.ndim == 3 else cam_intrinsics).float().to(self.device)
            # _ = self.compute_depth_loss_with_point_cloud(vertices, cam_intrinsics_torch, point_cloud)
            # get zbuffer points
            zbuffer_points = self.get_zbuffer(vertices, cam_intrinsics_torch)
            print(f" shapes of zbuffer points: {zbuffer_points.shape}, point cloud: {point_cloud.shape} ")

        return self.fitted_params

    def get_zbuffer(
        self,
        vertices: torch.Tensor,
        cam_intrinsics: torch.Tensor,
    ) -> torch.Tensor:
        
        device = vertices.device
        # points_2d = self._project_points(vertices, cam_intrinsics)
        print(f" dimensions of cam_intrinsics: {cam_intrinsics.shape}, vertices: {vertices.shape} ")
        print(f" devices of cam_intrinsics: {cam_intrinsics.device}, vertices: {vertices.device} ")
        points_2d = self.project_vertices(vertices, cam_intrinsics.unsqueeze(0)).squeeze(0)
        print(f" dimensions of projected points_2d: {points_2d.shape} ")
        vertices = vertices.squeeze(0)
        
        u = points_2d[:, 0].long()
        v = points_2d[:, 1].long()
        z = vertices[:, 2]
        print(f" dimensions of u: {u.shape}, v: {v.shape}, z: {z.shape} ")
        
        H = int(cam_intrinsics[1, 2].item() * 2)
        W = int(cam_intrinsics[0, 2].item() * 2)

        inside = (u >= 0) & (u < W) & (v >= 0) & (v < H) & (z > 0)
        u, v, z = u[inside], v[inside], z[inside]
        if z.numel() == 0:
            return torch.zeros((), device=device)

        pixel_ids = v * W + u
        unique_pixels, inverse_indices = torch.unique(pixel_ids, return_inverse=True)
        z_min = torch.full((unique_pixels.shape[0],), float("inf"), device=device)
        z_min.scatter_reduce_(0, inverse_indices, z, reduce="amin")

        v_zbuf = unique_pixels // W
        u_zbuf = unique_pixels % W

        fx = cam_intrinsics[0, 0]
        fy = cam_intrinsics[1, 1]
        cx = cam_intrinsics[0, 2]
        cy = cam_intrinsics[1, 2]

        x_3d = (u_zbuf.float() - cx) * z_min / fx
        y_3d = (v_zbuf.float() - cy) * z_min / fy
        z_3d = z_min
        zbuffer_points = torch.stack([x_3d, y_3d, z_3d], dim=1)
        return zbuffer_points
    
    def project_vertices(self,coords3d, intrinsic_matrix):
        # ensure coords3d is (B, N, 3) and intrinsic_matrix is (B, 3, 3) and are on the same device and torch

        if not isinstance(coords3d, torch.Tensor) :
            coords3d = torch.from_numpy(coords3d).float().to('cuda')
        if not isinstance(intrinsic_matrix, torch.Tensor):
            intrinsic_matrix = torch.from_numpy(intrinsic_matrix).float().to('cuda')
        
        coords3d = coords3d.float().to('cuda')
        intrinsic_matrix = intrinsic_matrix.float().to('cuda')

        projected = coords3d / torch.maximum(
            torch.tensor(0.001), torch.tensor(coords3d[..., 2:]))
        
        return torch.einsum('bnk,bjk->bnj', projected, intrinsic_matrix[..., :2, :])
    
    def compute_depth_loss_with_point_cloud(
        self,
        vertices: torch.Tensor,
        cam_intrinsics: torch.Tensor,
        point_cloud: torch.Tensor,
        it: int = 0,
    ) -> torch.Tensor:
        
        zbuffer_points = self.get_zbuffer(vertices, cam_intrinsics)
        device = zbuffer_points.device

        if not isinstance(point_cloud, torch.Tensor):
            point_cloud = torch.from_numpy(point_cloud).float().to(device)
        else:
            point_cloud = point_cloud.to(device)

        pclouds_zbuf = Pointclouds([zbuffer_points])
        pclouds_gt = Pointclouds([point_cloud])
        loss, _ = chamfer_distance(pclouds_zbuf, pclouds_gt)

        if it % 50 == 0:
            print(
                f"Point cloud chamfer loss: {loss.item():.4f} "
                f"(zbuf points: {zbuffer_points.shape[0]}, gt points: {point_cloud.shape[0]})"
            )
        return loss
    

    def center_pcd(self, pcd: np.ndarray) -> np.ndarray:
        # this function centers the point cloud around the origin
        centroid = np.mean(pcd, axis=0)
        pcd -= centroid
        return pcd
    
    def ransac_scale(
        self,
        pairs: List[Tuple[float, float]], iters: int = 10000, tol: float = 0.05
        ) -> Tuple[float, int]:
        """RANSAC for scale s in s * d_sfm = d_metric.

        tol: relative tolerance |s*d_sfm - d_metric| <= tol * d_metric
        Returns best_scale, inlier_count.
        """
        if not pairs:
            raise ValueError("No depth pairs provided.")
        pairs_arr = np.array(pairs, dtype=float)  # (N,2)
        d_sfm = pairs_arr[:, 0]
        d_met = pairs_arr[:, 1]
        ratios = d_met / np.clip(d_sfm, 1e-9, None)
        best_s = np.median(ratios)
        best_inliers = 0
        n = len(pairs)
        for _ in range(iters):
            i = random.randint(0, n - 1)
            s_candidate = d_met[i] / max(d_sfm[i], 1e-9)
            pred = s_candidate * d_sfm
            err = np.abs(pred - d_met)
            inliers = np.sum(err <= tol * np.maximum(d_met, 1e-6))
            if inliers > best_inliers:
                best_inliers = inliers
                best_s = s_candidate

        return best_s, best_inliers


    def run_metric_optimization(
        self,
        cam_intrinsics: torch.Tensor,
        point_cloud: torch.Tensor,
        num_iters: int = 100,
    ) -> Dict[str, torch.Tensor]:
        # Placeholder for potential metric optimization steps
        # Currently returns the original vertices and joints without modification

        # get zbuffer points from vertices
        # device = vertices.device
        try:
            vertices = self.fitted_params["vertices"]
            joints = self.fitted_params["joints"]
        except Exception as exc:
            raise ValueError("Fitted parameters not available for metric optimization.") from exc

        zbuffer_points = self.get_zbuffer(vertices, cam_intrinsics)
        ## ensure zbuffer are numpy arrays
        if not isinstance(zbuffer_points, np.ndarray):
            zbuffer_points_np = zbuffer_points.detach().cpu().numpy()
        if not isinstance(point_cloud, np.ndarray):
            point_cloud = point_cloud.detach().cpu().numpy()
            
        # point_cloud_np = point_cloud.detach().cpu().numpy()
        # center both point clouds
        zbuffer_points_np = self.center_pcd(zbuffer_points_np)
        point_cloud = self.center_pcd(point_cloud)

        # pass only the z values of both point clouds
        z_depth = zbuffer_points_np[:, 2]
        pc_depth = point_cloud[:, 2]
        ## pass both point clouds to ransac scale
        scale, inliers = self.ransac_scale(
            list(zip(z_depth, pc_depth)), iters=1000, tol=0.1
        )
        print(f" RANSAC scale: {scale:.4f} with {inliers} inliers ")
        # scale the vertices and joints
        # vertices = vertices * scale
        # joints = joints * scale

        # ## fitted params update
        # self.fitted_params["vertices"] = vertices * scale
        # self.fitted_params["joints"] = joints * scale

        # save the point clouds for visualization
        # np.save("zbuffer_points.npy", zbuffer_points_np * scale)
        # np.save("point_cloud.npy", point_cloud)
        # pcd = np.concatenate([zbuffer_points_np * scale, point_cloud], axis=0)
        # np.save("combined_pcd.npy", pcd)

        # save it using open3d 
        import open3d as o3d
        zbuf_pcd_o3d = o3d.geometry.PointCloud()
        zbuf_pcd_o3d.points = o3d.utility.Vector3dVector(zbuffer_points_np * scale)
        o3d.io.write_point_cloud("zbuffer_points.ply", zbuf_pcd_o3d)
        pc_pcd_o3d = o3d.geometry.PointCloud()
        pc_pcd_o3d.points = o3d.utility.Vector3dVector(point_cloud)
        o3d.io.write_point_cloud("point_cloud.ply", pc_pcd_o3d)

        combined_pcd_o3d = o3d.geometry.PointCloud()
        combined_pcd_o3d.points = o3d.utility.Vector3dVector(
            np.concatenate([zbuffer_points_np * scale, point_cloud], axis=0)
        )
        o3d.io.write_point_cloud("combined_pcd.ply", combined_pcd_o3d)

        return {
            "vertices": vertices,
            "joints": joints,
        }

    def project_mesh_on_image(
        self,
        params: Optional[Dict[str, torch.Tensor]],
        image: np.ndarray,
        cam_intrinsics: np.ndarray,
    ) -> np.ndarray:
        if params is None:
            params = self.fitted_params
        if params is None:
            raise ValueError("No fitted parameters available for projection.")

        if "vertices" in params:
            vertices = params["vertices"].detach().cpu().numpy()
            vertices = vertices.squeeze(0)
            print(f" using vertices from params with shape: {vertices.shape} ")
        else:
            with torch.no_grad():
                output = self.smplx_model(**params, return_verts=True)
            vertices = output.vertices[0].cpu().numpy()

        faces = self.smplx_model.faces

        cam = cam_intrinsics[0] if cam_intrinsics.ndim == 3 else cam_intrinsics
        fx, fy, cx, cy = cam[0, 0], cam[1, 1], cam[0, 2], cam[1, 2]

        vertices_2d = np.zeros((len(vertices), 2))
        print(f" length of vertices: {len(vertices)} ")
        for i, v in enumerate(vertices):
            if v[2] > 0:
                vertices_2d[i, 0] = fx * v[0] / v[2] + cx
                vertices_2d[i, 1] = fy * v[1] / v[2] + cy

        overlay = image.copy()
        for face in faces[::10]:
            pts = vertices_2d[face].astype(np.int32)
            if np.all((pts[:, 0] >= 0) & (pts[:, 0] < image.shape[1]) & (pts[:, 1] >= 0) & (pts[:, 1] < image.shape[0])):
                cv2.polylines(overlay, [pts], True, (0, 255, 0), 1)
        return overlay

    def get_mesh(self) -> Tuple[np.ndarray, np.ndarray]:
        if self.fitted_params is None:
            raise ValueError("No fitted parameters. Run fit() first.")
        if "vertices" in self.fitted_params:
            vertices = self.fitted_params["vertices"].detach().cpu().numpy()
        else:
            with torch.no_grad():
                output = self.smplx_model(**self.fitted_params, return_verts=True)
            vertices = output.vertices[0].cpu().numpy()
        faces = self.smplx_model.faces
        return vertices, faces

    def export_mesh(self, output_path: str, params: Optional[Dict[str, torch.Tensor]] = None) -> None:
        import trimesh
        if params is None:
            params = self.fitted_params
        if params is None:
            raise ValueError("No parameters provided for export.")

        # Temporarily swap fitted params if caller supplied a different one
        original = self.fitted_params
        self.fitted_params = params
        verts, faces = self.get_mesh()
        self.fitted_params = original
        trimesh.Trimesh(vertices=verts, faces=faces).export(output_path)
        print(f"Mesh exported to {output_path}")


__all__ = ["NLFSMPLFitter"]


In [5]:
fitter = NLFSMPLFitter(image=processed_image)
processed_image.shape

 Loaded SMPLX model from ./data/smplx/smplx/SMPLX_NEUTRAL.npz 
 smpl data keys: ['hands_meanr', 'hands_meanl', 'lmk_bary_coords', 'vt', 'posedirs', 'part2num', 'hands_coeffsr', 'lmk_faces_idx', 'J_regressor', 'dynamic_lmk_faces_idx', 'hands_componentsr', 'shapedirs', 'dynamic_lmk_bary_coords', 'ft', 'hands_componentsl', 'joint2num', 'v_template', 'allow_pickle', 'f', 'hands_coeffsl', 'kintree_table', 'weights'] 


(504, 378, 3)

In [6]:
final_params = fitter.fit(
    keypoints_2d=None,
    cam_intrinsics=cam_intrinsics.cpu().numpy(),
    depth_map=depth_map,
    point_cloud=point_cloud_array,
    mask=masks[0].cpu().numpy(),
    feet_mask=pipeline.masks_feet.cpu().numpy(),
    conf_threshold=0.5
)

 shapes of vertices: torch.Size([1, 10475, 3]), joints: torch.Size([1, 55, 3]) 
 selector for part 0: 434 vertices
 selector for part 1: 245 vertices
 selector for part 2: 226 vertices
 selector for part 3: 153 vertices
 selector for part 4: 195 vertices
 selector for part 5: 204 vertices
 selector for part 6: 186 vertices
 selector for part 7: 254 vertices
 selector for part 8: 254 vertices
 selector for part 9: 614 vertices
 selector for part 12: 149 vertices
 selector for part 13: 111 vertices
 selector for part 14: 123 vertices
 selector for part 15: 3345 vertices
 selector for part 16: 276 vertices
 selector for part 17: 278 vertices
 selector for part 18: 228 vertices
 selector for part 19: 230 vertices
 selector for part 20: 200 vertices
 selector for part 21: 198 vertices
 selector for part 22: 320 vertices
 selector for part 23: 546 vertices
 selector for part 24: 546 vertices
 selector for part 25: 33 vertices
 selector for part 26: 34 vertices
 selector for part 27: 54 verti




 selector for part 46: 23 vertices
 selector for part 47: 39 vertices
 selector for part 48: 52 vertices
 selector for part 49: 30 vertices
 selector for part 50: 38 vertices
 selector for part 51: 54 vertices
 selector for part 52: 21 vertices
 selector for part 53: 31 vertices
 selector for part 54: 54 vertices
NLF BodyFitter refinement succeeded.
 dimensions of cam_intrinsics: torch.Size([3, 3]), vertices: torch.Size([1, 10475, 3]) 
 devices of cam_intrinsics: cuda:0, vertices: cuda:0 
 dimensions of projected points_2d: torch.Size([10475, 2]) 
 dimensions of u: torch.Size([10475]), v: torch.Size([10475]), z: torch.Size([10475]) 
 shapes of zbuffer points: torch.Size([4295, 3]), point cloud: (65916, 3) 


In [7]:
projected_image = fitter.project_mesh_on_image(final_params,processed_image, cam_intrinsics)
cv2.imwrite("projected_mohamed_mesh_on_image.png", projected_image)
final_params['vertices'].shape


# optimize_metric = fitter.run_metric_optimization(cam_intrinsics, point_cloud_array)
# print("✓ NLF SMPL fitting complete")

 using vertices from params with shape: (10475, 3) 
 length of vertices: 10475 


torch.Size([1, 10475, 3])

In [8]:
pcd = pcd.voxel_down_sample(0.01)
point_cloud_array = np.asarray(pcd.points)
point_cloud_array.shape

(12098, 3)

In [10]:
# cam_intrinsics_torch = torch.from_numpy(cam_intrinsics[0] if cam_intrinsics.ndim == 3 else cam_intrinsics).float().to(self.device)


optimize_metric = fitter.run_metric_optimization(cam_intrinsics[0], point_cloud_array)
# print("✓ NLF SMPL fitting complete")
# cam_intrinsics.shape




 dimensions of cam_intrinsics: torch.Size([3, 3]), vertices: torch.Size([1, 10475, 3]) 
 devices of cam_intrinsics: cpu, vertices: cuda:0 
 dimensions of projected points_2d: torch.Size([10475, 2]) 
 dimensions of u: torch.Size([10475]), v: torch.Size([10475]), z: torch.Size([10475]) 
 RANSAC scale: -0.0005 with 60 inliers 


In [46]:
pred = fitter._infer_nlf(fitter.image)

In [None]:
vertices = pred["vertices3d"][0].to(fitter.device)
print(f" shapes of vertices: {vertices.shape} , cam_intrinsics: {cam_intrinsics.shape} ")


zbuffer_points = fitter.get_zbuffer(vertices, cam_intrinsics.squeeze(0))

depth
    ## ensure zbuffer are numpy arrays





 shapes of vertices: torch.Size([1, 10475, 3]) , cam_intrinsics: torch.Size([1, 3, 3]) 
 dimensions of cam_intrinsics: torch.Size([3, 3]), vertices: torch.Size([1, 10475, 3]) 
 devices of cam_intrinsics: cpu, vertices: cuda:0 
 dimensions of projected points_2d: torch.Size([10475, 2]) 
 dimensions of u: torch.Size([10475]), v: torch.Size([10475]), z: torch.Size([10475]) 


In [50]:
zbuffer_points_np = zbuffer_points.detach().cpu().numpy()
point_cloud = point_cloud_array
zbuffer_points_np = fitter.center_pcd(zbuffer_points_np)
point_cloud = fitter.center_pcd(point_cloud)
# pass only the z values of both point clouds
# z_depth = zbuffer_points_np[:, 2]
# pc_depth = point_cloud[:, 2]

# ## pass both point clouds to ransac scale
# scale, inliers = fitter.ransac_scale(
#     list(zip(z_depth, pc_depth)), iters=1000, tol=0.1
# )
# print(f" RANSAC scale: {scale:.4f} with {inliers} inliers ")
# # scale the vertices and joints

In [None]:
## conclusion what needs to be done next:

## render the mesh before caluclating the zbuffer points
## then calculate the zbuffer points from the rendered depth map
## then do the ransac scaling between the zbuffer points and the point cloud


### make sure that the mesh and the mask are aligned properly in the image
### afterwards the scale would be more accurate

In [113]:
from pytorch3d.renderer import (
    RasterizationSettings,
    MeshRasterizer,
    PerspectiveCameras,
)
from pytorch3d.structures import Meshes, Pointclouds
from pytorch3d.loss import chamfer_distance

device = vertices.device

# Ensure point_cloud is on correct device
if not isinstance(point_cloud, torch.Tensor):
    point_cloud = torch.from_numpy(point_cloud).float().to(device)
else:
    point_cloud = point_cloud.to(device)

# Create SMPLX mesh
faces = torch.from_numpy(fitter.smplx_model.faces).long().to(device)
mesh = Meshes(verts=[vertices.squeeze(0)], faces=[faces])

In [114]:
# Extract H, W from intrinsics (assuming standard camera setup)
cam_intrinsics = cam_intrinsics.squeeze(0)
fx = -cam_intrinsics[0, 0] # flipped sign for PyTorch3D
fy = -cam_intrinsics[1, 1]
cx = cam_intrinsics[0, 2]
cy = cam_intrinsics[1, 2]

H = int(cy.item() * 2)
W = int(cx.item() * 2)

# Set up PyTorch3D camera
cameras = PerspectiveCameras(
    focal_length=((fx, fy),),
    principal_point=((cx, cy),),
    image_size=((H, W),),
    device=device,
    in_ndc=False
)

# Rasterization settings
raster_settings = RasterizationSettings(
    image_size=(H, W),
    blur_radius=0.0,
    faces_per_pixel=1,
    bin_size=0
)

# Rasterize mesh
rasterizer = MeshRasterizer(
    cameras=cameras,
    raster_settings=raster_settings
)

fragments = rasterizer(mesh)
rendered_depth = fragments.zbuf[0, ..., 0]  # [H, W]

In [115]:
# save the rendered depth map as an image
import matplotlib.pyplot as plt
plt.imsave("rendered_depth_map.png", rendered_depth.cpu().numpy(), cmap='gray')

In [116]:
masks[0]
# depth_map = torch.from_numpy(depth_map).float().to(device)
filtered_depth_map = depth_map * masks[0].float().to(device)


In [141]:
#print stats of filtered and rendered depth maps
## reject any non-positive values from both depth maps before calculating stats
rendered_depth = rendered_depth[rendered_depth > 0]
filtered_depth_map = filtered_depth_map[filtered_depth_map > 0]
print(f" Rendered depth map stats: min={rendered_depth.min().item():.4f}, max={rendered_depth.max().item():.4f}, mean={rendered_depth.mean().item():.4f} ")
print(f" Filtered depth map stats: min={filtered_depth_map.min().item():.4f}, max={filtered_depth_map.max().item():.4f}, mean={filtered_depth_map.mean().item():.4f} ")

 Rendered depth map stats: min=2369.3381, max=3158.9885, mean=2612.9963 
 Filtered depth map stats: min=1.2842, max=3.5501, mean=1.5162 


In [104]:
plt.imsave("filtered_depth.png", filtered_depth_map.squeeze(0).cpu().numpy(), cmap='gray')

In [142]:
def ransac_scale(
        pairs: List[Tuple[float, float]], iters: int = 10000, tol: float = 0.05
        ) -> Tuple[float, int]:
        """RANSAC for scale s in s * d_sfm = d_metric.

        tol: relative tolerance |s*d_sfm - d_metric| <= tol * d_metric
        Returns best_scale, inlier_count.
        """
        if not pairs:
            raise ValueError("No depth pairs provided.")
        pairs_arr = np.array(pairs, dtype=float)  # (N,2)
        d_sfm = pairs_arr[:, 0]
        d_met = pairs_arr[:, 1]
        ratios = d_met / np.clip(d_sfm, 1e-9, None)
        best_s = np.median(ratios)
        best_inliers = 0
        n = len(pairs)
        for _ in range(iters):
            i = random.randint(0, n - 1)
            s_candidate = d_met[i] / max(d_sfm[i], 1e-9)
            pred = s_candidate * d_sfm
            err = np.abs(pred - d_met)
            inliers = np.sum(err <= tol * np.maximum(d_met, 1e-6))
            if inliers > best_inliers:
                best_inliers = inliers
                best_s = s_candidate

        return best_s, best_inliers

In [147]:
scale, inliers = ransac_scale(
    list(zip(rendered_depth.cpu().numpy().flatten(), filtered_depth_map.cpu().numpy().flatten())), iters=1000, tol=0.01
)
print(f" RANSAC scale: {scale:.4f} with {inliers} inliers ")

 RANSAC scale: 0.0006 with 1319 inliers 


In [163]:
# save it using open3d 1723.387415908
s = 0.0006
import open3d as o3d
zbuf_pcd_o3d = o3d.geometry.PointCloud()
print(f" shapes of vertices: {vertices.squeeze(0).transpose(0,1).shape} ")
vertices_np = vertices.squeeze(0).detach().cpu().numpy() 
print(f" shapes of vertices_np: {vertices_np.shape} ")
vertices = vertices
# zbuf_pcd_o3d.points = o3d.utility.Vector3dVector(vertices.squeeze(0).transpose(0,1))
o3d.io.write_point_cloud("zbuffer_points.ply", zbuf_pcd_o3d)
pc_pcd_o3d = o3d.geometry.PointCloud()
point_cloud_array
pc_pcd_o3d.points = o3d.utility.Vector3dVector(point_cloud_array)
o3d.io.write_point_cloud("point_cloud.ply", pc_pcd_o3d)

combined_pcd_o3d = o3d.geometry.PointCloud()
combined_pcd_o3d.points = o3d.utility.Vector3dVector(
    np.concatenate([vertices_np * s, point_cloud_array], axis=0)
)
o3d.io.write_point_cloud("combined_pcd.ply", combined_pcd_o3d)


 shapes of vertices: torch.Size([3, 10475]) 
 shapes of vertices_np: (10475, 3) 


True