In [1]:
import torch
from torchvision.transforms import ToTensor
from utils.dataloader import NYUv2
import open3d as o3d


# Intrinsic parameters of the RGB camera: focal lengths (fx, fy) and
# principal point (cx, cy). Presumably expressed in pixels, since they are
# combined with pixel indices during back-projection.
fx = 5.1885790117450188e+02
fy = 5.1946961112127485e+02
cx = 3.2558244941119034e+02
cy = 2.5373616633400465e+02


In [2]:
def depth_to_point_cloud(depth, K):
    """
    Back-project a depth map into a 3D point cloud (pinhole camera model).

    For the pixel at column u, row v with depth z:
        X = (u - cx) * z / fx,   Y = (v - cy) * z / fy,   Z = z

    depth: Tensor of shape (H, W) - values in meters
    K: camera params - mapping with the keys "fx", "fy", "cx", "cy"
    Returns: Tensor of shape (N, 3) with the 3D points of every pixel whose
        depth is > 0, in row-major pixel order (N <= H * W)
    Raises: ValueError if depth is not 2-dimensional
    """
    if depth.dim() != 2:
        raise ValueError(f"depth must have shape (H, W), got {tuple(depth.shape)}")

    device = depth.device
    H, W = depth.shape
    # Pixel index grids: rows[i, j] == i and cols[i, j] == j.
    rows, cols = torch.meshgrid(
        torch.arange(H, device=device),
        torch.arange(W, device=device),
        indexing='ij',
    )

    z = depth
    x = (cols - K["cx"]) * z / K["fx"]
    y = (rows - K["cy"]) * z / K["fy"]

    # reshape (not view): a non-contiguous depth map (e.g. transposed or
    # sliced) yields a non-contiguous mask, on which view(-1) raises.
    xyz = torch.stack((x, y, z), dim=-1).reshape(-1, 3)  # (H*W, 3)
    # The mask is exactly `depth > 0` so callers can rebuild it to select
    # matching per-pixel attributes (colors, labels) for the returned points.
    valid = (z > 0).reshape(-1)
    return xyz[valid]


In [11]:
# Build the NYUv2 dataset with train=False (presumably the evaluation split),
# converting the RGB image, segmentation map and depth map to tensors.
dataset = NYUv2(
    root="data/nyuv2",
    train=False,
    rgb_transform=ToTensor(),
    seg_transform=ToTensor(),
    depth_transform=ToTensor(),
)

# First sample of the dataset; depth is expected to be in meters.
rgb, seg, depth = dataset[0]
print(f"RGB shape: {rgb.shape}, Segmentation shape: {seg.shape}, Depth shape: {depth.shape}")

# Drop the leading channel axis of the depth map: (1, H, W) -> (H, W).
depth = depth.squeeze(0)

# Camera intrinsics keyed the way depth_to_point_cloud reads them.
K = dict(fx=fx, fy=fy, cx=cx, cy=cy)

tensor([[[0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         ...,
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0],
         [0, 0, 0,  ..., 0, 0, 0]]])
tensor([[[255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         ...,
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255],
         [255, 255, 255,  ..., 255, 255, 255]]])
RGB shape: torch.Size([3, 480, 640]), Segmentation shape: torch.Size([1, 480, 640]), Depth shape: torch.Size([1, 480, 640])


In [4]:
# Back-project the depth map into 3D using the camera intrinsics in K.
point_cloud = depth_to_point_cloud(depth=depth, K=K)  # (N, 3)

print("Point cloud:", point_cloud.size())


Point cloud: torch.Size([253016, 3])


In [5]:

def visualize_point_cloud(xyz, rgb=None):
    """
    Open an Open3D viewer window showing the given point cloud.

    xyz: Tensor of shape (N, 3) with the 3D points
    rgb: optional Tensor of shape (N, 3) with one color per point; Open3D
         expects color components as floats in the range [0, 1]
    Raises: ValueError if rgb is given and its shape differs from xyz's
    """
    def as_vec3d(t):
        # detach() lets tensors that track gradients be converted to numpy;
        # float64 is the element type Vector3dVector stores.
        return o3d.utility.Vector3dVector(t.detach().cpu().double().numpy())

    # Open3D only treats colors as present when there is exactly one per
    # point, so a mismatch would otherwise render uncolored without any error.
    if rgb is not None and tuple(rgb.shape) != tuple(xyz.shape):
        raise ValueError(
            f"rgb shape {tuple(rgb.shape)} does not match xyz shape {tuple(xyz.shape)}"
        )

    pcd = o3d.geometry.PointCloud()
    pcd.points = as_vec3d(xyz)
    if rgb is not None:
        pcd.colors = as_vec3d(rgb)
    o3d.visualization.draw_geometries([pcd])

In [6]:
import numpy as np


# Inspect the distinct values stored in the RGB tensor, e.g. to check their
# range before they are used as point colors.
np.unique(rgb)

array([0.        , 0.00392157, 0.00784314, 0.01176471, 0.01568628,
       0.01960784, 0.02352941, 0.02745098, 0.03137255, 0.03529412,
       0.03921569, 0.04313726, 0.04705882, 0.05098039, 0.05490196,
       0.05882353, 0.0627451 , 0.06666667, 0.07058824, 0.07450981,
       0.07843138, 0.08235294, 0.08627451, 0.09019608, 0.09411765,
       0.09803922, 0.10196079, 0.10588235, 0.10980392, 0.11372549,
       0.11764706, 0.12156863, 0.1254902 , 0.12941177, 0.13333334,
       0.13725491, 0.14117648, 0.14509805, 0.14901961, 0.15294118,
       0.15686275, 0.16078432, 0.16470589, 0.16862746, 0.17254902,
       0.1764706 , 0.18039216, 0.18431373, 0.1882353 , 0.19215687,
       0.19607843, 0.2       , 0.20392157, 0.20784314, 0.21176471,
       0.21568628, 0.21960784, 0.22352941, 0.22745098, 0.23137255,
       0.23529412, 0.23921569, 0.24313726, 0.24705882, 0.2509804 ,
       0.25490198, 0.25882354, 0.2627451 , 0.26666668, 0.27058825,
       0.27450982, 0.2784314 , 0.28235295, 0.28627452, 0.29019

In [7]:
# Colors for the kept points: move channels last, (3, H, W) -> (H, W, 3), then
# select the pixels with depth > 0. This is the same mask depth_to_point_cloud
# applies, so the rows line up one-to-one with point_cloud.
valid_rgb = rgb.permute(1, 2, 0)[depth > 0]  # (N, 3)
visualize_point_cloud(xyz=point_cloud, rgb=valid_rgb)

In [8]:
from utils.dataloader import get_segmentation_colors


# Convert the segmentation map into colors with the project helper.
# NOTE(review): the output layout is defined in utils.dataloader and is not
# visible here; the later permute(1, 2, 0) suggests channels-first - confirm.
colored_seg = get_segmentation_colors(seg)

In [9]:
# Inspect the distinct color component values produced for the segmentation.
np.unique(colored_seg)

array([0.        , 0.13333333, 0.15294118, 0.15686275, 0.17254902,
       0.47058824, 0.58823529, 0.59607843, 0.62745098, 0.69019608,
       0.71372549, 0.73333333, 0.7372549 , 0.74117647, 0.77254902,
       0.82352941, 0.83529412, 0.83921569, 0.96862745, 1.        ])

In [10]:
# Flatten the colored segmentation to one 3-component row per pixel and keep
# only the pixels with depth > 0. This is the same mask depth_to_point_cloud
# applies, so the rows are meant to align with point_cloud.
# NOTE(review): assumes colored_seg is laid out as (3, H, W) - confirm.
valid_seg = colored_seg.permute(1, 2, 0).reshape(-1, 3)[(depth > 0).view(-1)]  # (N, 3)
print("Valid segmentation shape:", valid_seg.shape)
# Show the point cloud with each point colored by its segmentation color.
visualize_point_cloud(point_cloud, valid_seg)

Valid segmentation shape: torch.Size([253016, 3])


In [2]:
import torch
# Environment check, one value per line: installed PyTorch version, the CUDA
# version reported by torch.version.cuda, and whether CUDA is available.
print(
    torch.__version__,
    torch.version.cuda,
    torch.cuda.is_available(),
    sep="\n",
)

2.6.0
12.6
True
