**Read .ply file of point cloud**

In [1]:
import open3d as o3d
import numpy as np
import h5py
from pc_label_map import color_map

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [None]:
sim_pc_path = '../docs/smartLab_sim.ply'

pcd = o3d.io.read_point_cloud(sim_pc_path)
points = np.asarray(pcd.points)          # Shape: [N, 3]
colors = np.asarray(pcd.colors)           # Shape: [N, 3]
# NOTE(review): np.asarray may hand back a view onto Open3D's own buffer rather
# than a copy -- confirm before relying on `points` after pcd.points is reassigned.

# NOTE(review): Open3D appears to report an unreadable/missing file with a warning
# and an empty cloud instead of an exception -- confirm. Fail here with a clear
# message rather than later inside a min()/concatenate call.
if len(points) == 0:
    raise ValueError(f"No points could be read from {sim_pc_path!r}")
# Colors are concatenated row-wise with the coordinates and used to infer labels,
# so every point needs a color.
if len(colors) != len(points):
    raise ValueError(
        f"{sim_pc_path!r} has {len(points)} points but {len(colors)} colors"
    )

#o3d.visualization.draw_geometries([pcd])

In [None]:
# Shift the point cloud so that its min(x, y, z) corner sits at the origin

def move_to_corner(points):
    """Return `points` translated so that every column's minimum becomes 0.

    Args:
        points: NumPy array of point coordinates, one point per row.

    Returns:
        A new array of the same shape; the input array is not modified.
    """
    return points - points.min(axis=0)

moved_points = move_to_corner(np.array(pcd.points))
pcd.points = o3d.utility.Vector3dVector(moved_points)
# `points` was captured before the shift. Rebind it so the feature matrix and the
# block partitioning further down use the shifted coordinates explicitly, instead
# of depending on whether the earlier NumPy view tracks the reassigned Open3D buffer.
points = moved_points

In [None]:
# Check the range of points
def array_range(array):
    min_values = np.min(array, axis=0)
    max_values = np.max(array, axis=0)
    for i, (min_val, max_val) in enumerate(zip(min_values, max_values), start=1):
        print(f"Column {i}: min = {min_val}, max = {max_val}")

# array_range prints its own report and returns None, so call it directly;
# wrapping it in print() only appends a stray "None" line to the output.
array_range(np.array(pcd.points))

Column 1: min = 0.0, max = 11.514938609902565
Column 2: min = 0.0, max = 11.121357249938047
Column 3: min = 0.0, max = 3.906518181158416
None


Features of dataset (9):
coordinates(3), colors(3), normalized coordinates(3)

In [None]:
def normalize_points_corner(points):
    """Rescale each coordinate axis of `points` to the range [0, 1].

    The per-axis minimum is subtracted first, then each axis is divided by its
    remaining maximum. An axis with zero extent is divided by 1 instead, so it
    stays at 0 rather than producing a division by zero.

    Args:
        points: array of point coordinates, one point per row.

    Returns:
        A new array with the same shape as `points`.
    """
    shifted = points - np.min(points, axis=0)
    scale = np.max(shifted, axis=0).copy()
    scale[scale == 0] = 1  # flat axis: keep the divisor non-zero
    return shifted / scale

# Third feature group: the cloud's current coordinates rescaled per axis.
current_xyz = np.array(pcd.points)
normalized = normalize_points_corner(current_xyz)

In [7]:
# Per-point feature matrix of shape (N, 9):
# coordinates (3) | colors (3) | normalized coordinates (3)
feature_parts = (points, colors, normalized)
features = np.concatenate(feature_parts, axis=1)

**Blocks of 1 m × 1 m × 1 m with 4096 points each, as in the S3DIS dataset**

In [8]:
# Partition the cloud into axis-aligned cubes of edge `block_size` and resample
# every non-empty cube to exactly `target_points` points.
min_bound = np.min(points, axis=0)
max_bound = np.max(points, axis=0)

block_size = 1.0      # in meters
target_points = 4096  # points kept per block
rng = np.random.default_rng(42)  # fixed seed: re-running the cell reproduces the sampling

# Blocks are half-open intervals [lo, lo + block_size). Using floor(...) + 1 instead
# of ceil(...) provides a block for points lying exactly on the max face when the
# extent is a whole multiple of block_size, and gives one block (not zero) for an
# axis with zero extent. For any other extent the count is the same as with ceil.
num_blocks_x = int(np.floor((max_bound[0] - min_bound[0]) / block_size)) + 1
num_blocks_y = int(np.floor((max_bound[1] - min_bound[1]) / block_size)) + 1
num_blocks_z = int(np.floor((max_bound[2] - min_bound[2]) / block_size)) + 1

block_features_list = []
block_labels_list  = []

for ix in range(num_blocks_x):
    x_min = min_bound[0] + ix * block_size
    x_max = x_min + block_size
    # The x mask is shared by every block of this slab, so build it only once.
    in_x = (points[:, 0] >= x_min) & (points[:, 0] < x_max)
    if not in_x.any():
        continue  # No points in this slab, hence none in any of its blocks

    for iy in range(num_blocks_y):
        y_min = min_bound[1] + iy * block_size
        y_max = y_min + block_size
        in_xy = in_x & (points[:, 1] >= y_min) & (points[:, 1] < y_max)
        if not in_xy.any():
            continue  # Empty column

        for iz in range(num_blocks_z):
            z_min = min_bound[2] + iz * block_size
            z_max = z_min + block_size

            # Indices of the points that fall inside this block
            in_block = np.where(
                in_xy & (points[:, 2] >= z_min) & (points[:, 2] < z_max)
            )[0]

            if len(in_block) == 0:
                continue  # Skip empty blocks

            block_features = features[in_block, :]
            # If label is directly extractable, substitute here. If not, infer from color:
            # each point gets the index of the color_map entry nearest to its color
            # (Euclidean distance).
            # NOTE(review): assumes color_map broadcasts as (K, 3) and uses the same
            # value scale as `colors` -- confirm against pc_label_map.
            block_colors = colors[in_block]
            block_labels = np.argmin(
                np.linalg.norm(block_colors[:, None] - color_map, axis=2), axis=1
            )

            # --- Handling Block Size (target_points points) ---
            # More points than needed: draw target_points of them without replacement.
            # Fewer: keep every point and top up with randomly duplicated ones.
            num_points = block_features.shape[0]

            if num_points >= target_points:
                idx = rng.choice(num_points, target_points, replace=False)
            else:
                idx = np.concatenate([
                    np.arange(num_points),
                    rng.choice(num_points, target_points - num_points, replace=True)
                ])

            block_features_list.append(block_features[idx, :])
            block_labels_list.append(block_labels[idx])

In [9]:
# Assemble the per-block lists into dense arrays (B = number of blocks).
data_array = np.stack(block_features_list)   # Shape: (B, 4096, 9)
label_array = np.stack(block_labels_list)    # Shape: (B, 4096)

# Spot check: features and label of the first point in the first block.
print(data_array[0, 0])
print(label_array[0, 0])

[0.22102129 0.73422805 0.51192494 0.         0.         0.
 0.01919431 0.06601964 0.13104379]
12


In [10]:
sim_pc_data_path = '../docs/sim_pc_dataset_moved.h5'

# Write both arrays to one HDF5 file as gzip-compressed datasets
# ('w' mode replaces any existing file at that path).
with h5py.File(sim_pc_data_path, 'w') as h5_out:
    for dataset_name, dataset_values in (('data', data_array), ('label', label_array)):
        h5_out.create_dataset(dataset_name, data=dataset_values, compression='gzip')

In [20]:
# Test HDF5 data: re-open the saved file read-only and inspect what was written.
# The context manager closes the handle again (the file was previously left open),
# and h5py is already imported in the notebook's first cell.
with h5py.File(sim_pc_data_path, 'r') as f: #train_pc_path, data_file_path
    dataset_names = list(f.keys())
    saved_shape = f['data'].shape  # plain tuple, still valid after the file is closed

# Both datasets written by the save cell must be present.
assert {'data', 'label'} <= set(dataset_names), dataset_names
saved_shape

(485, 4096, 9)