In [1]:
import torch
import numpy as np
import torch.nn.functional as F
from pointcept.models.point_transformer_v3.point_transformer_v3m1_base import PointTransformerV3
from pointcept.datasets.transform import (
    GridSample, Compose, ToTensor, Collect, RandomRotateTargetAngle, TRANSFORMS
)
from pointcept.datasets.utils import collate_fn
from collections import OrderedDict
import pointcept.utils.comm as comm
from pointcept.models import build_model

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def load_model(weight_path):
    """
    Build the PT-v3 segmentation model and load trained weights into it.

    Args:
        weight_path: path to a checkpoint readable by ``torch.load``. The file may
            hold either a dict with a "state_dict" entry or a bare state dict.
            A leading "module." on parameter names is stripped before loading.
    Returns:
        model: the model in evaluation mode (on CUDA when CUDA is available)
    Raises:
        RuntimeError: raised by ``load_state_dict(strict=True)`` when the
            checkpoint keys do not match the model defined below.
    """
    # Model config is defined inline; it must match the architecture the
    # checkpoint was trained with because weights are loaded with strict=True.
    model_cfg = dict(
        type='DefaultSegmentorV2',
        num_classes=2,
        backbone_out_channels=64,
        backbone=dict(
            type='PT-v3m1',
            in_channels=4,
            order=['z', 'z-trans', 'hilbert', 'hilbert-trans'],
            stride=(2, 2, 2, 2),
            enc_depths=(2, 2, 2, 6, 2),
            enc_channels=(32, 64, 128, 256, 512),
            enc_num_head=(2, 4, 8, 16, 32),
            enc_patch_size=(64, 64, 64, 64, 64),
            dec_depths=(2, 2, 2, 2),
            dec_channels=(64, 64, 128, 256),
            dec_num_head=(4, 4, 8, 16),
            dec_patch_size=(64, 64, 64, 64),
            mlp_ratio=4,
            qkv_bias=True,
            qk_scale=None,
            attn_drop=0.0,
            proj_drop=0.0,
            drop_path=0.3,
            shuffle_orders=True,
            pre_norm=True,
            enable_rpe=False,
            enable_flash=False,
            upcast_attention=False,
            upcast_softmax=False,
            cls_mode=False,
            pdnorm_bn=False,
            pdnorm_ln=False,
            pdnorm_decouple=True,
            pdnorm_adaptive=False,
            pdnorm_affine=True,
            pdnorm_conditions=('nuScenes', 'SemanticKITTI', 'Waymo'))
    )

    model = build_model(model_cfg)
    if torch.cuda.is_available():
        model = model.cuda()

    # Always deserialize onto the CPU: without map_location the tensors are
    # restored onto the device index they were saved from, which may not exist
    # in this process. load_state_dict copies the values into the model's
    # parameters, wherever those live.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(weight_path, map_location="cpu")
    state_dict = checkpoint.get("state_dict", checkpoint)

    # Strip the "module." prefix (typically left by a DataParallel-style
    # wrapper at training time) so keys match the unwrapped model.
    prefix = "module."
    weight = OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in state_dict.items()
    )
    model.load_state_dict(weight, strict=True)
    model.eval()

    return model

def inference(points, model):
    """
    Run semantic-segmentation inference on a single point cloud.

    Args:
        points: numpy array of shape (n, C) with C >= 4. Columns 0..2 are used
            as x, y, z and the LAST column is used as the "strength" feature,
            whatever C is.
        model: a loaded model in evaluation mode (e.g. from ``load_model``);
            calling it on the input dict must return a dict with "seg_logits".
    Returns:
        pred: int array of shape (n,), predicted class index per input point
        probs: float array of shape (n,), accumulated score of that class
    Raises:
        TypeError: if ``model`` is not callable (e.g. a weight path was passed).
        ValueError: if ``points`` is not 2-D with at least 4 columns.
    """
    # Fail fast on bad arguments instead of deep inside the fragment loop.
    if not callable(model):
        raise TypeError(
            "model must be a loaded model (e.g. from load_model), "
            f"got {type(model).__name__}"
        )
    points = np.asarray(points)
    if points.ndim != 2 or points.shape[1] < 4:
        raise ValueError(
            f"points must have shape (n, C) with C >= 4, got {points.shape}"
        )
    if points.shape[0] == 0:
        # Nothing to voxelize; return empty results with the usual dtypes.
        return np.zeros(0, dtype=np.int64), np.zeros(0, dtype=np.float32)

    # Create data dict
    coord = points[:, :3]
    strength = points[:, -1].reshape([-1, 1])
    segment = np.zeros(points.shape[0], dtype=np.int32)  # dummy labels
    data_dict = dict(coord=coord, strength=strength, segment=segment)

    # First pass: grid-sample the cloud and keep the inverse mapping so the
    # per-voxel predictions can be scattered back onto every input point.
    # NOTE(review): mode='train' presumably keeps one point per voxel — confirm
    # against Pointcept's GridSample.
    transform = Compose([
        dict(type='Copy', keys_dict=dict(segment='origin_segment')),
        dict(
            type='GridSample',
            grid_size=0.05,
            hash_type='fnv',
            mode='train',
            keys=('coord', 'strength', 'segment'),
            return_inverse=True)
    ])
    data_dict = transform(data_dict)
    inverse = data_dict.pop("inverse")
    num_voxels = data_dict["coord"].shape[0]

    # Second pass: test-mode voxelization splits the sampled cloud into parts,
    # each carrying an "index" into the sampled cloud.
    voxelize = TRANSFORMS.build(dict(
        type='GridSample',
        grid_size=0.05,
        hash_type='fnv',
        mode='test',
        return_grid_coord=True,
        keys=('coord', 'strength')))
    data_part_list = voxelize(data_dict)

    # Apply post transform
    post_transform = Compose([
        dict(type='ToTensor'),
        dict(
            type='Collect',
            keys=('coord', 'grid_coord', 'index'),
            feat_keys=('coord', 'strength'))
    ])
    fragment_list = [post_transform(data_part) for data_part in data_part_list]

    # Inputs go to wherever the model's weights are, not to "CUDA if present".
    device = _model_device(model)

    # Accumulated class scores per sampled point; allocated on the first
    # fragment so the class count comes from the model output.
    scores = None
    for fragment in fragment_list:
        idx_part = fragment["index"]

        input_dict = {
            key: value.to(device) if isinstance(value, torch.Tensor) else value
            for key, value in fragment.items()
        }

        with torch.no_grad():
            pred_part = F.softmax(model(input_dict)["seg_logits"], -1)

            if scores is None:
                scores = torch.zeros(
                    (num_voxels, pred_part.shape[-1]), device=pred_part.device
                )

            # "offset" holds the cumulative end index of each batch element.
            bs = 0
            for be in input_dict["offset"].tolist():
                scores[idx_part[bs:be], :] += pred_part[bs:be]
                bs = be

        if device.type == "cuda":
            torch.cuda.empty_cache()

    if scores is None:
        raise RuntimeError("voxelization produced no fragments to run inference on")

    # Get final predictions and probabilities
    best_score, best_class = scores.max(1)
    probs = best_score.cpu().numpy()
    pred = best_class.cpu().numpy()

    # Map back to original points
    return pred[inverse], probs[inverse]


def _model_device(model):
    """Return the device of the model's parameters, or the default device if it has none."""
    try:
        return next(model.parameters()).device
    except (AttributeError, StopIteration):
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
import numpy as np
import torch
import torch.nn.functional as F
from collections import OrderedDict
import os
# Restrict this process to the GPU with physical index 3.
# NOTE(review): torch is already imported by an earlier cell; this setting
# presumably only takes effect if CUDA has not been initialized yet in this
# kernel — confirm, or move it above the first torch import.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
from tqdm import tqdm
import time

def load_npy_data(path):
    """
    Read an array from a .npy file.

    Args:
        path: location of the .npy file; callers in this notebook pass frames
            stacked as (N, 128, 1024, 7), but no shape is enforced here
    Returns:
        data: the array exactly as stored in the file
    """
    data = np.load(path)
    return data


if __name__ == "__main__":
    # Paths
    input_path = "/home/yan/slam06_mapping_local.npy"  # Replace with your input path
    weight_path = "/home/yan/pointcept151/exp/nuscenes/train_highbay_02/model/model_best.pth"
    output_dir = "predictions"

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load input data
    print("Loading input data...")
    data = load_npy_data(input_path)
    num_frames = data.shape[0]

    # Load model (only once)
    print("Loading model...")
    model = load_model(weight_path)

    # Process each frame
    print(f"Processing {num_frames} frames...")
    all_predictions = []
    all_probabilities = []

    for frame_idx in tqdm(range(num_frames)):
        # Get single frame data
        frame_data = data[frame_idx]  # Shape: (H, W, C), e.g. (128, 1024, 7)

        # Flatten to one row per point, shape (H*W, C). The copy matters:
        # reshape returns a view here, so normalizing in place would silently
        # rewrite `data` itself.
        points = frame_data.reshape(-1, frame_data.shape[-1]).copy()

        # Rescale the near_ir channel (column 6); inference() uses the last
        # column as the strength feature.
        # NOTE(review): 65532 is kept as-is; confirm it is intended rather
        # than 65535 and matches the scaling used at training time.
        points[:, 6] = (points[:, 6] * 10) / 65532

        # Run inference
        pred, probs = inference(points, model)

        # Results stay flat, one value per point; reshape to (H, W) downstream
        # if an image-like layout is needed.
        all_predictions.append(pred)
        all_probabilities.append(probs)

    # Convert lists to numpy arrays
    all_predictions = np.array(all_predictions)  # Shape: (N, H*W)
    all_probabilities = np.array(all_probabilities)  # Shape: (N, H*W)

    # Save combined results (probabilities are kept in memory only)
    np.save(os.path.join(output_dir, 'slam06_predictions.npy'), all_predictions)

    # Print summary
    print("\nProcessing complete!")
    print(f"Results saved in: {output_dir}")
    print(f"Input shape: {data.shape}")
    print(f"Output predictions shape: {all_predictions.shape}")

Loading input data...
Loading model...
Processing 568 frames...


100%|██████████| 568/568 [02:04<00:00,  4.57it/s]



Processing complete!
Results saved in: predictions
Input shape: (568, 128, 1024, 7)
Output predictions shape: (568, 131072)


In [7]:
# Shape of the stacked predictions: one flattened row of per-point labels per frame.
all_predictions.shape

(568, 131072)

In [None]:
# data = np.load('/home/yan/pointcept151/predictions/all_probabilities.npy')
# Load the saved predictions file from disk and show its shape.
# NOTE(review): this rebinds `data`, which an earlier cell uses for the raw
# input frames; re-run that cell before using `data` as input again.
data = np.load('/home/yan/pointcept151/predictions/slam06_predictions.npy')
data.shape  

In [46]:
# Distinct predicted labels across all frames and how many points received each.
np.unique(all_predictions,return_counts=True)

(array([0, 1]), array([27645901, 55191603]))

In [None]:
# Single-scan check: run the model on one velodyne .bin file and save the labels.
points = np.random.rand(1000, 4)  # placeholder input; not used below
path = '/home/yan/pointcept151/dataset/sequences/00/velodyne/000200.bin'
weight_path = "/home/yan/pointcept151/exp/nuscenes/train_highbay_02/model/model_best.pth"


def read_bin(path):
    """Read a flat float32 binary file and return it as an (n, 4) array."""
    return np.fromfile(path, dtype=np.float32).reshape(-1, 4)


test_points = read_bin(path)

# inference() expects a loaded model, not a checkpoint path, so build it first.
model = load_model(weight_path)

# Run inference
pred, probs = inference(test_points, model)
np.save('pred.npy', pred)