In [1]:
import sys, os, time
import logging
import torch
import pickle
import numpy as np
import cv2
workspace_path = os.path.abspath(os.getcwd() + '/../')
sys.path.insert(0, workspace_path)
workspace_path

'/home/rajeev-gupta/sensyn_ws/src/object_detector/scripts'

In [2]:
from pcdet.models.detectors import GraphRCNN
from pcdet.config import cfg, cfg_from_yaml_file
from pcdet.utils.calibration_kitti import Calibration
from pcdet.utils.common_utils import create_logger
from pcdet.datasets.kitti.kitti_utils import calib_to_matricies
from pcdet.models import load_data_to_gpu

In [3]:
log_path = './test_logs.txt'
# relative paths wrt tools
cfg_file  = '/tools/cfgs/kitti_models/graph_rcnn_voi.yaml'
ckpt_path = '/tools/ckpts/graph_rcnn_voi_kitti.pth'
to_cpu = False

In [4]:
logger = create_logger(log_path)

In [5]:
cfg_from_yaml_file(workspace_path+cfg_file, cfg)

{'ROOT_DIR': PosixPath('/home/rajeev-gupta/sensyn_ws/src/object_detector/scripts'),
 'LOCAL_RANK': 0,
 'CLASS_NAMES': ['Car'],
 'DATA_CONFIG': {'DATASET': 'KittiDataset',
  'DATA_PATH': '../data/kitti',
  'BACKEND': {'NAME': 'HardDiskBackend'},
  'POINT_CLOUD_RANGE': [0, -40, -3, 70.4, 40, 1],
  'DATA_SPLIT': {'train': 'train', 'test': 'test'},
  'INFO_PATH': {'train': ['kitti_infos_train.pkl'],
   'test': ['kitti_infos_test.pkl']},
  'GET_ITEM_LIST': ['points', 'image', 'calib_matricies', 'gt_boxes2d'],
  'FOV_POINTS_ONLY': True,
  'ENABLE_SIMILAR_TYPE': True,
  'DATA_AUGMENTOR': {'DISABLE_AUG_LIST': ['placeholder'],
   'AUG_CONFIG_LIST': [{'NAME': 'random_world_flip',
     'PROBABILITY': 0.5,
     'ALONG_AXIS_LIST': ['x']},
    {'NAME': 'random_world_rotation',
     'PROBABILITY': 1.0,
     'WORLD_ROT_ANGLE': [-0.78539816, 0.78539816]},
    {'NAME': 'random_world_scaling',
     'PROBABILITY': 1.0,
     'WORLD_SCALE_RANGE': [0.95, 1.05]}]},
  'POINT_FEATURE_ENCODING': {'encoding_type'

In [6]:
from pathlib import Path

import numpy as np
import torch.utils.data as torch_data

from pcdet.datasets.processor.data_processor import DataProcessor
from pcdet.datasets.processor.point_feature_encoder import PointFeatureEncoder

class CustomKittiDataset(torch_data.Dataset):
    def __init__(self, dataset_cfg=None, class_names=None, training=True, root_path=None, logger=None):
        super().__init__()
        self.dataset_cfg = dataset_cfg
        self.training = training
        self.class_names = class_names
        self.logger = logger
        self.root_path = Path(root_path) if root_path is not None else Path(self.dataset_cfg.DATA_PATH)

        self.point_cloud_range = np.array(self.dataset_cfg.POINT_CLOUD_RANGE, dtype=np.float32)
        self.point_feature_encoder = PointFeatureEncoder(
            self.dataset_cfg.POINT_FEATURE_ENCODING,
            point_cloud_range=self.point_cloud_range
        )
        self.data_processor = DataProcessor(
            self.dataset_cfg.DATA_PROCESSOR, point_cloud_range=self.point_cloud_range,
            training=self.training, num_point_features=self.point_feature_encoder.num_point_features
        )
        self.grid_size = self.data_processor.grid_size
        self.voxel_size = self.data_processor.voxel_size
        self.total_epochs = 0
        self.cur_epoch = 0
        self._merge_all_iters_to_one_epoch = False

    @property
    def mode(self):
        return 'train' if self.training else 'test'
        
    def __getitem__(self, index):
        info_path = '/media/rajeev-gupta/Drive250/data/kitti/kitti_infos_test.pkl'
        with open(info_path, 'rb') as i_file:
            i_dict = pickle.load(i_file)
        info = i_dict[index]
        
        sample_idx = info['point_cloud']['lidar_idx']
        img_shape = info['image']['image_shape']
        calib = self.get_calib(sample_idx)
        get_item_list = self.dataset_cfg.get('GET_ITEM_LIST', ['points'])

        input_dict = {
            'frame_id': sample_idx,
            'calib': calib,
        }

        if "points" in get_item_list:
            points = self.get_lidar(sample_idx)
            if self.dataset_cfg.FOV_POINTS_ONLY:
                pts_rect = calib.lidar_to_rect(points[:, 0:3])
                fov_flag = self.get_fov_flag(pts_rect, img_shape, calib)
                points = points[fov_flag]
            input_dict['points'] = points

        if "image" in get_item_list:
            input_dict['image'] = self.get_image(sample_idx)

        if "calib_matricies" in get_item_list:
            input_dict["trans_lidar_to_cam"], input_dict["trans_cam_to_img"] = calib_to_matricies(calib)

        data_dict = self.prepare_data(data_dict=input_dict)

        data_dict['image_shape'] = img_shape
        return data_dict
    
    def get_calib(self, idx):
        calib_file = self.root_path / 'testing' / 'calib' / ('%s.txt' % idx)
        return Calibration(calib_file)

    def get_lidar(self, idx):
        lidar_file = self.root_path / 'testing' / 'velodyne' / ('%s.bin' % idx)
        return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
    
    def get_image(self, idx):
        img_file = self.root_path / 'testing' / 'image_2' / ('%s.png' % idx)
        return cv2.imread(str(img_file), cv2.IMREAD_COLOR)
    
    def prepare_data(self, data_dict):
        if data_dict.get('points', None) is not None:
            data_dict = self.point_feature_encoder.forward(data_dict)

        data_dict = self.data_processor.forward(
            data_dict=data_dict
        )
        return data_dict

    @staticmethod
    def get_fov_flag(pts_rect, img_shape, calib):
        """
        Args:
            pts_rect:
            img_shape:
            calib:

        Returns:

        """
        pts_img, pts_rect_depth = calib.rect_to_img(pts_rect)
        val_flag_1 = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1])
        val_flag_2 = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0])
        val_flag_merge = np.logical_and(val_flag_1, val_flag_2)
        pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0)

        return pts_valid_flag


In [7]:
dataset = CustomKittiDataset(
        dataset_cfg=cfg.DATA_CONFIG,
        class_names=cfg.CLASS_NAMES,
        root_path='/home/rajeev-gupta/sensyn_ws/src/GD-MAE/data/kitti',
        training=False,
        logger=logger,
    )

In [8]:
model = GraphRCNN(cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=dataset, logger=logger)

RuntimeError: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero.

In [None]:
get_dict = dataset.__getitem__(4)
get_dict

def get_fov_flag ******************


{'frame_id': '000004',
 'calib': <pcdet.utils.calibration_kitti.Calibration at 0x7fabbf0d06d0>,
 'points': array([[ 3.7786e+01,  7.8970e+00,  1.5140e+00,  9.4000e-01],
        [ 3.7753e+01,  8.0140e+00,  1.5140e+00,  9.9000e-01],
        [ 3.7741e+01,  8.1360e+00,  1.5150e+00,  9.9000e-01],
        ...,
        [ 6.3800e+00, -3.2000e-02, -1.6670e+00,  2.2000e-01],
        [ 6.3980e+00, -2.2000e-02, -1.6720e+00,  2.1000e-01],
        [ 6.3770e+00, -1.0000e-03, -1.6660e+00,  1.2000e-01]],
       dtype=float32),
 'image': array([[[-1.277312  , -1.5028257 , -1.5781852 ],
         [-1.277312  , -1.5028257 , -1.5781852 ],
         [-1.2908984 , -1.4599018 , -1.5781852 ],
         ...,
         [ 0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ],
         [ 0.        ,  0.        ,  0.        ]],
 
        [[-1.3044848 , -1.4885178 , -1.5781852 ],
         [-1.277312  , -1.5028257 , -1.5781852 ],
         [-1.277312  , -1.4885178 , -1.5922866 ],
         

In [None]:
for key, val in get_dict.items():
    print(key)
    if type(val) == tuple:
        get_dict[key] = list(val)
    elif key == 'points':
        # add a zero column
        n = val.shape[0]
        z_col = np.zeros((n, 1), dtype=float)
        get_dict[key] = np.concatenate((z_col, val), axis = 1)
        continue
    elif key == 'image':
        # transpose (384, 1280, 3) to (3, 384, 1280)
        val_transposed = np.transpose(val, (2, 0, 1))
        get_dict[key] = val_transposed
        # print(get_dict[key].shape)
    elif key == 'transformation_2d_list' or key == 'transformation_2d_params':
        get_dict[key] = [val]
        continue
    get_dict[key] = np.array([get_dict[key]])
            
get_dict['batch_size'] = 1



frame_id
calib
points
image
trans_lidar_to_cam
trans_cam_to_img
use_lead_xyz
transformation_2d_list
transformation_2d_params
image_rescale_shape
image_pad_shape
image_shape


In [None]:
get_dict

{'frame_id': array(['000004'], dtype='<U6'),
 'calib': array([<pcdet.utils.calibration_kitti.Calibration object at 0x7fabbf0d06d0>],
       dtype=object),
 'points': array([[ 0.00000000e+00,  3.77859993e+01,  7.89699984e+00,
          1.51400006e+00,  9.39999998e-01],
        [ 0.00000000e+00,  3.77529984e+01,  8.01399994e+00,
          1.51400006e+00,  9.90000010e-01],
        [ 0.00000000e+00,  3.77410011e+01,  8.13599968e+00,
          1.51499999e+00,  9.90000010e-01],
        ...,
        [ 0.00000000e+00,  6.38000011e+00, -3.20000015e-02,
         -1.66700006e+00,  2.19999999e-01],
        [ 0.00000000e+00,  6.39799976e+00, -2.19999999e-02,
         -1.67200005e+00,  2.09999993e-01],
        [ 0.00000000e+00,  6.37699986e+00, -1.00000005e-03,
         -1.66600001e+00,  1.19999997e-01]]),
 'image': array([[[[-1.277312  , -1.277312  , -1.2908984 , ...,  0.        ,
            0.        ,  0.        ],
          [-1.3044848 , -1.277312  , -1.277312  , ...,  0.        ,
            0

In [None]:
with torch.no_grad():
    model.load_params_from_file(filename=workspace_path+ckpt_path, logger=logger, to_cpu=to_cpu)
    model.cuda()
    model.eval()
    time.sleep(2)


2024-07-01 11:33:31,533   INFO  ==> Loading parameters from checkpoint /home/rajeev-gupta/sensyn_ws/src/object_detector/tools/ckpts/graph_rcnn_voi_kitti.pth to GPU
2024-07-01 11:33:31,741   INFO  ==> Done (loaded 518/518)


In [None]:
data_input = get_dict
# data_input = b_dict
torch.cuda.synchronize()
start_time = time.time()
load_data_to_gpu(data_input)
pred_dicts, ret_dict = model(data_input)
torch.cuda.synchronize()
end_time = time.time()
print('Inference Time: ', end_time-start_time)

pred_dicts

Inference Time:  1.7700154781341553


[{'pred_boxes': tensor([[37.7702, 15.6945, -0.3820,  4.1328,  1.7591,  1.4666,  3.1287]],
         device='cuda:0', grad_fn=<IndexBackward0>),
  'pred_scores': tensor([0.9051], device='cuda:0', grad_fn=<IndexBackward0>),
  'pred_labels': tensor([1], device='cuda:0')}]

In [None]:
info_path = '/media/rajeev-gupta/Drive250/data/kitti/kitti_infos_test.pkl'
with open(info_path, 'rb') as i_file:
    i_dict = pickle.load(i_file)
len(i_dict)

51

In [None]:
i_dict[0]


{'point_cloud': {'num_features': 4, 'lidar_idx': '000000'},
 'image': {'image_idx': '000000',
  'image_shape': array([ 375, 1242], dtype=int32)},
 'calib': {'P2': array([[7.21537720e+02, 0.00000000e+00, 6.09559326e+02, 4.48572807e+01],
         [0.00000000e+00, 7.21537720e+02, 1.72854004e+02, 2.16379106e-01],
         [0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 2.74588400e-03],
         [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]]),
  'R0_rect': array([[ 0.9999239 ,  0.00983776, -0.00744505,  0.        ],
         [-0.0098698 ,  0.9999421 , -0.00427846,  0.        ],
         [ 0.00740253,  0.00435161,  0.9999631 ,  0.        ],
         [ 0.        ,  0.        ,  0.        ,  1.        ]],
        dtype=float32),
  'Tr_velo_to_cam': array([[ 7.53374491e-03, -9.99971390e-01, -6.16602018e-04,
          -4.06976603e-03],
         [ 1.48024904e-02,  7.28073297e-04, -9.99890208e-01,
          -7.63161778e-02],
         [ 9.99862075e-01,  7.52379000e-03,  1.480755