In [8]:
from mmdet3d.datasets.nuscenes_dataset import NuScenesDataset as MMDetNuScenesDataset
from torch.utils.data import Dataset
from src.util.misc import EasyDict as edict
import cv2
import os

In [9]:
# Lookup tables between integer class ids and class names.
# The position of a name in the tuple below is its id (0 = 'unlabeled', ..., 16 = 'vegetation').
# NOTE(review): 'car' maps to 4 here and names are hyphenated, whereas the category
# table logged when the dataset is loaded further down lists 'car' first and uses
# underscores ('construction_vehicle'). Confirm these ids match the `bbox_label`
# values they are compared against.
LABEL_ID2NAME = dict(enumerate((
    'unlabeled',
    'barrier',
    'bicycle',
    'bus',
    'car',
    'construction-vehicle',
    'motorcycle',
    'pedestrian',
    'traffic-cone',
    'trailer',
    'truck',
    'driveable-surface',
    'other-ground',
    'sidewalk',
    'terrain',
    'manmade',
    'vegetation',
)))
# Inverse table: class name -> integer id.
LABEL_NAME2ID = dict(zip(LABEL_ID2NAME.values(), LABEL_ID2NAME.keys()))

In [10]:
# Camera naming: every camera name is "<namespace>_<position>".
CAM_NAMESPACE = 'CAM'
CAMERAS = [
    "FRONT",
    "FRONT_RIGHT",
    "FRONT_LEFT",
    "BACK",
    "BACK_LEFT",
    "BACK_RIGHT",
]
CAMERA_NAMES = [f"{CAM_NAMESPACE}_{camera}" for camera in CAMERAS]
# A camera's id is its position in CAMERA_NAMES; the two tables are inverses of each other.
CAM_ID2CAM_NAME = dict(enumerate(CAMERA_NAMES))
CAM_NAME2CAM_ID = {cam_name: cam_id for cam_id, cam_name in CAM_ID2CAM_NAME.items()}

In [34]:
class NuScenesCameraInstances(Dataset):
    """2D instance annotations of one camera image together with their image crops.

    The image at ``img_path`` is read once at construction time and one patch
    per entry of ``cam_instances`` is cut out eagerly, so indexing afterwards
    performs no further image reads.

    Args:
        cam_instances: Sequence of mapping-like annotations. Each entry must
            provide a ``bbox`` item laid out as ``[x1, y1, x2, y2]``.
        img_path: Path of the image the annotations belong to.

    Raises:
        FileNotFoundError: If the image cannot be read while ``cam_instances``
            is non-empty.
    """

    def __init__(self, cam_instances, img_path):
        self.cam_instances = cam_instances
        self.img_path = img_path
        self.patches = self._generate_patches()

    def __len__(self):
        """Return the number of annotated instances."""
        return len(self.cam_instances)

    @staticmethod
    def _crop_patch(img, bbox):
        """Cut ``bbox`` (``[x1, y1, x2, y2]``) out of ``img``, clamped to the image bounds.

        Raises ``TypeError``/``ValueError``/``OverflowError`` when ``bbox`` is not
        four values convertible to ``int`` (e.g. wrong length, ``None``, NaN, inf).
        A box that lies completely outside the image yields an empty crop.
        """
        height, width = img.shape[:2]
        x1, y1, x2, y2 = (int(coord) for coord in bbox)
        # A negative slice bound is interpreted as an offset from the far edge,
        # so a box sticking out past the left/top border would silently select
        # the wrong pixels (or nothing). Clamp every coordinate into the image.
        x1, x2 = (min(max(x, 0), width) for x in (x1, x2))
        y1, y2 = (min(max(y, 0), height) for y in (y1, y2))
        return img[y1:y2, x1:x2]

    def _generate_patches(self):
        """Read the image and return one crop per instance, in instance order."""
        img = cv2.imread(self.img_path)
        if img is None:
            # An unreadable image is only an error when there is something to crop.
            if len(self) > 0:
                raise FileNotFoundError(f"Image not found at {self.img_path} but {len(self)} instances are present")
            return []

        patches = []
        for cam_instance in self.cam_instances:
            try:
                # Item access works for plain dicts as well as attribute-style dicts.
                patch = self._crop_patch(img, cam_instance["bbox"])
            except (TypeError, ValueError, OverflowError) as e:
                print(f"Error in cropping image: {e}")
                # Best effort: fall back to the full image for a malformed box so
                # that patches stay aligned with cam_instances.
                patch = img
            patches.append(patch)
        return patches

    def __getitem__(self, idx):
        """Return a copy of annotation ``idx`` with its crop attached as ``patch``."""
        cam_instance = edict(self.cam_instances[idx])
        cam_instance.patch = self.patches[idx]
        return cam_instance

In [35]:
class NuScenesBase(MMDetNuScenesDataset):
    """nuScenes dataset exposing one item per (sample, camera) pair.

    Flat index ``idx`` maps to sample ``idx // num_cameras`` and camera
    ``idx % num_cameras`` (camera order as in ``CAMERA_NAMES``).

    Each item is an ``edict`` holding ``sample_idx``, ``cam_idx``, ``cam_name``,
    the entries of the sample's ``images[cam_name]`` record, and
    ``cam_instances``: a ``NuScenesCameraInstances`` restricted to the
    requested labels.

    Args:
        data_root: Dataset root directory; images are read from its
            ``samples`` (or ``sweeps``) sub-directory.
        label_names: Names (keys of ``LABEL_NAME2ID``) of the classes to keep.
        is_sweep: Read images from ``sweeps`` instead of ``samples``.
        **kwargs: Forwarded unchanged to the parent dataset constructor.

    Raises:
        KeyError: If ``label_names`` contains a name missing from ``LABEL_NAME2ID``.

    NOTE(review): instances are filtered by comparing ``bbox_label`` with ids
    taken from ``LABEL_NAME2ID`` (car -> 4). The category table the parent logs
    while loading lists ``car`` first; if ``bbox_label`` indexes that list the
    filter selects a different category. Confirm against the info file.
    """

    def __init__(self, data_root, label_names, is_sweep=False, **kwargs):
        # Fail before the (expensive) annotation load when a label is unknown.
        unknown = [name for name in label_names if name not in LABEL_NAME2ID]
        if unknown:
            raise KeyError(f"Unknown label names {unknown}; expected a subset of {sorted(LABEL_NAME2ID)}")

        self.data_root = data_root
        self.img_root = os.path.join(data_root, "sweeps" if is_sweep else "samples")
        # Set up front so __getitem__ does not rely on __len__ having run first.
        self.num_cameras = len(CAMERA_NAMES)
        super().__init__(data_root=data_root, **kwargs)
        self.label_names = label_names
        self.label_ids = [LABEL_NAME2ID[label_name] for label_name in label_names]
        print(f"Using label names: {self.label_names}, label ids: {self.label_ids}")

    def __len__(self):
        """Return the number of (sample, camera) pairs."""
        self.num_samples = super().__len__()
        self.num_cameras = len(CAMERA_NAMES)
        return self.num_samples * self.num_cameras

    def __getitem__(self, idx):
        """Return the item for flat index ``idx``; negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``. Plain
                ``for item in dataset`` iteration relies on this to terminate.
        """
        total = len(self)
        flat_idx = idx + total if idx < 0 else idx
        if not 0 <= flat_idx < total:
            raise IndexError(f"index {idx} is out of range for dataset of length {total}")

        sample_idx, cam_idx = divmod(flat_idx, self.num_cameras)
        cam_name = CAM_ID2CAM_NAME[cam_idx]
        sample_info = edict(super().__getitem__(sample_idx))

        ret = edict()
        ret.sample_idx = sample_idx
        ret.cam_idx = cam_idx
        ret.cam_name = cam_name
        sample_img_info = edict(sample_info.images[cam_name])
        ret.update(sample_img_info)

        # Annotations of the current camera image, restricted to the requested labels.
        cam_instances = [
            cam_instance
            for cam_instance in sample_info.cam_instances[cam_name]
            if cam_instance['bbox_label'] in self.label_ids
        ]
        # Only the file name of the recorded path is used; the directory is
        # rebuilt from img_root and the camera name.
        img_file = os.path.basename(sample_img_info.img_path)
        ret.cam_instances = NuScenesCameraInstances(
            cam_instances=cam_instances,
            img_path=os.path.join(self.img_root, cam_name, img_file),
        )
        return ret

In [36]:
# Constructor arguments for NuScenesBase.
nusc_base_kwargs = {
    # Read by NuScenesBase itself (data_root is also passed on to the parent class).
    "label_names": ["car"],
    "data_root": "data/nuscenes",
    # Forwarded unchanged to the parent dataset constructor through **kwargs.
    "ann_file": "nuscenes_infos_val.pkl",
    "pipeline": [],
    "box_type_3d": "Camera",
    "load_type": 'frame_based',
    "modality": {"use_camera": True, "use_lidar": False},
    "filter_empty_gt": False,
    "test_mode": False,
    "with_velocity": False,
    "use_valid_flag": False,
}

nusc_base = NuScenesBase(**nusc_base_kwargs)

04/12 01:08:25 - mmengine - INFO - ------------------------------
04/12 01:08:25 - mmengine - INFO - The length of training dataset: 36114
04/12 01:08:25 - mmengine - INFO - The number of instances per category in the dataset:
+----------------------+--------+
| category             | number |
+----------------------+--------+
| car                  | 80004  |
| truck                | 15704  |
| trailer              | 4159   |
| bus                  | 3158   |
| construction_vehicle | 2678   |
| bicycle              | 2381   |
| motorcycle           | 2508   |
| pedestrian           | 34347  |
| traffic_cone         | 15597  |
| barrier              | 26992  |
+----------------------+--------+
Using label names: ['car'], label ids: [4]


In [None]:
import tqdm

# Smoke test: walk the whole dataset once and touch every instance patch.
pbar = tqdm.tqdm(nusc_base)
for i, sample in enumerate(pbar):
    for cam_instance_idx, cam_instance in enumerate(sample.cam_instances):
        # `i` is the flat (sample, camera) item index and the inner index counts
        # instances within that camera image, so report the real sample and
        # camera carried by the item instead of labelling those two as such.
        pbar.set_description(
            f"Item {i} (sample {sample.sample_idx}, {sample.cam_name}), "
            f"Instance {cam_instance_idx}, Label: {cam_instance.bbox_label}, "
            f"Patch shape: {cam_instance.patch.shape}"
        )
print("Done")

In [1]:
from src.data.datasets.nuscenes import NuScenesValidation

# Constructor arguments for NuScenesValidation.
# NOTE(review): no "ann_file" entry here; presumably NuScenesValidation
# supplies the annotation file itself. Confirm in src.data.datasets.nuscenes.
nusc_base_kwargs = {
    "label_names": ["car"],
    "data_root": "data/nuscenes",
    "pipeline": [],
    "box_type_3d": "Camera",
    "load_type": 'frame_based',
    "modality": {"use_camera": True, "use_lidar": False},
    "filter_empty_gt": False,
    "test_mode": False,
    "with_velocity": False,
    "use_valid_flag": False,
}

nusc_val = NuScenesValidation(**nusc_base_kwargs)

04/12 01:26:03 - mmengine - INFO - ------------------------------
04/12 01:26:03 - mmengine - INFO - The length of training dataset: 36114
04/12 01:26:03 - mmengine - INFO - The number of instances per category in the dataset:
+----------------------+--------+
| category             | number |
+----------------------+--------+
| car                  | 80004  |
| truck                | 15704  |
| trailer              | 4159   |
| bus                  | 3158   |
| construction_vehicle | 2678   |
| bicycle              | 2381   |
| motorcycle           | 2508   |
| pedestrian           | 34347  |
| traffic_cone         | 15597  |
| barrier              | 26992  |
+----------------------+--------+
Using label names: ['car'], label ids: [4]


In [None]:
import tqdm

# Smoke test: walk the validation dataset once and touch every instance patch.
# NOTE(review): `i` is the index of the dataset item and `cam_instance_idx` the
# index of an instance within that item, despite the "Sample"/"Camera" wording
# of the progress text.
pbar = tqdm.tqdm(nusc_val)
for i, sample in enumerate(pbar):
    for cam_instance_idx, cam_instance in enumerate(sample.cam_instances):
        status = f"Sample {i}, Camera {cam_instance_idx}, Label: {cam_instance.bbox_label}, Patch shape: {cam_instance.patch.shape}"
        pbar.set_description(status)
print("Done")