First set up the correct paths pointing to the directories containing the images, bounding boxes and instances.

In [2]:
# Settings
# Absolute, machine-specific dataset locations; adjust them before running.
# plot_sample looks up one file per sample in each directory, named by the
# zero-padded six-digit sample number.
# Directory with the images (<sample>.png)
kitti_img_dir = "/mnt/e/DataSet/kitti/training/image_2"
# Directory with the bounding box annotations (<sample>.txt)
kitti_box_dir = "/mnt/e/DataSet/kitti/training/label_2"
# Directory with the instance annotations (<sample>.png)
kitti_inst_dir = "/mnt/e/DataSet/kitti/training/instance_2"

In [3]:
# Imports
import colorsys
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io as io

%load_ext autoreload
%autoreload 2

In [4]:
# Function definitions

# Colour settings per class ID. The class ID is the instance id divided by 1000
# (see map_instance_to_color) or the value assigned by class_map in draw_boxes.
# Each entry holds [hue_min, hue_max, saturation, value]; the hue of an individual
# object is picked inside (hue_min, hue_max) by get_rgb_from_id.
HSV_mapping = {
#  ID:  [ H_min, H_max,    S,   V ]
    0:  [  0.00,  1.00, 0.00, 0.0 ], # Background (value 0, i.e. black)
    1:  [  0.00,  1.00, 1.00, 0.9 ], # Vehicle linked to box
    2:  [  0.00,  1.00, 1.00, 0.9 ], # Pedestrian linked to box
    3:  [  0.00,  1.00, 0.40, 0.6 ], # Vehicle or Pedestrian without box (less saturated, darker)
}


def load_boxes(path):
    """Read a KITTI3D object annotation file into a table.

    The file is parsed as space-separated values without a header row; the
    columns are named after the annotation fields listed below.

    Args:
        path (str): Absolute path to object annotation txt file.

    Returns:
        boxes (DataFrame): Pandas DataFrame with one row per annotated object.
    """
    column_names = (
        'type', 'truncation', 'occlusion', 'alpha',   # object description
        'left', 'top', 'right', 'bottom',             # 2d box
        'height', 'width', 'length',                  # 3d dimensions
        'x', 'y', 'z', 'ry',                          # 3d position and rotation
        'tid',
    )
    annotations = pd.read_csv(path, names=column_names, sep=" ")
    return annotations


def draw_boxes(img, boxes):
    """Draw 2d object boxes on top of image for visualisation.

    Only vehicle boxes (Car, Truck, Van) and pedestrian boxes (Pedestrian,
    Person_sitting) are drawn; every other annotation type is skipped.

    Args:
        img (np.array): Image to draw on. The boxes are drawn directly onto this array.
        boxes (DataFrame): Pandas DataFrame containing object annotations. The columns
            'type', 'left', 'top', 'right' and 'bottom' are used.

    Returns:
        img (np.array): Image with drawn objects (the array that was passed in).

    Raises:
        KeyError: If an annotation has a type that is not listed in the class map.
    """
    class_map = {
        'Car': 1,
        'Truck': 1,
        'Van': 1,
        'Pedestrian': 2,
        'Person_sitting': 2,
        'Cyclist': 0,
        'DontCare': 0,
        'Misc': 0,
        'Tram': 0
    }
    # Walk over the column values instead of looking rows up by index label, so that
    # frames without a default 0..n-1 index (e.g. after filtering rows) work as well.
    rows = zip(boxes['type'], boxes['left'], boxes['top'], boxes['right'], boxes['bottom'])
    for position, (obj_type, left, top, right, bottom) in enumerate(rows):
        obj_class = class_map[obj_type]
        if obj_class == 0:
            continue
        hue_min, hue_max, saturation, value = HSV_mapping[obj_class]
        # The row position selects the colour of the box
        color = get_rgb_from_id(position, hue_min, hue_max, saturation=saturation, value=value)
        cv2.rectangle(
            img=img,
            pt1=(int(left), int(top)),
            pt2=(int(right), int(bottom)),
            color=color*255,  # get_rgb_from_id returns values between 0 and 1
            thickness=2
        )
    return img


def map_instance_to_color(instances, HSV_mapping=HSV_mapping):
    """Colour-code an instance image.

    Every instance id encodes its class in the thousands (id // 1000) and its box id
    in the remainder (id % 1000). The class selects the colour settings and the box id
    selects the hue within them.

    Args:
        instances (np.array): Instance array. [ H x W ]
        HSV_mapping (dict, optional): Dictionary containing for every instance class a list with [hue_min, hue_max, saturation, value].
            Classes missing from the dictionary use [0, 1, 1, 1].

    Returns:
        np.array: Array containing the color coded instances [ H x W x 3]
    """
    assert isinstance(instances, np.ndarray), f"'instances' must be of type np.array, not {type(instances)}"

    def _color_of(uid):
        # Colour settings come from the class, the hue from the box id
        hue_min, hue_max, saturation, value = HSV_mapping.get(uid//1000, [0, 1, 1, 1])
        return get_rgb_from_id(int(uid%1000), hue_min, hue_max, saturation=saturation, value=value)

    # One palette row per distinct id; `inverse` tells which row each pixel uses
    unique_ids, inverse = np.unique(instances, return_inverse=True)
    palette = np.array([_color_of(uid) for uid in unique_ids])
    # Look the colour of every pixel up in the palette
    return palette[inverse.reshape(instances.shape)]


def get_rgb_from_id(instance_id, hue_min=0, hue_max=1, saturation=0.9, value=0.9):
    """Map an instance/bbox id to a unique color.

    Args:
        instance_id (int): Value representing a unique id. Python ints and numpy integer scalars of any width are accepted.
        hue_min (float, optional): Minimum value for the hue range in which this id should be mapped. Should be greater than or equal to 0. Default: 0
        hue_max (float, optional): Maximum value for the hue range in which this id should be mapped. If larger than 1, hue value will loop back to 0. Default: 1
        saturation (float, optional): Value between 0 and 1 for saturation. Default: 0.9
        value (float, optional): Value between 0 and 1 for brightness. Default: 0.9

    Returns:
        np.array[(3,)]: Array containing the RGB values for this instance id, with values between 0 and 1.

    Raises:
        AssertionError: If an argument has the wrong type or is out of range.
    """
    # np.integer covers every numpy integer width (int64, uint16, ...), not only int32
    assert isinstance(instance_id, (int, np.integer)), f"instance_id should be of type 'int', not '{type(instance_id)}'"
    assert isinstance(hue_min, (int, float)) and isinstance(hue_max, (int, float)), f"hue_min and hue_max should be of type 'float', not '{type(hue_min)}' and '{type(hue_max)}'"
    assert isinstance(saturation, (int, float)) and saturation >= 0 and saturation <= 1, f"saturation should be a float between 0 and 1, not '{saturation}'"
    assert isinstance(value, (int, float)) and value >= 0 and value <= 1, f"value should be a float between 0 and 1, not '{value}'"

    # Golden angle: equally distributed colors but as far apart as possible; Hue will be between 0 and 1
    golden_angle = 137/360
    h = (instance_id*golden_angle) % 1
    # Scale hue in range (hue_min, hue_max)
    h = h*(hue_max-hue_min) + hue_min
    # Get rgb values
    rgb = np.array(colorsys.hsv_to_rgb(h, saturation, value))
    return rgb


def plot_sample(i, kitti_img_dir, kitti_box_dir, kitti_inst_dir, alpha=0.6):
    """Visualise the instances and corresponding bounding boxes for a KITTI3D image.

    Shows a figure with two rows: the darkened image with the colour coded instances
    blended on top, and the original image with the 2d boxes drawn on it.

    Args:
        i (int): Number of the image that needs to be shown.
        kitti_img_dir (str): Absolute path to the directory which contains the KITTI3D images.
        kitti_box_dir (str): Absolute path to the directory which contains the KITTI3D object annotations.
        kitti_inst_dir (str): Absolute path to the directory which contains our instance annotations.
        alpha (float, optional): Weight of the instance colour when blending it with the image;
            0 keeps the (darkened) image, 1 shows the plain instance colour. Default: 0.6
    """
    # Get paths
    sample_name = f"{i:06d}"
    img_path = f"{kitti_img_dir}/{sample_name}.png"
    box_path = f"{kitti_box_dir}/{sample_name}.txt"
    inst_path = f"{kitti_inst_dir}/{sample_name}.png"

    # Load image, objects and instances
    img_inst = io.imread(img_path)
    img_box = img_inst.copy()
    boxes = load_boxes(box_path)
    inst = io.imread(inst_path)

    # Visualise instances
    inst_color = map_instance_to_color(inst)
    # Background is mapped to black, so a non-zero channel sum marks the instance pixels
    has_instance = inst_color.sum(axis=2) != 0
    # Blend in floating point and clip before converting back, so that bright colours
    # cannot wrap around in the integer image. Colours are scaled by 255, as in draw_boxes.
    blended = img_inst*0.5
    blended[has_instance] = blended[has_instance]*(1-alpha) + inst_color[has_instance]*255*alpha
    img_inst = np.clip(blended, 0, 255).astype(img_inst.dtype)
    # Visualise boxes
    img_box = draw_boxes(img_box, boxes)

    # Create Figure: instances on top, boxes below
    fig, (ax_inst, ax_box) = plt.subplots(2, 1, figsize=(10,6), subplot_kw={'frameon': False})
    # Plot Instances
    ax_inst.imshow(img_inst)
    ax_inst.axis('off')
    ax_inst.set_title(sample_name)
    # Plot Boxes
    ax_box.imshow(img_box)
    ax_box.axis('off')
    # Render
    fig.tight_layout()
    plt.show()

Instances that are matched to a bounding box annotation are shown in bright colors.
Instances without a matching bounding box are shown in less saturated, darker colors.

这里发现一个问题：有些 instance 的 box_id 序列缺少项，我们会移除这些样本

In [99]:
import os
from tools.dataset_util import Dataset
from pathlib import Path

# Build a database of cropped car instances. The box id encoded in every instance id
# (instance id = class * 1000 + box id) is used to look up the label of the instance.
dataset = Dataset("train", r"/mnt/e/DataSet/kitti")
root_dir = Path(r"/mnt/e/DataSet/kitti/kitti_inst_database")
image_dir = root_dir / "image"
depth_dir = root_dir / "depth"
# cv2.imwrite reports a failure through its return value instead of raising,
# so make sure the output directories exist before anything is written
image_dir.mkdir(parents=True, exist_ok=True)
depth_dir.mkdir(parents=True, exist_ok=True)

db = dict()

for idx in range(7481):
    name = str(idx).zfill(6)
    calib = dataset.get_calib(idx)
    plane = dataset.get_plane(idx)
    image, depth = dataset.get_image_with_depth(idx, use_penet=True)
    _, _, labels = dataset.get_bbox(idx, chosen_cls=["Car", "Truck", "Van"])
    instance = dataset.get_instance(idx)

    # Keep the instances of class 1 only (1 for Car, Van, Truck)
    uniques = [uid for uid in np.unique(instance) if uid // 1000 == 1]
    # Report and skip samples whose box ids are not the gap-free sequence 0, 1, 2, ...
    if not all(i == uid % 1000 for i, uid in enumerate(uniques)):
        print(name)
        continue

    for uid in uniques:
        box_id = uid % 1000
        try:
            label = labels[box_id]
        except IndexError:
            # No label with this box id
            continue
        # Keep cars only, without occlusion and (nearly) without truncation.
        # NOTE(review): `trucation` is assumed to be the attribute's actual spelling
        # in the label class — confirm.
        if label.cls_type != "Car" or label.occlusion != 0 or label.trucation >= 1e-2:
            continue
        # Skip objects whose last position component exceeds 30
        # (presumably the distance in front of the camera in metres — confirm)
        if label.pos[-1] > 30:
            continue

        mask = instance == uid
        masked_image = image * mask[:,:,np.newaxis]
        masked_depth = depth * mask

        # Bounding rectangle of all mask pixels. Taking the first contour only would
        # truncate instances whose mask consists of several separate parts.
        x, y, w_, h_ = cv2.boundingRect(mask.astype(np.uint8) * 255)

        # Skip crops with an area below 500 pixels
        if w_ * h_ < 500:
            continue

        masked_image = masked_image[y:y+h_, x:x+w_]
        # Depth is scaled by 256 and stored as 16 bit integers
        masked_depth = (masked_depth[y:y+h_, x:x+w_] * 256.0).astype(np.uint16)

        key = f"{name}_{box_id}"
        image_ok = cv2.imwrite(str(image_dir / f"{key}.png"), masked_image)
        depth_ok = cv2.imwrite(str(depth_dir / f"{key}.png"), masked_depth)
        if not (image_ok and depth_ok):
            # Do not register entries whose crops are not on disk
            print(f"could not write crops for {key}")
            continue

        db[key] = {
            "label": label,
            "bbox2d": [x, y, x+w_, y+h_],
            "name": key,
            "calib": calib,
            "plane": plane
        }
    

000002
000010
000011
000021
000025
000029
000045
000047
000049
000051
000061
000063
000068
000075
000087
000091
000098
000102
000105
000113
000114
000119
000127
000129
000132
000134
000142
000144
000145
000146
000151
000152
000153
000154
000157
000161
000169
000174
000177
000183
000184
000186
000190
000192
000201
000203
000204
000205
000206
000207
000208
000210
000211
000214
000217
000228
000232
000245
000246
000248
000249
000254
000264
000266
000268
000273
000274
000277
000282
000295
000303
000305
000307
000310
000311
000314
000318
000330
000331
000332
000333
000335
000336
000339
000340
000345
000347
000351
000354
000357
000362
000369
000371
000377
000380
000382
000383
000386
000391
000393
000395
000401
000403
000408
000409
000412
000422
000423
000424
000427
000430
000432
000435
000436
000438
000442
000445
000446
000450
000453
000460
000461
000463
000464
000468
000469
000471
000478
000479
000480
000486
000488
000490
000492
000493
000495
000501
000505
000514
000518
000519
000522
000527

In [133]:
def get_contour(instance, uid):
    """Return the bounding box of the pixels in `instance` that are equal to `uid`.

    The box spans every matching pixel, also when the pixels form several
    disconnected regions.

    Args:
        instance (np.array): Instance array. [ H x W ]
        uid (int): Instance id to locate.

    Returns:
        list: [x_min, y_min, x_max, y_max] in pixels, where x_max and y_max lie one
            past the last matching column and row. [0, 0, 0, 0] if no pixel matches.
    """
    mask = instance == uid
    # Columns and rows that contain at least one pixel of the instance
    cols = np.flatnonzero(mask.any(axis=0))
    rows = np.flatnonzero(mask.any(axis=1))
    if cols.size == 0:
        return [0, 0, 0, 0]
    return [int(cols[0]), int(rows[0]), int(cols[-1]) + 1, int(rows[-1]) + 1]

In [138]:
from tools.box_util import boxes_iou2d, find_best_match

# Rebuild the database, this time pairing every instance with a label through the
# overlap of their 2d boxes instead of the box id encoded in the instance id.
db = dict()
# cv2.imwrite reports a failure through its return value instead of raising,
# so make sure the output directories exist before anything is written
image_dir.mkdir(parents=True, exist_ok=True)
depth_dir.mkdir(parents=True, exist_ok=True)

for idx in range(7481):
    name = str(idx).zfill(6)
    calib = dataset.get_calib(idx)
    plane = dataset.get_plane(idx)
    image, depth = dataset.get_image_with_depth(idx, use_penet=True)
    _, bbox2d, labels = dataset.get_bbox(idx, chosen_cls=["Car", "Truck", "Van"])
    instance = dataset.get_instance(idx)

    # Keep the instances of class 1 only (instance id // 1000 == 1)
    uniques = [uid for uid in np.unique(instance) if uid // 1000 == 1]
    # One [x_min, y_min, x_max, y_max] box per instance
    contours = np.array([get_contour(instance, uid) for uid in uniques])
    if contours.shape[0] == 0 or bbox2d.shape[0] == 0:
        continue
    iou = boxes_iou2d(contours, bbox2d)
    # matches[i] is used below as the label index of instance i, with -1 meaning
    # "no match". 0.6 is presumably the minimum IoU of a match — confirm.
    matches = find_best_match(iou, 0.6)

    for i, uid in enumerate(uniques):
        if matches[i] == -1:
            continue

        label = labels[matches[i]]
        # Keep cars only, without occlusion and (nearly) without truncation.
        # NOTE(review): `trucation` is assumed to be the attribute's actual spelling
        # in the label class — confirm.
        if label.cls_type != "Car" or label.occlusion != 0 or label.trucation >= 1e-2:
            continue
        # Skip objects whose last position component exceeds 30
        # (presumably the distance in front of the camera in metres — confirm)
        if label.pos[-1] > 30:
            continue

        x, y, x_, y_ = contours[i]
        # Skip crops with an area below 500 pixels
        if (x_ - x) * (y_ - y) < 500:
            continue

        mask = instance == uid
        masked_image = (image * mask[:,:,np.newaxis])[y:y_, x:x_]
        # Depth is scaled by 256 and stored as 16 bit integers
        masked_depth = ((depth * mask)[y:y_, x:x_] * 256.0).astype(np.uint16)

        key = f"{name}_{matches[i]}"
        image_ok = cv2.imwrite(str(image_dir / f"{key}.png"), masked_image)
        depth_ok = cv2.imwrite(str(depth_dir / f"{key}.png"), masked_depth)
        if not (image_ok and depth_ok):
            # Do not register entries whose crops are not on disk
            print(f"could not write crops for {key}")
            continue

        db[key] = {
            "label": label,
            "bbox2d": [x, y, x_, y_],
            "name": key,
            "calib": calib,
            "plane": plane
        }

In [144]:
# Remove the database entries that have no image crop on disk, together with
# their depth crop.
all_image = {file_name.split('.')[0] for file_name in os.listdir(image_dir)}
all_key = set(db.keys())
missing_image = all_key - all_image
print(missing_image)
for name in missing_image:
    del db[name]
    depth_path = depth_dir / (name + ".png")
    # The depth crop may be missing as well; that must not abort the cleanup
    if depth_path.exists():
        depth_path.unlink()

#### 添加其他数据

In [2]:
import pickle
from pathlib import Path
from tools.dataset_util import Dataset
# Dataset access and the locations of the instance database
dataset = Dataset("train", r"/mnt/e/DataSet/kitti")
root_dir = Path(r"/mnt/e/DataSet/kitti/kitti_inst_database")
image_dir, depth_dir = root_dir / "image", root_dir / "depth"

# Load the stored database.
# NOTE: unpickling executes code contained in the file; only load files you created yourself.
with (root_dir / "kitti_car_database.pkl").open("rb") as db_file:
    db = pickle.load(db_file)

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [3]:
db

{'000003_0': {'label': Car 0.000 0.000 pos: [ 1.    1.75 13.22],
  'bbox2d': [606, 182, 722, 282],
  'name': '000003_0',
  'calib': <lib.datasets.kitti_utils.Calibration at 0x7f25912e5af0>,
  'plane': array([-0.04009626, -0.9986394 ,  0.03334112,  1.47307001]),
  'image_shape': (375, 1242)},
 '000006_2': {'label': Car 0.000 0.000 pos: [-12.54   1.64  19.72],
  'bbox2d': [47, 186, 224, 241],
  'name': '000006_2',
  'calib': <lib.datasets.kitti_utils.Calibration at 0x7f25912e5f70>,
  'plane': array([-0.01691065, -0.99974672, -0.01485037,  1.66484703]),
  'image_shape': (374, 1238)},
 '000007_0': {'label': Car 0.000 0.000 pos: [-0.69  1.69 25.01],
  'bbox2d': [564, 176, 618, 223],
  'name': '000007_0',
  'calib': <lib.datasets.kitti_utils.Calibration at 0x7f25912fb3d0>,
  'plane': array([-1.94987805e-02, -9.99809725e-01, -5.57546514e-04,  1.72167804e+00]),
  'image_shape': (375, 1242)},
 '000008_5': {'label': Car 0.000 0.000 pos: [ 8.48  1.75 19.96],
  'bbox2d': [887, 179, 954, 239],
  'n

In [9]:
# Store the (height, width) of the source image with every database entry.
# Several entries can come from the same image, so every image is read only once.
shape_by_idx = {}
for sample in db.values():
    # Entry names have the form "<sample number>_<label index>"
    idx = int(sample['name'].split('_')[0])
    if idx not in shape_by_idx:
        shape_by_idx[idx] = dataset.get_image(idx).shape[:2]
    sample['image_shape'] = shape_by_idx[idx]

In [1]:
# Write the database to disk
with (root_dir / "kitti_inst_database.pkl").open("wb") as db_file:
    pickle.dump(db, db_file)

NameError: name 'root_dir' is not defined