# KITTI Dataset augmentation visualization

## Dataset structure

```
📦gta
┗ 📂training
  ┣ 📂calib
  ┃ ┣ 📜<file_id>.txt
  ┃ ┗ 📜 ...
  ┣ 📂image_2
  ┃ ┣ 📜<file_id>.png
  ┃ ┗ 📜 ...
  ┣ 📂label_2
  ┃ ┣ 📜<file_id>.txt
  ┃ ┗ 📜 ...
  ┗ 📂velodyne
    ┣ 📜<file_id>.bin
    ┗ 📜 ...
```

In [30]:
import os
import mayavi.mlab as mlab
import numpy as np
from copy import deepcopy

import pcdet.datasets.augmentor

from pcdet.config import cfg, cfg_from_yaml_file
from pcdet.datasets import build_dataloader
from pcdet.utils import common_utils
from tools.visual_utils import visualize_utils as V

mlab.init_notebook()

Notebook initialized with ipy backend.


In [31]:
# generate pickles from the kitti data
os.system("python -m pcdet.datasets.kitti.kitti_dataset create_kitti_infos tools/cfgs/dataset_configs/kitti_dataset.yaml")

0

## Visualizing the LiDAR point cloud with labels

In [32]:
logger = common_utils.create_logger()

In [33]:
cfg_from_yaml_file('tools/cfgs/dataset_configs/kitti_dataset.yaml', cfg)

cfg.DATA_PATH = 'data/kitti'

train_set, train_loader, train_sampler = build_dataloader(
    dataset_cfg=cfg,
    class_names=['Car', 'Pedestrian', 'Cyclist'],
    batch_size=1,
    dist=False,
    workers=4,
    logger=logger,
    training=False,
    merge_all_iters_to_one_epoch=False,
    total_epochs=0
)

logger.info(f'Total number of samples: \t{len(train_set)}')

data_dict_list = []
logger.info('Loading samples')
for idx, data_dict in enumerate(train_set):
    logger.info(f'Loaded sample index: \t{idx + 1}')
    data_dict = train_set.collate_batch([data_dict])
    data_dict_list.append(data_dict)


2021-04-05 16:17:19,814   INFO  Loading KITTI dataset
2021-04-05 16:17:19,814   INFO  Loading KITTI dataset
2021-04-05 16:17:19,816   INFO  Total samples for KITTI dataset: 6
2021-04-05 16:17:19,816   INFO  Total samples for KITTI dataset: 6
2021-04-05 16:17:19,817   INFO  Total number of samples: 	6
2021-04-05 16:17:19,817   INFO  Total number of samples: 	6
2021-04-05 16:17:19,818   INFO  Loading samples
2021-04-05 16:17:19,818   INFO  Loading samples
2021-04-05 16:17:19,833   INFO  Loaded sample index: 	1
2021-04-05 16:17:19,833   INFO  Loaded sample index: 	1
2021-04-05 16:17:19,879   INFO  Loaded sample index: 	2
2021-04-05 16:17:19,879   INFO  Loaded sample index: 	2
2021-04-05 16:17:19,899   INFO  Loaded sample index: 	3
2021-04-05 16:17:19,899   INFO  Loaded sample index: 	3
2021-04-05 16:17:19,916   INFO  Loaded sample index: 	4
2021-04-05 16:17:19,916   INFO  Loaded sample index: 	4
2021-04-05 16:17:19,933   INFO  Loaded sample index: 	5
2021-04-05 16:17:19,933   INFO  Loaded

In [34]:
def show_pc(data_dict):
    V.draw_scenes(
        points=data_dict['points'][:, 1:], gt_boxes=data_dict['gt_boxes'][0]
    )
    return mlab.test_plot3d()


In [35]:
show_pc(data_dict_list[1])

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

## Augmentation methods

### Random flip

In [36]:
def random_flip_along_x(gt_boxes, points):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C)
    Returns:
    """
    enable = True
    if enable:
        gt_boxes[:, 1] = -gt_boxes[:, 1]
        gt_boxes[:, 6] = -gt_boxes[:, 6]
        points[:, 1] = -points[:, 1]

        if gt_boxes.shape[1] > 8:
            gt_boxes[:, 8] = -gt_boxes[:, 8]

    return gt_boxes, points

def random_flip_along_y(gt_boxes, points):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C)
    Returns:
    """
    enable = True
    if enable:
        gt_boxes[:, 0] = -gt_boxes[:, 0]
        gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi)
        points[:, 0] = -points[:, 0]

        if gt_boxes.shape[1] > 7:
            gt_boxes[:, 7] = -gt_boxes[:, 7]

    return gt_boxes, points

In [37]:
flipped_y = deepcopy(data_dict_list[1])
flipped_y['gt_boxes'][0], flipped_y['points'][:,1:] = random_flip_along_y(flipped_y['gt_boxes'][0], flipped_y['points'][:,1:])

show_pc(flipped_y)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

In [38]:
flipped_x = deepcopy(data_dict_list[1])
flipped_x['gt_boxes'][0], flipped_x['points'][:,1:] = random_flip_along_x(flipped_x['gt_boxes'][0], flipped_x['points'][:,1:])

show_pc(flipped_x)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

### Global rotation

In [39]:
def global_rotation(gt_boxes, points, rot_range):
    """
    Args:
        gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        rot_range: [min, max]
    Returns:
    """
    noise_rotation = np.random.uniform(rot_range[0], rot_range[1])
    points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], np.array([noise_rotation]))[0]
    gt_boxes[:, 0:3] = common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], np.array([noise_rotation]))[0]
    gt_boxes[:, 6] += noise_rotation
    if gt_boxes.shape[1] > 8:
        gt_boxes[:, 7:9] = common_utils.rotate_points_along_z(
            np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[np.newaxis, :, :],
            np.array([noise_rotation])
        )[0][:, 0:2]

    return gt_boxes, points

In [40]:
rotated = deepcopy(data_dict_list[1])
rotated['gt_boxes'][0], rotated['points'][:,1:] = global_rotation(rotated['gt_boxes'][0], rotated['points'][:,1:], [-0.78539816, 0.78539816])

show_pc(rotated)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

### Global scaling

In [41]:
def global_scaling(gt_boxes, points, scale_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
        points: (M, 3 + C),
        scale_range: [min, max]
    Returns:
    """
    if scale_range[1] - scale_range[0] < 1e-3:
        return gt_boxes, points
    noise_scale = np.random.uniform(scale_range[0], scale_range[1])
    points[:, :3] *= noise_scale
    gt_boxes[:, :6] *= noise_scale
    return gt_boxes, points

In [42]:
scaled = deepcopy(data_dict_list[1])
scaled['gt_boxes'][0], scaled['points'][:,1:] = global_scaling(scaled['gt_boxes'][0], scaled['points'][:, 1:], [0.2, 0.5])

show_pc(scaled)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

### Global translation

In [43]:
def random_translation_along_x(gt_boxes, points, offset_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        offset_range: [min max]]
    Returns:
    """
    offset = np.random.uniform(offset_range[0], offset_range[1])

    points[:, 0] += offset
    gt_boxes[:, 0] += offset

    if gt_boxes.shape[1] > 7:
        gt_boxes[:, 7] += offset

    return gt_boxes, points

def random_translation_along_y(gt_boxes, points, offset_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        offset_range: [min max]]
    Returns:
    """
    offset = np.random.uniform(offset_range[0], offset_range[1])

    points[:, 1] += offset
    gt_boxes[:, 1] += offset

    if gt_boxes.shape[1] > 8:
        gt_boxes[:, 8] += offset

    return gt_boxes, points

def random_translation_along_z(gt_boxes, points, offset_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        offset_range: [min max]]
    Returns:
    """
    offset = np.random.uniform(offset_range[0], offset_range[1])

    points[:, 2] += offset
    gt_boxes[:, 2] += offset

    return gt_boxes, points

In [44]:
translation_x = deepcopy(data_dict_list[1])
translation_x['gt_boxes'][0], translation_x['points'][:,1:] = random_translation_along_x(translation_x['gt_boxes'][0], translation_x['points'][:, 1:], [15, 20])

show_pc(translation_x)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

In [45]:
translation_y = deepcopy(data_dict_list[1])
translation_y['gt_boxes'][0], translation_y['points'][:,1:] = random_translation_along_y(translation_y['gt_boxes'][0], translation_y['points'][:, 1:], [15, 20])

show_pc(translation_y)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

In [46]:
translation_z = deepcopy(data_dict_list[1])
translation_z['gt_boxes'][0], translation_z['points'][:,1:] = random_translation_along_z(translation_z['gt_boxes'][0], translation_z['points'][:, 1:], [15, 20])

show_pc(translation_z)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

#### Aux function to find which points are inside a bounding box

In [47]:
def filter_points_in_box(points, gt_box):
    filter_arr = []
    for point in points:
        x, y, z, dx, dy, dz = gt_box[0], gt_box[1], gt_box[2], gt_box[3], gt_box[4], gt_box[5]
        if point[0] <= x + dx/2 and point[0] >= x - dx/2 \
            and point[1] <= y + dy/2 and point[1] >= y - dy/2 \
                and point[2] <= z + dz/2 and point[2] >= z - dz/2:
                    filter_arr.append(True)
        else:
            filter_arr.append(False)

    # points_in_box = points[filter_arr]
    return filter_arr


In [48]:
points_to_filter = deepcopy(data_dict_list[1])
filter_arr = filter_points_in_box(points_to_filter['points'][:,1:], points_to_filter['gt_boxes'][0][1])
filtered_points = points_to_filter['points'][:,1:][filter_arr]

V.draw_scenes(
        points=filtered_points, gt_boxes=points_to_filter['gt_boxes'][0,1:]
    )

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

### Local translation

In [49]:
def local_translation_along_x(gt_boxes, points, offset_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        offset_range: [min max]]
    Returns:
    """
    offset = np.random.uniform(offset_range[0], offset_range[1])

    for box in gt_boxes:
        filter_arr = filter_points_in_box(points, box)
        points_in_box = points[filter_arr]
        for point in points:
            if np.isin(point, points_in_box).all():
                point[0] += offset

    gt_boxes[:, 0] += offset

    if gt_boxes.shape[1] > 7:
        gt_boxes[:, 7] += offset

    return gt_boxes, points


def local_translation_along_y(gt_boxes, points, offset_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        offset_range: [min max]]
    Returns:
    """
    offset = np.random.uniform(offset_range[0], offset_range[1])

    for box in gt_boxes:
        filter_arr = filter_points_in_box(points, box)
        points_in_box = points[filter_arr]
        for point in points:
            if np.isin(point, points_in_box).all():
                point[1] += offset

    gt_boxes[:, 1] += offset

    if gt_boxes.shape[1] > 8:
        gt_boxes[:, 8] += offset

    return gt_boxes, points


def local_translation_along_z(gt_boxes, points, offset_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        offset_range: [min max]]
    Returns:
    """
    offset = np.random.uniform(offset_range[0], offset_range[1])

    for box in gt_boxes:
        filter_arr = filter_points_in_box(points, box)
        points_in_box = points[filter_arr]
        for point in points:
            if np.isin(point, points_in_box).all():
                point[2] += offset

    gt_boxes[:, 2] += offset

    return gt_boxes, points

In [50]:
local_translation_x = deepcopy(data_dict_list[1])
local_translation_x['gt_boxes'][0], local_translation_x['points'][:,1:] = local_translation_along_x(local_translation_x['gt_boxes'][0], local_translation_x['points'][:, 1:], [10, 15])

show_pc(local_translation_x)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

In [51]:
local_translation_y = deepcopy(data_dict_list[1])
local_translation_y['gt_boxes'][0], local_translation_y['points'][:,1:] = local_translation_along_y(local_translation_y['gt_boxes'][0], local_translation_y['points'][:, 1:], [10, 15])

show_pc(local_translation_y)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

In [52]:
local_translation_z = deepcopy(data_dict_list[1])
local_translation_z['gt_boxes'][0], local_translation_z['points'][:,1:] = local_translation_along_z(local_translation_z['gt_boxes'][0], local_translation_z['points'][:, 1:], [10, 15])

show_pc(local_translation_z)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

### Local scaling

In [53]:
def local_scaling(gt_boxes, points, scale_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading]
        points: (M, 3 + C),
        scale_range: [min, max]
    Returns:
    """
    if scale_range[1] - scale_range[0] < 1e-3:
        return gt_boxes, points

    noise_scale = np.random.uniform(scale_range[0], scale_range[1])
    for box in gt_boxes:
        filter_arr = filter_points_in_box(points, box)
        points_in_box = points[filter_arr]

        for point in points:
            if np.isin(point, points_in_box).all():
                # tranlation to axis center
                point[0] -= box[0]
                point[1] -= box[1]
                point[2] -= box[2]

                # apply scaling
                point[:3] *= noise_scale

                # tranlation back to original position
                point[0] += box[0]
                point[1] += box[1]
                point[2] += box[2]

    gt_boxes[:, 3:6] *= noise_scale
    return gt_boxes, points

In [54]:
scaled_local = deepcopy(data_dict_list[1])
scaled_local['gt_boxes'][0], scaled_local['points'][:,1:] = local_scaling(scaled_local['gt_boxes'][0], scaled_local['points'][:, 1:], [1.2, 1.5])

show_pc(scaled_local)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

### Local rotation

In [55]:
def local_rotation(gt_boxes, points, rot_range):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
        rot_range: [min, max]
    Returns:
    """
    noise_rotation = np.random.uniform(rot_range[0], rot_range[1])

    for box in gt_boxes:
        filter_arr = filter_points_in_box(points, box)
        points_in_box = points[filter_arr]
        for point in points:
            if np.isin(point, points_in_box).all():
                centroid_x = box[0]
                centroid_y = box[1]
                centroid_z = box[2]

                #print("BEFORE:\n" + str(point))
                # tranlation to axis center
                point[0] -= centroid_x
                point[1] -= centroid_y
                point[2] -= centroid_z
                box[0] -= centroid_x
                box[1] -= centroid_y
                box[2] -= centroid_z

                # apply rotation
                point[:] = common_utils.rotate_points_along_z(point[np.newaxis, np.newaxis, :], np.array([noise_rotation]))[0][0]
                box[0:3] = common_utils.rotate_points_along_z(box[np.newaxis, np.newaxis, 0:3], np.array([noise_rotation]))[0][0]

                # tranlation back to original position
                point[0] += centroid_x
                point[1] += centroid_y
                point[2] += centroid_z
                box[0] += centroid_x
                box[1] += centroid_y
                box[2] += centroid_z
                #print("AFTER:\n" + str(point))

    gt_boxes[:, 6] += noise_rotation
    if gt_boxes.shape[1] > 8:
        gt_boxes[:, 7:9] = common_utils.rotate_points_along_z(
            np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[np.newaxis, :, :],
            np.array([noise_rotation])
        )[0][:, 0:2]

    return gt_boxes, points

In [56]:
rotated_local = deepcopy(data_dict_list[1])
rotated_local['gt_boxes'][0], rotated_local['points'][:,1:] = local_rotation(rotated_local['gt_boxes'][0],rotated_local['points'][:, 1:], [0.78539815, 0.78539816])

show_pc(rotated_local)

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x02X\x08\x02\x00\x00\x001\x04\x0f\x8b\x…

Oversampling (TODO)

In [57]:
def oversample(gt_boxes, points, new_gt_boxes, new_points):
    """
    Args:
        gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading, [vx], [vy]]
        points: (M, 3 + C),
    Returns:
    """
    #gt_boxes.reshape(len(gt_boxes) + len(new_gt_boxes), gt_boxes.shape[1])
    #points.reshape(len(points) + len(new_points), points.shape[1])

    gt_boxes = np.append(gt_boxes, new_gt_boxes, axis=0)
    points = np.append(points, new_points, axis=0)

    print(gt_boxes.shape)

    return gt_boxes, points

In [58]:
oversampled = deepcopy(data_dict_list[1])
new_data = deepcopy(data_dict_list[2])
oversampled['gt_boxes'][0], oversampled['points'][:,1:] = oversample(oversampled['gt_boxes'][0], oversampled['points'][:, 1:], new_data['gt_boxes'][0], new_data['points'][:, 1:])

show_pc(oversampled)

(3, 8)


ValueError: could not broadcast input array from shape (3,8) into shape (2,8)