# Data Exploration

In this notebook, we load the KITTI dataset and explore the data.

In [None]:
# Import statements.
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2
from typing import Dict, Any, List, Optional, Tuple
from math import sin, cos

In [None]:
# Global constants.
# Keys of the partition dict built by get_partition.
TRAIN_KEY = 'train'
VAL_KEY = 'val'
TEST_KEY = 'test'
# Default dataset options. 'val_split' is the fraction of the training images
# that get_partition sets aside for validation.
DEFAULT_DATASET_ARGS = {'val_split': 0.3}
# Locations of the KITTI images and labels, relative to this notebook.
KITTI_ROOT_DIR = os.path.join('..', 'data', 'KITTI')
LABEL_DIR = os.path.join(KITTI_ROOT_DIR, 'training', 'label_2')
LEFT_CAM_ROOT_DIR = os.path.join(KITTI_ROOT_DIR, 'data_object_image_2')
TRAIN_IMAGE_DIR = os.path.join(LEFT_CAM_ROOT_DIR, 'training', 'image_2')
TEST_IMAGE_DIR = os.path.join(LEFT_CAM_ROOT_DIR, 'testing', 'image_2')

In [None]:
def get_partition(train_image_dir: str,
                  test_image_dir: str,
                  dataset_args: Optional[Dict[str, Any]] = None
                  ) -> Dict[str, List[str]]:
    """Returns a dict where the keys are 'train', 'val', and 'test', and the
    values are the image filenames under each. The list is the authoritative
    order of the train/test examples; partition['train'][0] is the first
    training example, and x_train[0] will correspond with that filename.

    The train/val split is drawn with np.random.permutation, so it differs
    between runs unless numpy's global random state is seeded beforehand.
    :param train_image_dir: The directory in which are located all the training
    images.
    :param test_image_dir: The directory in which are located all the test
    images.
    :param dataset_args: The dataset arguments. See DEFAULT_DATASET_ARGS for
    available options. Missing keys (or None) fall back to the defaults.
    :return: The train/val/test partition.
    """
    # A None default avoids sharing one mutable dict object across calls.
    dataset_args = {**DEFAULT_DATASET_ARGS, **(dataset_args or {})}
    # os.listdir returns entries in arbitrary order; sorting makes the test
    # list, and the filenames the random indices refer to, independent of the
    # filesystem.
    train_image_filenames = sorted(
        filename for filename in os.listdir(train_image_dir)
        if filename.endswith('.png'))
    test_image_filenames = sorted(
        filename for filename in os.listdir(test_image_dir)
        if filename.endswith('.png'))
    num_train_images = len(train_image_filenames)
    rand_indices = np.random.permutation(num_train_images)
    # The first val_split fraction of the shuffled indices is validation.
    split_index = int(dataset_args['val_split'] * num_train_images)
    val_indices = rand_indices[:split_index]
    train_indices = rand_indices[split_index:]
    return {
        TRAIN_KEY: [train_image_filenames[i] for i in train_indices],
        VAL_KEY: [train_image_filenames[i] for i in val_indices],
        TEST_KEY: test_image_filenames,
    }

In [None]:
def load_image_into_numpy_array(
    path: str,
    target_size: Optional[Tuple[int, int]] = None) -> np.ndarray:
    """Load an image from file into a numpy array.
    :param path: The path to the image.
    :param target_size: If specified, the width and height of the output array.
    If None, the image size is unchanged.
    :return: The image contents, in RGB channel order, as an np.ndarray of type
    uint8.
    :raises OSError: If the image at path could not be read.
    """
    img_data = cv2.imread(path)
    # cv2.imread signals failure by returning None rather than raising; fail
    # here with the offending path instead of inside cvtColor.
    if img_data is None:
        raise OSError('Could not read image: {0}'.format(path))
    # OpenCV loads images as BGR; convert to the RGB order matplotlib expects.
    img_data = cv2.cvtColor(img_data, cv2.COLOR_BGR2RGB)
    if target_size:
        img_data = cv2.resize(img_data, target_size)
    return img_data.astype(np.uint8)

In [None]:
def display_image(filename: str,
                  fig_size: Tuple[float, float] = (9.0, 2.75)) -> None:
    """Shows the image stored at filename on the current matplotlib figure.
    :param filename: The path to the image.
    :param fig_size: The size of the figure, in inches. The default is chosen
    so that KITTI images keep their aspect ratio while fitting inside the 80
    character margin of a jupyter notebook.
    """
    image = load_image_into_numpy_array(filename)
    height, width = image.shape[0], image.shape[1]
    plt.imshow(image)
    plt.gcf().set_size_inches(*fig_size)
    plt.tight_layout()
    # Clamp the axes to the image extent. The y limits are given bottom-first,
    # so (height, 0) keeps row 0 at the top of the plot.
    plt.xlim((0, width))
    plt.ylim((height, 0))

In [None]:
def display_image_2d_boxes(filename: str,
                           image_labels: List[str],
                           fig_size: Tuple[float, float] = (9.0, 2.75)) -> None:
    """Shows the image at filename and outlines each labelled object's 2D
    bounding box in green.
    :param filename: The path to the image.
    :param image_labels: The image's labels, one label file line per object.
    :param fig_size: The size of the output, in inches.
    """
    display_image(filename, fig_size=fig_size)
    axes = plt.gca()
    for label_line in image_labels:
        left, top, right, bottom = get_label_2d_box(label_line)
        # Rectangle is anchored at (left, top) and sized by width and height.
        box = patches.Rectangle((left, top), right - left, bottom - top,
                                linewidth=3, edgecolor='g', facecolor='none')
        axes.add_patch(box)

In [None]:
def display_image_3d_boxes(filename: str,
                           image_labels: List[str],
                           fig_size: Tuple[float, float] = (9.0, 2.75)) -> None:
    """Displays the image at filename with its 3D bounding boxes.

    Placeholder: this version only displays the image; the loop over the labels
    does not draw anything yet. A later cell in this notebook redefines this
    function with an implementation that draws the boxes.
    :param filename: The path to the image.
    :param image_labels: The image's labels.
    :param fig_size: The size of the output, in inches.
    """
    display_image(filename, fig_size=fig_size)
    ax = plt.gca()
    for line in image_labels:
        # TODO get 3D box and draw it.
        # See https://github.com/smallcorgi/3D-Deepbox/tree/master/visualization
        # computeBox3D.m and then projectToImage.m
        pass

In [None]:
def get_label_class(label_line: str) -> str:
    """Extracts the object class, i.e. the first space-separated field, from a
    line of a label file.
    :param label_line: A line from a label file.
    :return: The class name.
    """
    class_name, _, _ = label_line.partition(' ')
    return class_name

In [None]:
def get_label_2d_box(label_line: str) -> Tuple[float, float, float, float]:
    """Returns the 2D bounding box from the line from a label file.
    :param label_line: A line from a label file, with space-separated fields.
    :return: The 2D bounding box as left, top, right, bottom.
    :raises ValueError: If the line has fewer than 8 fields, or if any of the
    box fields is not a number.
    """
    fields = label_line.split(' ')
    # Slicing a short line would silently produce a tuple with fewer than four
    # values; reject it here with a message that names the offending line.
    if len(fields) < 8:
        raise ValueError(
            'Expected at least 8 fields in label line, got {0}: {1!r}'.format(
                len(fields), label_line))
    # Fields 4 through 7 hold the box edges.
    x_left, y_top, x_right, y_bot = (float(num) for num in fields[4:8])
    return x_left, y_top, x_right, y_bot

In [None]:
def get_labels(label_dir: str) -> Dict[str, List[str]]:
    """Returns a dict where the keys are the image filenames and the values are
    the labels. Each file has several labels, each of which represents an
    object.
    :param label_dir: The directory containing the ground truth label files.
    Only files ending in '.txt' are read.
    :return: The label dict. Each key is a label filename with its '.txt'
    suffix replaced by '.png'; each value is that file's non-blank lines,
    stripped of surrounding whitespace.
    """
    label_suffix = '.txt'
    labels = {}
    for filename in os.listdir(label_dir):
        if not filename.endswith(label_suffix):
            continue
        # Swap only the trailing suffix; str.replace would also rewrite a
        # '.txt' occurring earlier in the name.
        image_filename = filename[:-len(label_suffix)] + '.png'
        with open(os.path.join(label_dir, filename), 'r') as infile:
            stripped_lines = (line.strip() for line in infile)
            # Blank lines carry no object and would break the label parsers.
            labels[image_filename] = [line for line in stripped_lines if line]
    return labels

In [None]:
# Build the train/val/test partition and load the label lines of every label
# file, then report how many entries each contains.
partition = get_partition(TRAIN_IMAGE_DIR, TEST_IMAGE_DIR)
labels = get_labels(LABEL_DIR)
print('{0} train images'.format(len(partition[TRAIN_KEY])))
print('{0} val images'.format(len(partition[VAL_KEY])))
print('{0} test images'.format(len(partition[TEST_KEY])))
print('{0} labels'.format(len(labels.keys())))

In [None]:
# Show the first image of the training partition (the partition is shuffled,
# so this changes from run to run).
display_image(os.path.join(TRAIN_IMAGE_DIR, partition[TRAIN_KEY][0]))

In [None]:
# Use image 000008.png for consistency (partition is in random order).
example_filename = '000008.png'
display_image(os.path.join(TRAIN_IMAGE_DIR, example_filename))
# Print the raw label lines of the example image, one object per line.
print('Labels:')
for label in labels[example_filename]:
    print(label)

In [None]:
# Show the example image with the 2D bounding boxes from its labels.
display_image_2d_boxes(os.path.join(TRAIN_IMAGE_DIR, example_filename),
                      labels[example_filename])

In [None]:
def _parse_matrix(values_str: str, rows: int, cols: int) -> np.ndarray:
    """Parses a space-separated string of numbers into a rows x cols array."""
    values = [float(num) for num in values_str.split(' ')]
    return np.array(values).reshape(rows, cols)


def _extend_to_4x4(matrix: np.ndarray) -> np.ndarray:
    """Pads matrix with zeros to 4x4 and sets the bottom right corner to 1."""
    extended = np.zeros((4, 4))
    extended[:matrix.shape[0], :matrix.shape[1]] = matrix
    extended[3, 3] = 1
    return extended


# From 000008.png's calibration file.
P2_str = '7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03'
P2 = _parse_matrix(P2_str, 3, 4)
print(P2)
R0_rect_str = '9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01'
# Extend R0_rect to a 4x4 with a 1 in the bottom right corner and 0s
# everywhere else, as per KITTI readme.txt.
R0_rect = _extend_to_4x4(_parse_matrix(R0_rect_str, 3, 3))
print(R0_rect)
Tr_velo_to_cam_str = '7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01'
# Extend Tr_velo_to_cam to a 4x4 with a 1 in the bottom right corner and 0s
# everywhere else, as per KITTI readme.txt.
Tr_velo_to_cam = _extend_to_4x4(_parse_matrix(Tr_velo_to_cam_str, 3, 4))
print(Tr_velo_to_cam)
# From 000008.png's label file: the location fields of the first Car, as a
# 3x1 column, and the same point in homogeneous coordinates (4x1).
pt_3d = np.expand_dims(np.array([-2.70, 1.74, 3.68]), axis=-1)
pt_3d_homogeneous = np.concatenate((pt_3d, np.ones((1, pt_3d.shape[1]))),
                                   axis=0)
# Project the point with P2 alone and divide by depth to get pixel
# coordinates; the third row is zeroed once it has been used.
pt_2d = np.matmul(P2, pt_3d_homogeneous)
pt_2d[0, :] /= pt_2d[2, :]
pt_2d[1, :] /= pt_2d[2, :]
pt_2d[2, :] *= 0
print(pt_2d)
# Full chain x = P2 * R0_rect * Tr_velo_to_cam * y, where y is a homogeneous
# point in Velodyne coordinates. No Velodyne point is available in this
# notebook, so y is obtained by mapping the label point back through the
# inverse of R0_rect * Tr_velo_to_cam; x (before division by its third row)
# then lands on the same pixel as pt_2d.
# NOTE(review): this assumes the label location is expressed in the rectified
# camera frame, as the KITTI readme describes - confirm.
y = np.linalg.solve(np.matmul(R0_rect, Tr_velo_to_cam), pt_3d_homogeneous)
x = np.matmul(np.matmul(np.matmul(P2, R0_rect), Tr_velo_to_cam), y)
print(x)

In [None]:
# Corner indices of the four side faces of a 3D box; each row selects columns
# of the corner matrix that is plotted face by face.
FACE_IDX = np.array([
    [0, 1, 5, 4],  # Front
    [1, 2, 6, 5],  # Left
    [2, 3, 7, 6],  # Back
    [3, 0, 4, 7],  # Right
])
# From 000008.png's label file.
label = 'Car 0.88 3 -0.69 0.00 192.37 402.31 374.00 1.60 1.57 3.23 -2.70 1.74 3.68 -1.29'
label_arr = label.split(' ')
# Name each space-separated field of the label line.
obj = {
    'type': label_arr[0],
    'truncation': float(label_arr[1]),
    'occlusion': int(label_arr[2]),
    'alpha': float(label_arr[3]),
    'x1': float(label_arr[4]),
    'y1': float(label_arr[5]),
    'x2': float(label_arr[6]),
    'y2': float(label_arr[7]),
    'height': float(label_arr[8]),
    'width': float(label_arr[9]),
    'length': float(label_arr[10]),
    'tx': float(label_arr[11]),
    'ty': float(label_arr[12]),
    'tz': float(label_arr[13]),
    'ry': float(label_arr[14]),
}

In [None]:
def get_obj_from_label_line(label_line: str) -> Dict[str, Any]:
    """Parses a line from a label file into a dict of named fields.
    :param label_line: A line from a label file, with space-separated fields.
    :return: A dict with the keys 'type' (str), 'occlusion' (int), and
    'truncation', 'alpha', 'x1', 'y1', 'x2', 'y2', 'height', 'width', 'length',
    'tx', 'ty', 'tz', 'ry' (all float), in that order.
    :raises IndexError: If the line has fewer than 15 fields.
    :raises ValueError: If a numeric field cannot be converted.
    """
    label_arr = label_line.split(' ')
    # Field 0 is the class name; fields 1 through 14 are numeric and are
    # listed here in the order in which they appear on the line.
    numeric_fields = (
        ('truncation', float),
        ('occlusion', int),
        ('alpha', float),
        ('x1', float),
        ('y1', float),
        ('x2', float),
        ('y2', float),
        ('height', float),
        ('width', float),
        ('length', float),
        ('tx', float),
        ('ty', float),
        ('tz', float),
        ('ry', float),
    )
    obj = {'type': label_arr[0]}
    for index, (key, convert) in enumerate(numeric_fields, start=1):
        obj[key] = convert(label_arr[index])
    return obj

In [None]:
def project_to_image(pts_3D: np.ndarray, P: np.ndarray) -> np.ndarray:
    """Projects 3D points into the image plane.
    :param pts_3D: The 3D points as a 3xn matrix, one point per column.
    :param P: The 3x4 projection matrix.
    :return: The projected points as a 2xn matrix, one point per column.
    """
    # Append a row of ones to make the points homogeneous (4xn).
    num_points = pts_3D.shape[1]
    pts_3D_ones = np.concatenate((pts_3D, np.ones((1, num_points))), axis=0)
    pts_2D_scaled = np.matmul(P, pts_3D_ones)
    # Divide the first two rows by the third to undo the projective scale.
    return pts_2D_scaled[:2, :] / pts_2D_scaled[2, :]

In [None]:
def compute_box_3d(obj: Dict[str, Any], P: np.ndarray) -> Optional[np.ndarray]:
    """Computes the image coordinates of the corners of an object's 3D
    bounding box.
    :param obj: The object dict. The keys 'ry', 'length', 'width', 'height',
    'tx', 'ty' and 'tz' are read.
    :param P: The 3x4 projection matrix.
    :return: The projected corners as a 2x8 matrix, one corner per column, or
    None if any corner has a z coordinate below 0.1.
    """
    # Rotation matrix around yaw axis.
    yaw = obj['ry']
    rotation = np.array([[+cos(yaw), 0, +sin(yaw)],
                         [0, 1, 0],
                         [-sin(yaw), 0, +cos(yaw)]])
    # 3D bounding box dimensions and corners. The box is centered on the
    # origin in x and z, and spans from y = 0 to y = -height.
    half_length = obj['length'] / 2
    half_width = obj['width'] / 2
    height = obj['height']
    x_corners = [half_length, half_length, -half_length, -half_length,
                 half_length, half_length, -half_length, -half_length]
    y_corners = [0, 0, 0, 0, -height, -height, -height, -height]
    z_corners = [half_width, -half_width, -half_width, half_width,
                 half_width, -half_width, -half_width, half_width]
    corners_centered = np.array([x_corners, y_corners, z_corners])
    # Rotate and translate 3D bounding box.
    translation = np.array([[obj['tx']], [obj['ty']], [obj['tz']]])
    corners_3D = np.matmul(rotation, corners_centered) + translation
    # Only draw 3D bounding box for objects in front of the camera.
    if np.any(corners_3D[2, :] < 0.1):
        return None
    # Project the 3D bounding box into the image plane.
    return project_to_image(corners_3D, P)

In [None]:
def display_image_3d_boxes(filename: str,
                           image_labels: List[str],
                           fig_size: Tuple[float, float] = (9.0, 2.75),
                           projection_matrix: Optional[np.ndarray] = None
                           ) -> None:
    """Displays the image at filename with its 3D bounding boxes.
    :param filename: The path to the image.
    :param image_labels: The image's labels.
    :param fig_size: The size of the output, in inches.
    :param projection_matrix: The 3x4 projection matrix used to project the
    boxes into the image. If None, the notebook-level P2 is used, which was
    copied from 000008.png's calibration file.
    """
    if projection_matrix is None:
        projection_matrix = P2
    display_image(filename, fig_size=fig_size)
    ax = plt.gca()
    for line in image_labels:
        obj = get_obj_from_label_line(line)
        corners = compute_box_3d(obj, projection_matrix)
        # compute_box_3d returns None for boxes it declines to project.
        if corners is None:
            continue
        # Each row of FACE_IDX lists the corners of one side face; the four
        # faces are drawn as separate polylines.
        for face in FACE_IDX:
            ax.plot(corners[0, face], corners[1, face])

In [None]:
# Projected 3D box corners of the example Car parsed into obj above, using the
# P2 matrix of the same image.
compute_box_3d(obj, P2)

In [None]:
# Show the example image with the 3D bounding boxes from its labels.
display_image_3d_boxes(os.path.join(TRAIN_IMAGE_DIR, example_filename),
                      labels[example_filename])