<a href="https://colab.research.google.com/github/gkadusumilli/Voxelnet/blob/master/VoxelNet_data_creation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np

In [19]:
def test_encode_decode():
    """Check that decoding an encoded box array reproduces the input.

    Random labels, points and boxes are generated, the boxes are encoded
    with ``voxelnet_box_encoding`` and decoded with
    ``voxelnet_box_decoding``, and the result is compared element-wise with
    the original boxes.

    Raises:
        AssertionError: if any decoded value is not close to the original.
    """
    cls_labels = np.random.choice(5, (1000, 1))

    points_xyz = np.random.random((1000, 3))*10

    boxes_3d = np.random.random((1000, 7))*10
    boxes_3d[:, 3:6] = np.absolute(boxes_3d[:, 3:6])
    print(boxes_3d)
    # Hand copies to the encoder/decoder: if they write into their argument
    # and return it, the "decoded" array would be the very same object as
    # the reference and the assertion below could never fail.
    encoded_boxes = voxelnet_box_encoding(
        cls_labels, points_xyz, boxes_3d.copy())
    print(encoded_boxes)
    print(encoded_boxes.shape)
    decoded_boxes = voxelnet_box_decoding(
        cls_labels, points_xyz, encoded_boxes.copy())
    print(decoded_boxes)
    assert np.isclose(decoded_boxes, boxes_3d).all()

In [3]:
def voxelnet_box_encoding(cls_labels, points_xyz, boxes_3d):
    """Encode boxes relative to their points and per-class anchor sizes.

    For every row, columns 0-2 of the box are replaced by their offset from
    columns 0-2 of the matching row of ``points_xyz``.  For rows whose label
    has an anchor, that offset is divided by the anchor sizes and columns
    3-5 are replaced by ``log(value / anchor size)``.  Column 6 (the yaw,
    per the original comments) is divided by ``pi / 2`` for every row.

    Args:
        cls_labels: array read as ``cls_labels[:, 0]``.  Label 2 selects the
            car anchor; labels 1 and 3 select the pedestrian/cyclist anchor.
            Rows with any other label only get the offset and yaw scaling.
        points_xyz: array whose columns 0-2 are subtracted from the boxes.
        boxes_3d: array with at least 7 columns, one box per row.
            Presumably floating point; the result keeps its dtype.

    Returns:
        A new array with the same shape and dtype as ``boxes_3d``.  The
        input is left unmodified (earlier versions overwrote it in place
        and returned the same object).
    """
    # (labels sharing an anchor, anchor sizes applied to columns 0-2 / 3-5)
    anchor_groups = (
        ((2,), (3.9, 1.56, 1.6)),      # Car
        ((1, 3), (0.8, 1.73, 0.6)),    # Pedestrian and Cyclist
    )
    # Work on a private copy so the caller's array is never clobbered.
    encoded = np.array(boxes_3d, copy=True)
    # offset
    for axis in range(3):
        encoded[:, axis] = encoded[:, axis] - points_xyz[:, axis]
    labels = cls_labels[:, 0]
    for group_labels, anchor in anchor_groups:
        mask = labels == group_labels[0]
        for label in group_labels[1:]:
            mask = mask | (labels == label)
        for axis, size in enumerate(anchor):
            encoded[mask, axis] = encoded[mask, axis]/size
            encoded[mask, axis + 3] = np.log(encoded[mask, axis + 3]/size)
    # normalize all yaws
    encoded[:, 6] = encoded[:, 6]/(np.pi*0.5)
    return encoded


In [6]:
def voxelnet_box_decoding(cls_labels, points_xyz, encoded_boxes):
    """Invert ``voxelnet_box_encoding`` for an array of encoded boxes.

    For rows whose label has an anchor, columns 0-2 are multiplied by the
    anchor sizes and columns 3-5 become ``exp(value) * anchor size``.
    Columns 0-2 of ``points_xyz`` are then added to columns 0-2 of every
    row, and column 6 (the yaw, per the original comments) is multiplied by
    ``pi / 2`` for every row.

    Args:
        cls_labels: array read as ``cls_labels[:, 0]``.  Label 2 selects the
            car anchor; labels 1 and 3 select the pedestrian/cyclist anchor.
            Rows with any other label only get the offset and yaw scaling.
        points_xyz: array whose columns 0-2 are added back to the boxes.
        encoded_boxes: array with at least 7 columns, one box per row.
            Presumably floating point; the result keeps its dtype.

    Returns:
        A new array with the same shape and dtype as ``encoded_boxes``.
        The input is left unmodified (earlier versions overwrote it in
        place and returned the same object).
    """
    # (labels sharing an anchor, anchor sizes applied to columns 0-2 / 3-5)
    anchor_groups = (
        ((2,), (3.9, 1.56, 1.6)),      # Car
        ((1, 3), (0.8, 1.73, 0.6)),    # Pedestrian and Cyclist
    )
    # Work on a private copy so the caller's array is never clobbered.
    decoded = np.array(encoded_boxes, copy=True)
    labels = cls_labels[:, 0]
    for group_labels, anchor in anchor_groups:
        mask = labels == group_labels[0]
        for label in group_labels[1:]:
            mask = mask | (labels == label)
        for axis, size in enumerate(anchor):
            decoded[mask, axis] = decoded[mask, axis]*size
            decoded[mask, axis + 3] = np.exp(decoded[mask, axis + 3])*size
    # offset
    for axis in range(3):
        decoded[:, axis] = decoded[:, axis] + points_xyz[:, axis]
    # recover all yaws
    decoded[:, 6] = decoded[:, 6]*(np.pi*0.5)
    return decoded


In [20]:
# Run the encode/decode round-trip check; it prints the intermediate arrays
# and raises AssertionError if the decoded boxes differ from the originals.
test_encode_decode()


[[1.81224845 5.90164567 8.15363614 ... 4.20730137 0.73322686 0.04021222]
 [7.31756487 6.88995185 9.30986664 ... 2.12832478 8.57811801 7.66378568]
 [8.3145276  5.80780932 4.99382579 ... 2.96177745 2.78669028 2.17549157]
 ...
 [0.57421554 3.67564618 2.13717755 ... 9.82319504 1.14679337 2.61381973]
 [7.25639506 2.31389423 1.77562477 ... 5.79690448 7.24099838 6.11895205]
 [5.01832382 9.53696395 7.79107326 ... 0.30290649 1.22797955 6.94896101]]
[[  2.23197559   0.7120397    8.61338621 ...   0.88870003   0.20052549
    0.02559989]
 [  1.64560107   1.87020645  -0.33073367 ...   0.31064936   1.67921091
    4.87891749]
 [  0.63409451  -2.40424967   0.54082225 ...   2.96177745   2.78669028
    1.38496095]
 ...
 [ -1.04152006  -0.23535285  -3.32791167 ...   1.84006061  -0.33303396
    1.66400932]
 [ -2.30844902  -3.13440674 -11.98938292 ...   1.20920266   2.49058472
    3.89544586]
 [  5.34637639   5.48877691  12.44306669 ...  -1.74245254   0.7161958
    4.42384598]]
(1000, 7)
[[1.81224845 5.9016

In [21]:
def classaware_voxelnet_box_encoding(cls_labels, points_xyz, boxes_3d):
    """
    Encode boxes against the anchor selected by each row's class label.

    Columns 0-2 of every box become the offset from the matching point.
    For rows labelled 1-6 the entry at index 0 of the second axis is then
    scaled: the offset is divided by the anchor sizes, columns 3-5 become
    log(value / anchor size) and column 6 becomes the yaw divided by pi/4
    (after subtracting pi/2 for the "vertical" labels 2, 4 and 6).  Rows
    with any other label keep only the offset; their columns 3-6 stay zero.

    Args:
        cls_labels: array read as cls_labels[:, 0].
        points_xyz: array whose columns 0-2 are subtracted from the boxes.
        boxes_3d: [None, num_classes, 7]
    Returns:
        a new array shaped like boxes_3d; boxes_3d itself is not modified.
    """
    # (label, anchor sizes for columns 0-2 / 3-5, subtract pi/2 from yaw?)
    anchor_table = (
        (1, (3.9, 1.56, 1.6), False),    # Car horizontal
        (2, (3.9, 1.56, 1.6), True),     # Car vertical
        (3, (0.8, 1.73, 0.6), False),    # Pedestrian horizontal
        (4, (0.8, 1.73, 0.6), True),     # Pedestrian vertical
        (5, (1.76, 1.73, 0.6), False),   # Cyclist horizontal
        (6, (1.76, 1.73, 0.6), True),    # Cyclist vertical
    )
    encoded_boxes_3d = np.zeros_like(boxes_3d)
    num_classes = boxes_3d.shape[1]
    # Repeat each point along the class axis so it lines up with the boxes.
    tiled_points = np.tile(
        np.expand_dims(points_xyz, axis=1), (1, num_classes, 1))
    for axis in range(3):
        encoded_boxes_3d[:, :, axis] = (
            boxes_3d[:, :, axis] - tiled_points[:, :, axis])
    labels = cls_labels[:, 0]
    for label, anchor, is_vertical in anchor_table:
        rows = labels == label
        for axis, size in enumerate(anchor):
            encoded_boxes_3d[rows, 0, axis] = (
                encoded_boxes_3d[rows, 0, axis]/size)
            encoded_boxes_3d[rows, 0, axis + 3] = np.log(
                boxes_3d[rows, 0, axis + 3]/size)
        yaw = boxes_3d[rows, 0, 6]
        if is_vertical:
            yaw = yaw-np.pi*0.5
        encoded_boxes_3d[rows, 0, 6] = yaw/(np.pi*0.25)

    return encoded_boxes_3d


In [27]:
def classaware_voxelnet_box_decoding(cls_labels, points_xyz, encoded_boxes):
    """
    Decode boxes using the anchor selected by each row's class label.

    For rows labelled 1-6 the entry at index 0 of the second axis is
    rebuilt: columns 0-2 are multiplied by the anchor sizes, columns 3-5
    become exp(value) * anchor size and column 6 becomes the encoded yaw
    times pi/4 (plus pi/2 for the "vertical" labels 2, 4 and 6).  Rows with
    any other label stay zero.  Finally columns 0-2 of the matching point
    are added to columns 0-2 of every box.

    Args:
        cls_labels: array read as cls_labels[:, 0].
        points_xyz: array whose columns 0-2 are added back to the boxes.
        encoded_boxes: array indexed as [row, class, column], 7 columns used.
    Returns:
        a new array shaped like encoded_boxes; the input is not modified.
    """
    # (label, anchor sizes for columns 0-2 / 3-5, add pi/2 to yaw?)
    anchor_table = (
        (1, (3.9, 1.56, 1.6), False),    # Car horizontal
        (2, (3.9, 1.56, 1.6), True),     # Car vertical
        (3, (0.8, 1.73, 0.6), False),    # Pedestrian horizontal
        (4, (0.8, 1.73, 0.6), True),     # Pedestrian vertical
        (5, (1.76, 1.73, 0.6), False),   # Cyclist horizontal
        (6, (1.76, 1.73, 0.6), True),    # Cyclist vertical
    )
    decoded_boxes_3d = np.zeros_like(encoded_boxes)
    labels = cls_labels[:, 0]
    for label, anchor, is_vertical in anchor_table:
        rows = labels == label
        for axis, size in enumerate(anchor):
            decoded_boxes_3d[rows, 0, axis] = encoded_boxes[rows, 0, axis]*size
            decoded_boxes_3d[rows, 0, axis + 3] = np.exp(
                encoded_boxes[rows, 0, axis + 3])*size
        yaw = encoded_boxes[rows, 0, 6]*(np.pi*0.25)
        if is_vertical:
            yaw = yaw+0.5*np.pi
        decoded_boxes_3d[rows, 0, 6] = yaw
    # offset
    num_classes = encoded_boxes.shape[1]
    # Repeat each point along the class axis so it lines up with the boxes.
    tiled_points = np.tile(
        np.expand_dims(points_xyz, axis=1), (1, num_classes, 1))
    for axis in range(3):
        decoded_boxes_3d[:, :, axis] = (
            decoded_boxes_3d[:, :, axis] + tiled_points[:, :, axis])
    return decoded_boxes_3d




In [23]:
def test_classaware_encode_decode():
    """Check the class-aware encode/decode round trip on random data.

    Only rows labelled 1-6 are compared: the encoder and decoder leave the
    size and yaw columns at zero for every other label, so those rows are
    not expected to round-trip.

    Raises:
        AssertionError: if any compared value is not close to the original.
    """
    num_samples = 1000
    cls_labels = np.random.choice(8, (num_samples, 1))
    points_xyz = np.random.random((num_samples, 3))*10
    boxes_3d = np.random.random((num_samples, 1, 7))*10
    boxes_3d[:, :, 3:6] = np.absolute(boxes_3d[:, :, 3:6])

    round_tripped = classaware_voxelnet_box_decoding(
        cls_labels,
        points_xyz,
        classaware_voxelnet_box_encoding(cls_labels, points_xyz, boxes_3d),
    )

    has_anchor = (cls_labels < 7) & (cls_labels > 0)
    rows = np.nonzero(has_anchor)[0]
    matches = np.isclose(round_tripped[rows], boxes_3d[rows])
    assert matches.all()


In [28]:
# Run the class-aware encode/decode round-trip check; it raises
# AssertionError if the decoded boxes differ from the originals.
test_classaware_encode_decode()

