# Prepare data for VoxelNet

Copy a randomly sampled subset of the source data into a designated folder, split into a training set and a validation set, then crop the copied lidar files.

In [2]:
import os, shutil
import numpy as np
import crop

In [9]:
# Where the full dataset is read from, and where the sampled subset is written.
src_dir, des_dir = "../../voxelnet/KITTI/training", "../DATA_DIR"

In [10]:
# Sub-folder names, one per kind of data.
img_dir, lidar_dir, label_dir, calib_dir = "image_2", "velodyne", "label_2", "calib"

# Parallel lists: all_exts[i] is the file extension used inside all_subdirs[i].
all_subdirs = [img_dir, lidar_dir, label_dir, calib_dir]
all_exts = [".png", ".bin", ".txt", ".txt"]

In [11]:
# Create <des_dir>/<split>/<subdir> for both splits and every kind of data.
# os.makedirs also creates des_dir itself when it is missing (os.mkdir would
# raise FileNotFoundError), and exist_ok=True makes re-running this cell a
# no-op without a separate, race-prone isdir() check.
for middir in ["training", "validation"]:
    # Created explicitly so the split folder exists even if all_subdirs is empty.
    os.makedirs(os.path.join(des_dir, middir), exist_ok=True)
    for subdir in all_subdirs:
        new_dir = os.path.join(des_dir, middir, subdir)
        os.makedirs(new_dir, exist_ok=True)

In [12]:
def get_all_fnames(loc):
    """Return the names of all regular files in a folder, without extensions.

    Only the last extension is removed (``os.path.splitext``), and
    sub-directories are skipped.

    loc: path of the folder to list
    returns: list of file names without extension, sorted so that the result
        does not depend on the order in which the filesystem lists entries
    """
    # The context manager closes the directory handle even if is_file() raises.
    with os.scandir(loc) as entries:
        return sorted(
            os.path.splitext(entry.name)[0]
            for entry in entries
            if entry.is_file()
        )

In [13]:
# List the file names (without extension) found in each source sub-folder,
# in the order image, lidar, label, calib.
all_fnames_img, all_fnames_lidar, all_fnames_label, all_fnames_calib = (
    get_all_fnames(os.path.join(src_dir, data_dir))
    for data_dir in (img_dir, lidar_dir, label_dir, calib_dir)
)

In [14]:
# Check that the lidar, label and calib folders each contain exactly the same
# file names as the image folder (prints one True/False per folder).
image_names = set(all_fnames_img)
for other_names in (all_fnames_lidar, all_fnames_label, all_fnames_calib):
    print(image_names == set(other_names))

True
True
True


In [15]:
def random_sampling(dataset, num_train, num_test):
    """Randomly pick non-overlapping training and testing data from the dataset.

    dataset: sequence of items to sample from (converted to a numpy array)
    num_train: number of items in the training set, must be >= 0
    num_test: number of items in the testing set, must be >= 0
    returns: (train_set, test_set) as numpy arrays
    raises: ValueError if a count is negative, or if the dataset holds fewer
        than num_train + num_test items
    """
    # A negative count would pass the size check below and then be interpreted
    # as a from-the-end slice, silently returning wrong-sized or overlapping sets.
    if num_train < 0 or num_test < 0:
        raise ValueError("num_train and num_test must be non-negative.")
    dataset = np.array(dataset)
    N = len(dataset)
    if N < num_train + num_test:
        raise ValueError("Cannot select non-overlapping train and test sets.")
    # One permutation, sliced into two adjacent windows, guarantees disjoint sets.
    shuffle = np.random.permutation(N)
    train_set = dataset[shuffle[:num_train]]
    test_set = dataset[shuffle[num_train:num_train + num_test]]
    return train_set, test_set

In [16]:
def copy_files(middir, fnames):
    """Copy the files of the given samples from the source to the destination.

    For every sub-folder/extension pair in all_subdirs/all_exts, each file
    <src_dir>/<subdir>/<fname><ext> is copied to
    <des_dir>/<middir>/<subdir>/<fname><ext>.

    middir: either "training" or "validation"
    fnames: file names without extension
    """
    for subdir, ext in zip(all_subdirs, all_exts):
        for stem in fnames:
            filename = stem + ext
            shutil.copyfile(
                os.path.join(src_dir, subdir, filename),
                os.path.join(des_dir, middir, subdir, filename),
            )

### Execution

In [17]:
# Sample 10 training and 5 validation items from the image file names, show
# the selection, then copy the matching files of every kind.
all_fnames = all_fnames_img
train, test = random_sampling(all_fnames, num_train=10, num_test=5)
print(train)
print(test)
for split_name, split_fnames in (("training", train), ("validation", test)):
    copy_files(split_name, split_fnames)

['000158' '001489' '001588' '004256' '006317' '004702' '007100' '002468'
 '006042' '001889']
['003623' '007084' '000208' '000927' '006850']


In [18]:
def crop_files(middir, fnames):
    """Crop the copied lidar files of the given samples, in place.

    For each sample, the image, lidar and calib files under
    <des_dir>/<middir> are passed to crop.align_img_and_pc, and the first
    four columns of its result are written back as float32 over the lidar
    file. (Presumably the result is the point cloud restricted to the image
    view -- confirm against the crop module.)

    middir: either "training" or "validation"
    fnames: file names without extension
    """
    inputs = ((img_dir, ".png"), (lidar_dir, ".bin"), (calib_dir, ".txt"))
    for stem in fnames:
        img_path, lidar_path, calib_path = (
            os.path.join(des_dir, middir, subdir, stem + ext)
            for subdir, ext in inputs
        )
        cropped = crop.align_img_and_pc(img_path, lidar_path, calib_path)

        # The output path is the lidar input path: the copy is overwritten.
        cropped[:, :4].astype('float32').tofile(lidar_path)

In [19]:
# Crop the lidar files of the sampled training set in place.
crop_files(middir="training", fnames=train)

In [20]:
# Crop the lidar files of the sampled validation set in place.
crop_files(middir="validation", fnames=test)