Imports

In [1]:
import pdb
import numpy as np
import os, shutil
from tqdm import tqdm_notebook
import pandas as pd
import open3d as o3d
import gc
import zipfile
from multiprocessing import Pool, cpu_count
# from IPython.core.display import display, HTML
# display(HTML("<style>.container { width:80% !important; }</style>"))

Set Paths and constants

In [2]:
# pcd_folder='/home/sabyasachi/Projects/ati/data/data/datasets/Carla/lidarParam1/pair_corrupt/dynamic/'
# npy_folder='/home/sabyasachi/Projects/ati/data/data/datasets/Carla/lidarParam1/pair_corrupt/dynamic_NPY/'

BASE_PATH = '/home/saby/Projects/ati/data/data/datasets/KITTI/data_odometry_labels/dataset/'
STATIC_FOLDER = "sequences"
# Do for both
# PCD_FOLDER = "_out"
PCD_FOLDER = "_segment"
# PCD_FOLDER = "_static_only"
# PCD_FOLDER = "corr_static"
EXTRACTED_ARRAY_FNAME = "arr_0.npy"

WITH_COLOR = True

# BATCH_SIZE = 80000
# BATCH_SIZE = 2048
RANGE_IMAGE_HEIGHT = 64
RANGE_IMAGE_WIDTH = 1024

Preprocessing functions from the paper source code

In [3]:
def get_quadrant(point):
    if point[0] >= 0. and point[1] >= 0. :
        return 0
    elif point[0] <= 0. and point[1] >= 0. : 
        return 1
    elif point[0] <= 0. and point[1] <= 0. : 
        return 2
    elif point[0] >= 0. and point[1] <= 0. : 
        return 3
    else :
        raise Exception('invalid input %s', point) 


def passed_origin(x_t, x_t1):
    if get_quadrant(x_t1) == 3 and get_quadrant(x_t) == 0: 
        return True
    else : 
        return False


def fit_quadrant(points, quadrant, desired_amt):
    
    
    points = np.asarray(points)
    slots = []
    slot_size = np.pi / (2 * desired_amt)
    for i in range(int(desired_amt)) : slots.append([])
    if quadrant == 0: 
        points = points[::-1]
    elif quadrant == 1 : 
        points[:, 0] = - points[:, 0]
    elif quadrant == 2 :
        points = points[::-1] 
        points[:, 0] = - points[:, 0]
        points[:, 1] = - points[:, 1]
    elif quadrant == 3 : 
        points[:, 1] = - points[:, 1]

    # import pdb; pdb.set_trace()
    for point in points :
        angle = np.arctan(point[1] / (point[0]+0.000001))
        index = min(int(angle / slot_size), desired_amt - 1)
        slots[int(index)].append(point)

    for i in range(len(slots)):
        if len(slots[i]) == 0 : 
            slots[i] = np.array([0., 0., 0., 0.])
        else :
            full_slot = np.asarray(slots[i])
            slots[i] = full_slot.mean(axis=0)

    points = np.asarray(slots)
    if quadrant == 0: 
        points = points[::-1]
    elif quadrant == 1 : 
        points[:, 0] = - points[:, 0]
    elif quadrant == 2 : 
        points = points[::-1]
        points[:, 0] = - points[:, 0]
        points[:, 1] = - points[:, 1]
    elif quadrant == 3 : 
        points[:, 1] = - points[:, 1]

    return points

def parse_velo(velo):
    # points closer to the origin (0,0,0) are at the end of the point cloud.
    # invert the point cloud such that we begin near the origin. 
    
    # returns: a H x 4 x ? array, split into quadrants
    velo = velo[::-1]
    lines = []
    current_point = velo[0]
    current_quadrant = get_quadrant(current_point)
    current_line = [[], [], [], []]
    quadrant_switches = 0
    for point in velo :
        point_quadrant = get_quadrant(point)
        
        if passed_origin(current_point, point):
            lines.append(current_line)
            current_line = [[], [], [], []]

        current_line[point_quadrant].append(point)
        current_quadrant = point_quadrant
        current_point = point

    return lines


def setmatch(lines,lenLines):
    arr=[[np.array([0,0,0,0]),np.array([0,0,0,0])]]
    if len(lines) > lenLines:
        return lines[:lenLines]
    else:
        for i in range(abs(len(lines)-lenLines)):
            lines.append(arr)
    return lines

def process_velo(velo, points_per_layer, stop=False):
    
    lenLines=RANGE_IMAGE_HEIGHT
    lines = parse_velo(velo)
    inverse = quad_to_pc_inv(lines)
#     lines = lines[2:-1]
#     print(lines[])
#     print((lines[0]))
#     raise SystemError
    if(len(lines)!=lenLines):
        lines=setmatch(lines,lenLines)
#     print(len(lines), flush=True)
    if len(lines) != RANGE_IMAGE_HEIGHT : raise Exception('invalid nb un of lines')
    out_tensor = np.zeros((RANGE_IMAGE_HEIGHT, points_per_layer, 4))
    if stop:
        import pdb; pdb.set_trace()
        x = 1
    for j in range(len(lines)):
        line = lines[j]
        out_line = np.zeros((points_per_layer, 4))
        for i in range(len(line)):
            if(len(line[i])==0):
                line[i]=[np.array([0.0,0.0,0.0,0.0])]
            gridded = fit_quadrant(line[i], i, points_per_layer / 4)
            out_tensor[j][i*int(points_per_layer/4):(i+1)*int(points_per_layer/4), :] = gridded[::-1]

    return out_tensor, inverse


def quad_to_pc_inv(lines, th=3.):
    # lines is a 63 x 4 array, where each slot has an array of 4d/3d points
    # goal : get an array of points that fills empty spaces
    points = []
    for i in range(len(lines)) :
        line = lines[i] 
        distance = []
        for quad in line : 
            for point in quad : 
                x, y, z = point[:3]
                distance.append(x**2 + y**2)
        distance = np.array(distance)
        std = distance.std()
        sorted_indices = np.argsort(distance)
        median_index = sorted_indices[int(sorted_indices.shape[0]*0.95)]
        median = distance[median_index]

        for quad in line : 
            for point in quad : 
                x, y, z = point[:3]
                dist = x ** 2 + y ** 2 
                if dist < median and (median/dist-1.) > th:#*std : 
                    # blocked point --> scale to get real pt
                    scale = np.sqrt(median / dist)
                    scaled = scale * point
                    points.append(scaled)


    return np.array(points)


In [4]:
def getint(name):
    return int(name.split('.')[0])

# def getint(name):
#     return int(name)

In [5]:
def parallel_pcd2begin_npy(pcd_fname):
    pcd_file_path = os.path.join(PCD_PATH, pcd_fname)
    pcd = o3d.io.read_point_cloud(pcd_file_path)
    pcd_arr = np.asarray(pcd.points)
    if WITH_COLOR:
        clr_arr = np.asarray(pcd.colors)[:,0].reshape(-1,1)
        pcd_arr = np.append(pcd_arr, clr_arr, axis=1)
    else:
        pcd_arr = np.append(pcd_arr, np.zeros((pcd_arr.shape[0],1)), axis=1)
    
    npy_fname = pcd_fname[:-4] + ".npy"
    npy_file_path = os.path.join(INITIAL_NPY_PATH, npy_fname)
    pcd_arr.dump(open(npy_file_path, 'wb'))

def parallel_npy2processed(npy_file):
    gc.collect()
    npy_file_path = os.path.join(INITIAL_NPY_PATH, npy_file)
    raw_lidar = np.load(npy_file_path, allow_pickle=True)
    processed_lidar, _ = process_velo(raw_lidar, RANGE_IMAGE_WIDTH)
    return processed_lidar

PCD to NPY files

In [6]:
for sub_folder in sorted(os.listdir(os.path.join(BASE_PATH, STATIC_FOLDER))):#, key=getint):
    if int(sub_folder) > 10:
        continue
    
    print("Sub folder: {}".format(sub_folder))
    PCD_PATH = os.path.join(BASE_PATH, STATIC_FOLDER, sub_folder, PCD_FOLDER)
    if not os.path.exists(PCD_PATH):
        print("Did not find : {}".format(PCD_PATH))

    INITIAL_NPY_FOLDER = PCD_FOLDER + "_begin_npy"
    INITIAL_NPY_PATH = os.path.join(BASE_PATH, STATIC_FOLDER, sub_folder, INITIAL_NPY_FOLDER)
    if not os.path.exists(INITIAL_NPY_PATH):
        os.makedirs(INITIAL_NPY_PATH)
    else:
        shutil.rmtree(INITIAL_NPY_PATH)
        os.makedirs(INITIAL_NPY_PATH)

    NPZ_FOLDER = PCD_FOLDER + "_npz"
    NPZ_PATH = os.path.join(BASE_PATH, STATIC_FOLDER, sub_folder, NPZ_FOLDER)
    if not os.path.exists(NPZ_PATH):
        os.makedirs(NPZ_PATH)
    else:
        shutil.rmtree(NPZ_PATH)
        os.makedirs(NPZ_PATH)

    OUT_NPY_FOLDER = PCD_FOLDER + "_out_npy"
    OUT_NPY_PATH = os.path.join(BASE_PATH, STATIC_FOLDER, sub_folder, OUT_NPY_FOLDER)
    if not os.path.exists(OUT_NPY_PATH):
        os.makedirs(OUT_NPY_PATH)
    else:
        shutil.rmtree(OUT_NPY_PATH)
        os.makedirs(OUT_NPY_PATH)



    parallel_npy_args = sorted(os.listdir(PCD_PATH), key=getint)
    process_npy_pool = Pool(cpu_count()-1)
    __ = [each for each in tqdm_notebook(process_npy_pool.imap(parallel_pcd2begin_npy,
                                                           parallel_npy_args),
                                         total = len(parallel_npy_args))]
    process_npy_pool.terminate()
    gc.collect()


    npy_folder_size = len(os.listdir(INITIAL_NPY_PATH))
#     leftout_size = npy_folder_size % BATCH_SIZE
#     n_batches = int(npy_folder_size / BATCH_SIZE)
    file_list = sorted(os.listdir(INITIAL_NPY_PATH), key=getint)
#     full_npy_file_list = np.split(np.array(file_list)[:-leftout_size], n_batches)
#     # To consider last small batch
#     full_npy_file_list += [np.array(file_list[-leftout_size:])]
    full_npy_file_list = [np.array(file_list)]

    print("No of npzs: {}".format(len(full_npy_file_list)))

    npz_file_idx = 0
    for some_npy_file_list in full_npy_file_list:
        print(len(some_npy_file_list))
        parallel_processed_args = some_npy_file_list
        process_processed_pool = Pool(cpu_count()-1)
        one_run_npy_file = [each for each in tqdm_notebook(process_processed_pool.imap(parallel_npy2processed,
                                                               parallel_processed_args), total=len(parallel_processed_args))]
        process_processed_pool.terminate()
        gc.collect()

        npz_file_path = os.path.join(NPZ_PATH, str(npz_file_idx))
        np.savez(npz_file_path, one_run_npy_file)
        npz_file_idx += 1

    print("Extracting out_npys")
    for npz_fname in tqdm_notebook(sorted(os.listdir(NPZ_PATH), key=getint)):
        npz_path = os.path.join(NPZ_PATH, npz_fname)
        with zipfile.ZipFile(npz_path, 'r') as zip_ref:
            zip_ref.extractall(OUT_NPY_PATH)

        out_npy_fname = npz_fname[:-4] + ".npy"
        src_fname = os.path.join(OUT_NPY_PATH, EXTRACTED_ARRAY_FNAME)
        dst_fname = os.path.join(OUT_NPY_PATH, out_npy_fname)
        os.rename(src_fname, dst_fname)
        
    shutil.rmtree(INITIAL_NPY_PATH)
    shutil.rmtree(NPZ_PATH)

Sub folder: 00


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=4541.0), HTML(value='')))


No of npzs: 1
4541


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=4541.0), HTML(value='')))


Extracting out_npys


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 01


HBox(children=(FloatProgress(value=0.0, max=1101.0), HTML(value='')))


No of npzs: 1
1101


HBox(children=(FloatProgress(value=0.0, max=1101.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 02


HBox(children=(FloatProgress(value=0.0, max=4661.0), HTML(value='')))


No of npzs: 1
4661


HBox(children=(FloatProgress(value=0.0, max=4661.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 03


HBox(children=(FloatProgress(value=0.0, max=801.0), HTML(value='')))


No of npzs: 1
801


HBox(children=(FloatProgress(value=0.0, max=801.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 04


HBox(children=(FloatProgress(value=0.0, max=271.0), HTML(value='')))


No of npzs: 1
271


HBox(children=(FloatProgress(value=0.0, max=271.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 05


HBox(children=(FloatProgress(value=0.0, max=2761.0), HTML(value='')))


No of npzs: 1
2761


HBox(children=(FloatProgress(value=0.0, max=2761.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 06


HBox(children=(FloatProgress(value=0.0, max=1101.0), HTML(value='')))


No of npzs: 1
1101


HBox(children=(FloatProgress(value=0.0, max=1101.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 07


HBox(children=(FloatProgress(value=0.0, max=1101.0), HTML(value='')))


No of npzs: 1
1101


HBox(children=(FloatProgress(value=0.0, max=1101.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 08


HBox(children=(FloatProgress(value=0.0, max=4071.0), HTML(value='')))


No of npzs: 1
4071


HBox(children=(FloatProgress(value=0.0, max=4071.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 09


HBox(children=(FloatProgress(value=0.0, max=1591.0), HTML(value='')))


No of npzs: 1
1591


HBox(children=(FloatProgress(value=0.0, max=1591.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


Sub folder: 10


HBox(children=(FloatProgress(value=0.0, max=1201.0), HTML(value='')))


No of npzs: 1
1201


HBox(children=(FloatProgress(value=0.0, max=1201.0), HTML(value='')))


Extracting out_npys


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




ValueError: invalid literal for int() with base 10: 'prepareTestData'

In [8]:
DST_FOLDER = "testing_data"
DST_DATA_FOLDER_PATH = os.path.join(BASE_PATH, DST_FOLDER)
if not os.path.exists(DST_DATA_FOLDER_PATH):
    os.makedirs(DST_DATA_FOLDER_PATH)
else:
    shutil.rmtree(DST_DATA_FOLDER_PATH)
    os.makedirs(DST_DATA_FOLDER_PATH)

for sub_folder in tqdm_notebook(sorted(os.listdir(os.path.join(BASE_PATH, STATIC_FOLDER)))):
#     print("Sub folder: {}".format(sub_folder))
    OUT_NPY_FOLDER = PCD_FOLDER + "_out_npy"
    
    extracted_file = "0.npy"
    extracted_file_path = os.path.join(BASE_PATH, STATIC_FOLDER, sub_folder, OUT_NPY_FOLDER, extracted_file)
    if not os.path.exists(extracted_file_path):
        print("Did not find : {}".format(extracted_file_path))
        
    dst_file_name = sub_folder + ".npy"
    dst_file_path = os.path.join(DST_DATA_FOLDER_PATH, dst_file_name)
    shutil.copy(extracted_file_path, dst_file_path)
    
    

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if __name__ == '__main__':


HBox(children=(FloatProgress(value=0.0, max=24.0), HTML(value='')))

Did not find : /home/saby/Projects/ati/data/data/datasets/KITTI/data_odometry_labels/dataset/sequences/11/_segment_out_npy/0.npy


FileNotFoundError: [Errno 2] No such file or directory: '/home/saby/Projects/ati/data/data/datasets/KITTI/data_odometry_labels/dataset/sequences/11/_segment_out_npy/0.npy'