## PointNet-based autoencoder for 3D point clouds

Let us now consider a deep learning approach based on the PointNet architecture.

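PointNet operates directly on unordered point sets: it applies a shared multi-layer perceptron to every point (typically implemented with 1×1 convolutions) and aggregates the per-point features with a symmetric max-pooling function. The PointNet architecture is described in the paper [PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation](https://arxiv.org/abs/1612.00593).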


### Dataset preparation

We want to train a PointNet-based autoencoder on selected classes separately, in order to maximize the decoding quality.

In particular, we noticed that some classes (road, sidewalk, ego vehicle, building, vegetation, car) contain orders of magnitude more points than the others. Therefore, we train a separate autoencoder for each of these classes and build an ensemble of autoencoders, each trained on one of these classes.


To this end, we do the following:
1) Pick 500 point clouds from left, right and top LIDARS;
2) Combine the points from the 3 LIDARS into a single point cloud;
3) For each selected class among the 6 classes mentioned above, we extract the points belonging to that class and save them in a separate file;
4) Train an autoencoder on each of the 6 classes separately;
5) Build an ensemble of autoencoders, each trained on a different class.


In [1]:
import argparse
import numpy as np
import open3d
import os
import torch
import plyfile as ply
import torch.nn as nn
from torch.utils.data import DataLoader
# from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import open3d as o3d
from tqdm import tqdm
from glob import glob

In [1]:

def _list_scan_codes(main_dir, lidar):
    """Return the sorted scan codes (file names without '.ply') of one LIDAR folder."""
    import glob

    paths = glob.glob(os.path.join(main_dir, lidar, '*.ply'))
    # basename/splitext instead of splitting on '/' and '.', so the code is
    # correct on any OS and for file names that contain extra dots.
    return sorted(os.path.splitext(os.path.basename(p))[0] for p in paths)


def combine(
    main_dir,
    Nfiles=100,
    outdir='dataset_final',
    ):
    """Merge the left, right and top LIDAR scans into single labelled PLY files.

    For every scan code present in all three LIDAR folders
    ('LIDAR_FRONT_LEFT', 'LIDAR_FRONT_RIGHT', 'LIDAR_TOP'), the three point
    clouds are concatenated and written to '<main_dir>/<outdir>/<code>.ply'
    with the vertex properties x, y, z (float32) and ObjTag (uint8).

    Args:
        main_dir: Directory that contains the three LIDAR sub-folders.
        Nfiles: Maximum number of scan codes to process; the first ``Nfiles``
            codes in sorted order are used.
        outdir: Output folder name, created inside ``main_dir``.

    Raises:
        ValueError: If a combined cloud does not have as many points as there
            are labels, which would misalign points and labels.
    """
    # find all scan codes
    left_lidar_codes = _list_scan_codes(main_dir, 'LIDAR_FRONT_LEFT')
    print (f"Number of left lidar codes: {len(left_lidar_codes)}")

    right_lidar_codes = _list_scan_codes(main_dir, 'LIDAR_FRONT_RIGHT')
    print (f"Number of right lidar codes: {len(right_lidar_codes)}")

    top_lidar_codes = _list_scan_codes(main_dir, 'LIDAR_TOP')
    print (f"Number of top lidar codes: {len(top_lidar_codes)}")

    # keep only the scans that were recorded by all three LIDARs
    print ("Finding intersection of scan codes")
    common_codes = set(left_lidar_codes) & set(right_lidar_codes) & set(top_lidar_codes)
    scan_codes = sorted(common_codes)[:Nfiles]
    print (f"Number of scan codes: {len(scan_codes)}")

    all_lidars = ['LIDAR_FRONT_LEFT', 'LIDAR_FRONT_RIGHT', 'LIDAR_TOP']
    outdir = os.path.join(main_dir, outdir)
    os.makedirs(outdir, exist_ok=True)

    vertex_dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), ('ObjTag', 'u1')]

    # save combined point clouds and labels
    for code in tqdm(scan_codes):
        pcd_combined = o3d.geometry.PointCloud()
        label_chunks = []
        for lidar in all_lidars:
            file = os.path.join(main_dir, lidar, code + '.ply')
            # labels are stored in the 'ObjTag' property of the first element
            plydata = ply.PlyData.read(file)
            label_chunks.append(np.asarray(plydata.elements[0].data['ObjTag']))

            # points are appended in the same LIDAR order as the labels
            pcd_combined += o3d.io.read_point_cloud(file, format='ply')

        labels = np.concatenate(label_chunks)

        # write the merged cloud with open3d, then read it back with plyfile
        # so that the labels can be attached as an extra vertex property
        out_file = os.path.join(outdir, code + '.ply')
        o3d.io.write_point_cloud(out_file, pcd_combined)
        plydata = ply.PlyData.read(out_file)
        points = plydata.elements[0].data

        if len(points) != len(labels):
            raise ValueError(
                f"Scan {code}: {len(points)} points but {len(labels)} labels"
            )

        # fill a structured array field by field (no per-point Python tuples)
        vertices = np.empty(len(labels), dtype=vertex_dtype)
        vertices['x'] = points['x']
        vertices['y'] = points['y']
        vertices['z'] = points['z']
        vertices['ObjTag'] = labels

        plydata.elements = [ply.PlyElement.describe(vertices, 'vertex')]
        plydata.write(out_file)
        

In [18]:
# Merge the first 500 scans (sorted by scan code) shared by the three LIDARs;
# the combined files are written to '<root>/dataset_autoencoder'.
root = '../dataset-downloader-kit/CV/dataset/Town01_Opt_ClearSunset'
combine(root, Nfiles=500, outdir='dataset_autoencoder')


Number of left lidar codes: 1634
Number of left lidar codes: 1634
Number of left lidar codes: 1634
Finding intersection of scan codes
Number of scan codes: 500


100%|██████████| 500/500 [09:23<00:00,  1.13s/it]


In [2]:
def class_autoencoder(
    root_dir,
    Nfiles=100,
    classes=(1, 100, 7, 8, 9, 10),
    outdir='dataset_autoencoder_labels',
    ):
    """Split labelled PLY scans into one PLY file per selected class.

    Every '*.ply' file in ``root_dir`` is read, and for each label in
    ``classes`` the vertices whose 'ObjTag' equals that label are written to
    '<outdir>/label_<label>/<original file name>'.

    Args:
        root_dir: Directory containing the labelled PLY files.
        Nfiles: Maximum number of files to process (first ``Nfiles`` in sorted
            order); ``None`` processes every file.
        classes: Iterable of 'ObjTag' values to extract.
        outdir: Directory in which the 'label_<label>' folders are created.
    """
    import glob

    # all files in the directory
    all_files = sorted(glob.glob(os.path.join(root_dir, '*.ply')))
    print (f"Number of files: {len(all_files)} in {root_dir}")
    # honour the file limit, as combine() does
    all_files = all_files[:Nfiles]

    for file in tqdm(all_files):
        plydata = ply.PlyData.read(file)
        vertices = plydata.elements[0].data
        labels = np.asarray(vertices['ObjTag'])
        file_name = os.path.basename(file)

        for lab in classes:
            # keep every vertex property, but only the rows of this class
            class_vertices = vertices[labels == lab]
            class_ply = ply.PlyData([ply.PlyElement.describe(class_vertices, 'vertex')])

            save_dir = os.path.join(outdir, "label_" + str(lab))
            os.makedirs(save_dir, exist_ok=True)

            class_ply.write(os.path.join(save_dir, file_name))


In [None]:

# Extract the selected classes from every combined scan; the output folder
# 'dataset_autoencoder_labels' is relative to the current working directory.
root = '../dataset-downloader-kit/CV/dataset/Town01_Opt_ClearSunset/dataset_autoencoder'
class_autoencoder(root, Nfiles=500, outdir='dataset_autoencoder_labels')

In [3]:
# reorganize in subset with val and train
import shutil
def split_train_test(
    train_ratio=0.8,
    root_dir='dataset_autoencoder_labels',
    classes=(1, 100, 7, 8, 9, 10),
    ):
    """Move the per-class PLY files into 'train_data' and 'val_data' folders.

    For each label in ``classes``, the '*.ply' files directly inside
    '<root_dir>/label_<label>' are sorted by name; the first
    ``int(train_ratio * n)`` files are moved to 'train_data' and the rest to
    'val_data'. Both folders are created even when no file is found.

    Args:
        train_ratio: Fraction of the files assigned to the training set,
            between 0 and 1.
        root_dir: Directory that contains the 'label_<label>' folders.
        classes: Iterable of class labels to process.

    Raises:
        ValueError: If ``train_ratio`` is outside [0, 1].
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be in [0, 1], got {train_ratio}")

    for label in classes:
        directory = os.path.join(root_dir, 'label_' + str(label))
        train_dir = os.path.join(directory, 'train_data')
        val_dir = os.path.join(directory, 'val_data')

        # only files still sitting directly in the class folder are split, so
        # running the function again leaves already-moved files where they are
        all_files = sorted(glob(os.path.join(directory, '*.ply')))
        n_train = int(train_ratio * len(all_files))

        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(val_dir, exist_ok=True)

        for file in all_files[:n_train]:
            shutil.move(file, train_dir)
        for file in all_files[n_train:]:
            shutil.move(file, val_dir)


In [4]:
# Split every class folder into train_data/ and val_data/ with the default 0.8 train ratio.
split_train_test()

## Training

The training is performed by means of an external script, which we do not show here for the sake of brevity. 

The bash script to run all the trainings follows:

```bash
# Train one autoencoder per selected class label.
# No trailing ':' after the word list: bash would treat "10:" as the last item.
for class_index in 1 100 7 8 9 10
do
    root_dir="dataset_autoencoder_labels/label_${class_index}"
    save_dir="dataset_autoencoder_labels/checkpoints_label_${class_index}"
    python train.py --root "$root_dir" --batchsize 32 --epoches 100 --saved_path "$save_dir"
done
```