In [17]:
from annoy import AnnoyIndex

In [18]:
import pandas as pd
import numpy as np
import torch as pt
import multiprocessing
#from bps import bps
from sembps.bps import bps
from torch.utils.data import Dataset, DataLoader
import os
from tqdm import tqdm
import h5py

In [45]:
MAIN_PATH = os.path.join('aptbps-code')
LOGS_PATH = os.path.join(MAIN_PATH, 'logs')
DATA_PATH = os.path.join(MAIN_PATH, 'data')

train_path = os.path.join(DATA_PATH, 'train')
hdf5_path = os.path.join(DATA_PATH, 'hdf5')
no_unlabeled_h5_path = os.path.join(DATA_PATH, 'no_unlabeled_hdf5')
hdf5_train_path = os.path.join(hdf5_path, 'train')
encoded_hdf5_path = os.path.join(DATA_PATH, 'tree_encoded_hdf5')

# All the clouds in the training dataset
train_files = [
    "bildstein_station1_xyz_intensity_rgb",
    "bildstein_station3_xyz_intensity_rgb",
    "bildstein_station5_xyz_intensity_rgb",
    "domfountain_station1_xyz_intensity_rgb",
    "domfountain_station2_xyz_intensity_rgb",
    "domfountain_station3_xyz_intensity_rgb",
    "neugasse_station1_xyz_intensity_rgb",
    "sg27_station1_intensity_rgb",
    "sg27_station2_intensity_rgb",
    "sg27_station4_intensity_rgb",
    "sg27_station5_intensity_rgb",
    "sg27_station9_intensity_rgb",
    "sg28_station4_intensity_rgb",
    "untermaederbrunnen_station1_xyz_intensity_rgb",
    "untermaederbrunnen_station3_xyz_intensity_rgb",
]

In [49]:
n_bps_points = 512
n_dims = 3
radius = 1.5
random_seed = 13

In [51]:
for i in range (0, n_bps_points):
    basis_set = bps.generate_random_basis(n_bps_points, n_dims=n_dims, radius=radius, random_seed=random_seed)
    

(512, 3)

In [116]:
# batch of 100 point clouds to convert
x = np.random.normal(size=[100, 2048, 3])

# optional point cloud normalization to fit a unit sphere
x = bps.normalize(x)

In [122]:
from timeit import default_timer as timer
import time
from sklearn.neighbors import NearestNeighbors

In [117]:
n_clouds, n_points, n_dims = x.shape
n_trees = 10

basis_set = bps.generate_random_basis(n_bps_points, n_dims=n_dims, radius=radius, random_seed=random_seed)

n_bps_points = basis_set.shape[0]

x_bps = np.zeros([n_clouds, n_bps_points])
        
fid_lst = range(0, x.shape[0])

idx_bps = np.zeros([n_clouds, n_bps_points])

# Bad version, saves file

In [159]:
start = timer()
for fid in fid_lst:
    #nbrs = NearestNeighbors(n_neighbors=1, leaf_size=leaf_size, algorithm=nn_algo).fit(x[fid])
    # Create an annoy index with euclidean as metric
    ann = AnnoyIndex(n_dims, 'euclidean')

    # Populate the rest with the input cloud
    for in_idx, in_point in enumerate(x[fid]):
        ann.add_item(n_bps_points+in_idx, in_point)

    # Build index with n_trees
    ann.build(1)

    # Maybe unnecessary?
    ann.save('bps.ann')
    
    new_ann = AnnoyIndex(n_dims, 'euclidean')
    new_ann.load('bps.ann')

    #nn_dist, nn_idx = new_ann.get_nns_by_vector([basis_set[0][0][0], b_point[0][1], b_point[0][2]], 1, include_distances=True)
    
    # Find nn for each point in basis point cloud
    for b_idx, b_point in enumerate(basis_set):
        nn_idx, nn_dist = new_ann.get_nns_by_vector([b_point[0], b_point[1], b_point[2]], 1, include_distances=True)
        
        idx_bps[fid][b_idx] = nn_idx[0]
        x_bps[fid][b_idx] = nn_dist[0]
end = timer()
print(end-start)

0.8510049781762064


# Good version, doesnt save temporary file, which we dont need cause were querying right after building the tree.
# The tunable parameter is the n_trees in ann.build.

In [160]:
start = timer()
for fid in fid_lst:
    #nbrs = NearestNeighbors(n_neighbors=1, leaf_size=leaf_size, algorithm=nn_algo).fit(x[fid])
    # Create an annoy index with euclidean as metric
    ann = AnnoyIndex(n_dims, 'euclidean')

    # Populate the index with the input cloud
    for in_idx, in_point in enumerate(x[fid]):
        ann.add_item(in_idx, in_point)

    # Build index with n_trees
    ann.build(1)

    #nn_dist, nn_idx = new_ann.get_nns_by_vector([basis_set[0][0][0], b_point[0][1], b_point[0][2]], 1, include_distances=True)
    
    # Find nn for each point in basis point cloud
    for b_idx, b_point in enumerate(basis_set):
        nn_idx, nn_dist = ann.get_nns_by_vector([b_point[0], b_point[1], b_point[2]], 1, include_distances=True)
        
        idx_bps[fid][b_idx] = nn_idx[0]
        x_bps[fid][b_idx] = nn_dist[0]
end = timer()
print(end-start)

0.8276478899642825


In [155]:
x_bps

array([[0.29554352, 0.59788144, 0.49960694, ..., 0.04448119, 0.32134956,
        0.2986187 ],
       [0.31377131, 0.7395075 , 0.50627249, ..., 0.02094659, 0.3925654 ,
        0.45971313],
       [0.3412573 , 0.64127672, 0.60588777, ..., 0.01918616, 0.48762757,
        0.29478571],
       ...,
       [0.34288561, 0.62626928, 0.34762919, ..., 0.03135511, 0.46728146,
        0.38572195],
       [0.31658003, 0.72895372, 0.3647632 , ..., 0.05716956, 0.42225581,
        0.26511151],
       [0.27176008, 0.81551951, 0.52390742, ..., 0.02152703, 0.57380754,
        0.30141374]])

# Output of 16-leaf kd tree below for comparison, you can see that it's the same.

In [161]:
start = timer()
for fid in fid_lst:
            nbrs = NearestNeighbors(n_neighbors=1, leaf_size=16, algorithm='kd_tree').fit(x[fid])
            fid_dist, npts_ix = nbrs.kneighbors(basis_set)
            x_bps[fid] = fid_dist.squeeze()
            idx_bps[fid] = npts_ix.squeeze()
end = timer()
print(end-start)

0.22071778518147767


In [162]:
x_bps

array([[0.29554354, 0.59788139, 0.49960693, ..., 0.04448117, 0.32134956,
        0.29861869],
       [0.3137713 , 0.73950745, 0.50627251, ..., 0.02094659, 0.39256541,
        0.45971308],
       [0.34125727, 0.64127665, 0.60588776, ..., 0.01918616, 0.48762751,
        0.29478571],
       ...,
       [0.28899238, 0.62626925, 0.34762921, ..., 0.0313551 , 0.4672814 ,
        0.34549797],
       [0.31658004, 0.72895365, 0.36476322, ..., 0.05716955, 0.42225578,
        0.26511151],
       [0.2717601 , 0.81551941, 0.52390743, ..., 0.02152704, 0.55576275,
        0.30141376]])