In [1]:
import torch.utils.data as data

import random
import numbers
from PIL import Image, ImageMath
import os
import os.path
import numpy as np
import struct
import math

import torch
import torchvision
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import faiss
import time

import pcl
from PIL import Image, ImageDraw
import faiss

import timeit

%matplotlib qt5

In [2]:
class FarthestSampler:
    """Greedy farthest-point sampling over a point set of any dimensionality."""

    def __init__(self):
        pass

    def calc_distances(self, p0, points):
        '''
        :param p0: numpy array of d, the reference point
        :param points: numpy array of Nxd
        :return: numpy array of N, squared Euclidean distance from p0 to every row of points
        '''
        return ((p0 - points) ** 2).sum(axis=1)

    def sample(self, pts, k):
        '''
        Pick k points so that each new point is the one farthest from all points picked so far.
        The first point is drawn at random with np.random.randint.

        :param pts: array-like of Nxd (d is 3 everywhere in this notebook, but any d works)
        :param k: number of points to pick
        :return: numpy float64 array of kxd; a 0xd array when k <= 0
        :raises ValueError: if pts is not a non-empty 2-D array
        '''
        pts = np.asarray(pts)
        if pts.ndim != 2 or pts.shape[0] == 0:
            raise ValueError('pts must be a non-empty Nxd array, got shape %s' % (pts.shape,))
        if k <= 0:
            return np.zeros((0, pts.shape[1]))

        # The output width follows the input instead of being fixed to 3 columns.
        farthest_pts = np.zeros((k, pts.shape[1]))
        farthest_pts[0] = pts[np.random.randint(len(pts))]
        # distances[j] is the squared distance from pts[j] to its closest already-picked point.
        distances = self.calc_distances(farthest_pts[0], pts)
        for i in range(1, k):
            farthest_pts[i] = pts[np.argmax(distances)]
            distances = np.minimum(distances, self.calc_distances(farthest_pts[i], pts))
        return farthest_pts
    

class KNNBuilder:
    """Exact k-nearest-neighbour search backed by a flat L2 Faiss index."""

    def __init__(self, k, dimension=3):
        '''
        :param k: default number of neighbours returned per query
        :param dimension: dimensionality of the indexed vectors; defaults to 3 (xyz points)
        '''
        self.k = k
        self.dimension = dimension

    @staticmethod
    def _to_faiss_input(x):
        '''
        :param x: array-like of Nxd
        :return: x as a C-contiguous float32 numpy array (no copy when it already is one)
        '''
        return np.ascontiguousarray(x, dtype=np.float32)

    def build_nn_index(self, database):
        '''
        :param database: numpy array of Nxd, d must equal self.dimension
        :return: Faiss index, in CPU
        '''
        index = faiss.IndexFlatL2(self.dimension)
        index.add(self._to_faiss_input(database))
        return index

    def search_nn(self, index, query, k=None):
        '''
        :param index: Faiss index
        :param query: numpy array of Nxd
        :param k: number of neighbours per query point; falls back to self.k when omitted
        :return: D: numpy array of Nxk
                 I: numpy array of Nxk
        '''
        if k is None:
            k = self.k
        D, I = index.search(self._to_faiss_input(query), k)
        return D, I

    def self_build_search(self, x):
        '''
        Index x and query it against itself.

        :param x: numpy array of Nxd
        :return: D: numpy array of Nxk
                 I: numpy array of Nxk
        '''
        # Convert once so that indexing and querying share the same buffer.
        x = self._to_faiss_input(x)
        index = self.build_nn_index(x)
        D, I = self.search_nn(index, x, self.k)
        return D, I
    

class PCSampler:
    """Voxel-grid down-sampler that shrinks the voxel size until enough points survive."""

    def __init__(self, leaf_size, minimum_pc_num):
        self.minimum_pc_num = minimum_pc_num
        self.leaf_size = leaf_size

    def sample_pc(self, pc, leaf_size):
        '''
        Run one pass of the pcl voxel grid filter with a cubic leaf.

        :param pc: input numpy array of Nx3
        :param leaf_size: edge length passed for all three leaf dimensions
        :return: sampled_pc of Mx3
        '''
        cloud = pcl.PointCloud(pc)
        voxel_filter = cloud.make_voxel_grid_filter()
        voxel_filter.set_leaf_size(leaf_size, leaf_size, leaf_size)
        return np.asarray(voxel_filter.filter())

    def sample_pc_wrapper(self, pc):
        '''
        Down-sample pc, retrying with a leaf size reduced by 0.04 per attempt until at least
        minimum_pc_num points remain. Once the reduced leaf size would be <= 0 the most recent
        result is returned as is, even if it is still too small.
        '''
        sampled_pc = self.sample_pc(pc, self.leaf_size)
        attempt = 0
        while True:
            if sampled_pc.shape[0] >= self.minimum_pc_num:
                return sampled_pc
            attempt += 1
            shrunk_leaf = self.leaf_size - 0.04*attempt
            if shrunk_leaf <= 0:
                return sampled_pc
            sampled_pc = self.sample_pc(pc, shrunk_leaf)
    
    
def axisEqual3D(ax):
    """Give the x, y and z axes of ``ax`` the same span, centred on their current midpoints.

    The common span is the largest of the three current axis ranges, so nothing that is
    visible before the call gets cut off.
    """
    axis_names = 'xyz'
    limits = np.array([getattr(ax, 'get_{}lim'.format(name))() for name in axis_names])
    spans = limits[:, 1] - limits[:, 0]
    midpoints = np.mean(limits, axis=1)
    half_span = max(abs(spans)) / 2
    for name, mid in zip(axis_names, midpoints):
        set_limits = getattr(ax, 'set_{}lim'.format(name))
        set_limits(mid - half_span, mid + half_span)

In [None]:
def read_txt_as_list(txt_file):
    """Read a text file holding one integer per line.

    :param txt_file: path of the text file
    :return: list of ints, in file order
    :raises ValueError: if a non-blank line is not a valid integer

    Blank or whitespace-only lines (typically a trailing empty line) are skipped
    rather than aborting the whole read.
    """
    with open(txt_file, 'r') as f:
        # Iterate over the file lazily; int() already tolerates surrounding whitespace.
        return [int(line) for line in f if line.strip()]


root = '/ssd/dataset/SceneNN-DS-compact'
phase = 'train'
output_folder = 'frames_' + phase

# The archive is bound to `npz_data` rather than `data`, so the `torch.utils.data as data`
# import from the first cell stays usable; the `with` block closes the file handle afterwards.
with np.load(os.path.join(root, 'data_' + phase + '.npz'), mmap_mode=None) as npz_data:
    print(npz_data['pairs'].shape)
    offsets = npz_data['offsets']
    print(offsets.shape)

    # Disabled export loop, kept for reference. It reads offsets[i+1], which does not exist
    # for the last index, so the last frame is exported separately below.
    # for i in range(len(offsets)):
    #     pc_np = npz_data['points'][offsets[i]:offsets[i+1]]
    #     np.save(os.path.join(root, output_folder, '%d.npy'%i), pc_np)
    #     if i % 100 ==0:
    #         print(phase+' %d'%i)

    # Last frame: everything from its start offset to the end of the point array.
    i = offsets.shape[0] - 1
    pc_np = npz_data['points'][offsets[i]:, :]

# np.save does not create missing directories.
os.makedirs(os.path.join(root, output_folder), exist_ok=True)
np.save(os.path.join(root, output_folder, '%d.npy'%i), pc_np)

In [None]:
# get surface normal
def Surface_normals(cloud, k_search=9):
    """Estimate surface normals for ``cloud`` using a k-nearest-neighbour search.

    :param cloud: point cloud object providing ``make_NormalEstimation()`` and
                  ``make_kdtree()`` (this notebook passes a ``pcl.PointCloud``)
    :param k_search: neighbour count handed to ``set_KSearch``; the default 9 is the
                     value that used to be hard-coded
    :return: the result of the estimator's ``compute()``; the notebook turns it into an
             Nx4 array (nx, ny, nz, curvature) with ``to_array()``
    """
    ne = cloud.make_NormalEstimation()
    tree = cloud.make_kdtree()
    ne.set_SearchMethod(tree)
#     ne.set_RadiusSearch(2)
    ne.set_KSearch(k_search)
    cloud_normals = ne.compute()
    return cloud_normals

root = '/ssd/dataset/SceneNN-DS-compact'
phase = 'train'
output_folder = 'frames_' + phase

frame_dir = os.path.join(root, output_folder)
file_names = os.listdir(frame_dir)
print(len(file_names))

# Disabled loop that appends normals to every frame, kept for reference:
# for i, fname in enumerate(file_names):
#     pc_np = np.load(os.path.join(root, output_folder, '%d.npy'%i))[:, 0:3]
#
#     cloud = pcl.PointCloud(pc_np.astype(np.float32))
#     sn = Surface_normals(cloud)
#     sn_np = np.asarray(sn.to_array(), dtype=np.float32)  # Nx4, nx,ny,nz,curvature
#
#     output_np = np.concatenate((pc_np, sn_np), axis=1)  # Nx7
#     np.save(os.path.join(root, output_folder, '%d.npy'%i), output_np)
#
#     if i % 100 ==0:
#         print(phase+' %d'%i)

# Only the last frame is processed here; its index is derived from the number of
# directory entries, i.e. the folder is expected to hold exactly 0.npy .. (N-1).npy.
i = len(file_names) - 1
frame_path = os.path.join(frame_dir, '%d.npy'%i)
pc_np = np.load(frame_path)[:, 0:3]  # xyz columns only

frame_cloud = pcl.PointCloud(pc_np.astype(np.float32))
normals = Surface_normals(frame_cloud)
normals_np = np.asarray(normals.to_array(), dtype=np.float32)  # Nx4, nx,ny,nz,curvature

# Overwrite the frame in place with xyz followed by the normal columns (Nx7).
xyz_normals_np = np.concatenate((pc_np, normals_np), axis=1)
np.save(frame_path, xyz_normals_np)

In [None]:
# prepare negative mining and other information
# info -> 'pairs', 'icp', sample_num

import pickle

def save_obj(obj, name):
    """Pickle ``obj`` into the file at path ``name`` with the highest available protocol.

    An existing file at that path is overwritten.
    """
    with open(name, 'wb') as sink:
        pickle.dump(obj, sink, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Return the object stored in the pickle file at path ``name``."""
    # NOTE(review): unpickling can execute arbitrary code, so only load files you trust.
    with open(name, 'rb') as source:
        restored = pickle.load(source)
    return restored

def _build_positive_list(pairs, num_frames):
    """Collect, for every frame index, the frames paired with it in ``pairs``.

    :param pairs: numpy array whose first two columns hold the frame indices of each pair
                  (assumed to be integer-valued -- TODO confirm)
    :param num_frames: number of frames; indices outside [0, num_frames) get no entry of
                       their own
    :return: list of length num_frames; entry i is the sorted list of distinct frames that
             share a pair with frame i
    """
    neighbors = [[] for _ in range(num_frames)]
    # Single pass over the pairs instead of rescanning all pairs for every frame.
    for frame_a, frame_b in zip(pairs[:, 0], pairs[:, 1]):
        for frame, other in ((frame_a, frame_b), (frame_b, frame_a)):
            if 0 <= frame < num_frames:
                neighbors[int(frame)].append(other)
    # Drop duplicates; sorting makes the saved result deterministic.
    return [sorted(set(frame_neighbors)) for frame_neighbors in neighbors]


root = '/ssd/dataset/SceneNN-DS-compact'
phase = 'val'
output_folder = 'frames_' + phase
info_obj_name = 'info_' + phase + '.pkl'

# Number of frames in each split.
sample_num_per_phase = {'train': 2638, 'test': 1054, 'val': 972}
if phase not in sample_num_per_phase:
    # Fail here instead of hitting an undefined sample_num further down.
    raise ValueError('unknown phase: %s' % phase)
sample_num = sample_num_per_phase[phase]

# Bound to `npz_data` (not `data`) so the `torch.utils.data as data` import is not
# shadowed; the `with` block closes the archive once the arrays are read.
with np.load(os.path.join(root, 'data_' + phase + '.npz'), mmap_mode=None) as npz_data:
    pairs_np = npz_data['pairs']
    icp_np = npz_data['icp']

print(pairs_np.shape)
print(icp_np.shape)

# for each frame, find the overlapped frames (duplicates already removed)
positive_list = _build_positive_list(pairs_np, sample_num)

# report frames with no neighbors
# according to experiments of 2018-10-18, every frame has a neighbor
for i, frame_neighbors in enumerate(positive_list):
    # The per-frame list is what has to be checked here, not the outer list.
    if len(frame_neighbors) == 0:
        print('empty: %d' % i)

# save to python dictionary
info_dict = {'pairs_np': pairs_np, 'icp_np': icp_np, 'positive_list': positive_list, 'sample_num': sample_num}
save_obj(info_dict, os.path.join(root, info_obj_name))

In [None]:
def cart2hom(pts_3d):
    ''' Input: nx3 points in Cartesian
        Output: nx4 points in Homogeneous, obtained by appending a column of ones
    '''
    ones_column = np.ones((pts_3d.shape[0], 1))
    return np.concatenate((pts_3d, ones_column), axis=1)

def hom2cart(pts_3d_hom):
    ''' Input: nx4 points in Homogeneous
        Output: nx3 points in Cartesian, each row divided by its 4th component
    '''
    xyz = pts_3d_hom[:, 0:3]
    w = pts_3d_hom[:, 3:4]  # kept as a column so it broadcasts across x, y, z
    return xyz / w

# validate the icp and neighbor data
root = '/ssd/dataset/SceneNN-DS-compact'
phase = 'train'
output_folder = 'frames_' + phase
info_obj_name = 'info_' + phase + '.pkl'

info_dict = load_obj(os.path.join(root, info_obj_name))
# print(info_dict['pairs_np'])
# print(info_dict['icp_np'])
# print(info_dict['positive_list'])
# print(info_dict['sample_num'])

pairs_np = info_dict['pairs_np']
icp_np = info_dict['icp_np']
positive_list = info_dict['positive_list']
sample_num = info_dict['sample_num']

print(len(positive_list))

# Pick one pair and check that both frames list each other as neighbors.
i = 888
anc_idx = pairs_np[i, 0]
pos_idx = pairs_np[i, 1]
assert anc_idx in positive_list[pos_idx]
assert pos_idx in positive_list[anc_idx]

anc_pc_np = np.load(os.path.join(root, output_folder, '%d.npy'%anc_idx))[:, 0:3]
pos_pc_np = np.load(os.path.join(root, output_folder, '%d.npy'%pos_idx))[:, 0:3]

# fig1 = plt.figure()
# ax = fig1.add_subplot(111, projection='3d')
# ax.scatter(anc_pc_np[:,0].tolist(), anc_pc_np[:,1].tolist(), anc_pc_np[:,2].tolist(), s=5, c=[0, 0, 0])
# axisEqual3D(ax)

# fig2 = plt.figure()
# ax = fig2.add_subplot(111, projection='3d')
# ax.scatter(pos_pc_np[:,0].tolist(), pos_pc_np[:,1].tolist(), pos_pc_np[:,2].tolist(), s=5, c=[0, 0, 0])
# axisEqual3D(ax)

# Apply icp_np[i] to the anchor cloud in homogeneous coordinates and overlay the result
# on the positive cloud (presumably icp_np[i] is a 4x4 anchor-to-positive transform --
# TODO confirm); a good registration shows the two clouds coinciding.
transformed_anc_pc_np = hom2cart(np.dot(icp_np[i], cart2hom(anc_pc_np).T).T)
fig3 = plt.figure()
# add_subplot attaches the 3D axes to the figure; Axes3D(fig) no longer does so on
# current Matplotlib releases, which leaves the figure empty.
ax = fig3.add_subplot(111, projection='3d')
ax.scatter(pos_pc_np[:,0].tolist(), pos_pc_np[:,1].tolist(), pos_pc_np[:,2].tolist(), s=5, c=[1, 0, 0])
ax.scatter(transformed_anc_pc_np[:,0].tolist(), 
           transformed_anc_pc_np[:,1].tolist(), 
           transformed_anc_pc_np[:,2].tolist(), 
           s=5, c=[0, 0, 1])
axisEqual3D(ax)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')

plt.ion()
plt.show()

In [None]:
root = '/ssd/dataset/SceneNN-DS-compact'
phase = 'train'
output_folder = 'frames_' + phase

frame_dir = os.path.join(root, output_folder)
file_names = os.listdir(frame_dir)
print(len(file_names))

pc_num_list = []        # number of points in every frame
large_pc_idx_list = []  # indices of frames with more than 8000 points
pc_np_list = []         # xyz arrays of all frames, concatenated further down

# Frames are addressed as 0.npy .. (N-1).npy, N being the number of directory entries.
for i in range(len(file_names)):
    pc_np = np.load(os.path.join(frame_dir, '%d.npy'%i))[:, 0:3]

#     fig = plt.figure()
#     ax = Axes3D(fig)
#     ax.scatter(pc_np[:,0].tolist(), pc_np[:,1].tolist(), pc_np[:,2].tolist(), s=0.1, c=[0.5,0.5,0.5])
#     axisEqual3D(ax)

#     plt.ion()
#     plt.show()

    point_count = pc_np.shape[0]
    pc_num_list.append(point_count)
    pc_np_list.append(pc_np)
    if point_count > 8000:
        large_pc_idx_list.append(i)

# mean / max / min number of points per frame
pc_num_np = np.asarray(pc_num_list)
for statistic in (np.mean, np.max, np.min):
    print(statistic(pc_num_np))

# coordinate extents and means over all frames together
pc_np_all = np.concatenate(pc_np_list, axis=0)
xs, ys, zs = pc_np_all[:, 0], pc_np_all[:, 1], pc_np_all[:, 2]
print('x_max: %f, x_min: %f, y_max: %f, y_min: %f, z_max: %f, z_min: %f' % 
      (np.max(xs), np.min(xs), np.max(ys), np.min(ys), np.max(zs), np.min(zs)))

print('x_mean: %f, y_mean: %f, z_mean: %f' % (np.mean(xs), np.mean(ys), np.mean(zs)))

plt.hist(pc_num_np, bins='auto')
plt.show()

# fraction of frames that have fewer than n points
n = 4096
print(np.sum(pc_num_np<n) / pc_num_np.shape[0])

In [None]:
# Indices of the frames with more than 8000 points, as collected into
# large_pc_idx_list by the statistics cell; that cell has to run first.
print(large_pc_idx_list)

In [None]:
# investigate how many points should be chosen for training
# NOTE: `root` and `output_folder` come from a previously executed cell.
i = 2566
pc_np = np.load(os.path.join(root, output_folder, '%d.npy'%i))[:, 0:3]
print(pc_np.shape)

# Sampling without replacement raises if more samples are requested than there are
# points, so the sample size is capped at the size of the frame.
sample_size = min(5000, pc_np.shape[0])
choice_idx = np.random.choice(pc_np.shape[0], sample_size, replace=False)
pc_np_sampled = pc_np[choice_idx]
    
fig = plt.figure()
# add_subplot attaches the 3D axes to the figure; Axes3D(fig) no longer does so on
# current Matplotlib releases, which leaves the figure empty.
ax = fig.add_subplot(111, projection='3d')
# full cloud in black, random subset on top in red
ax.scatter(pc_np[:,0].tolist(), pc_np[:,1].tolist(), pc_np[:,2].tolist(), s=1, c=[0, 0, 0])
ax.scatter(pc_np_sampled[:,0].tolist(), pc_np_sampled[:,1].tolist(), pc_np_sampled[:,2].tolist(), s=10, c=[1, 0, 0])
axisEqual3D(ax)

plt.ion()
plt.show()

In [5]:
# investigate the statistics of the dataset
root = '/ssd/dataset/SceneNN-DS-compact'
phase = 'test'
output_folder = 'frames_' + phase

sampler = FarthestSampler()

# nearest neighbor analysis for nodes: these do not change per frame, so they are set up
# once; this also keeps knn_k defined for the final print when the folder is empty.
knn_k = 12
knn = KNNBuilder(k=knn_k)

file_names = os.listdir(os.path.join(root, output_folder))
print(len(file_names))

mean_list = []
min_list = []
max_list = []
for i, fname in enumerate(file_names):    
    pc_np = np.load(os.path.join(root, output_folder, '%d.npy'%i))[:, 0:3]

#     ###### knn analysis
#     knn_k=2
#     knn = KNNBuilder(k=knn_k)

#     start_t = timeit.default_timer()
#     index = knn.build_nn_index(np.ascontiguousarray(pc_np[:, 0:3], dtype=np.float32))
#     D, I = knn.search_nn(index, np.ascontiguousarray(pc_np[:, 0:3], dtype=np.float32), k=knn_k)
#     stop_t = timeit.default_timer()
#     print(stop_t - start_t)

#     D = np.sqrt(np.fabs(D))
#     D = D[:, knn_k-1:knn_k]
#     print(D.shape)
#     print('pc k=%d - mean %f, max %f, min %f' % (knn_k, np.mean(D), np.max(D), np.min(D)))
#     ###### knn analysis
    
    farthest_pts = sampler.sample(pc_np, 768)

    # Convert once; the same float32 buffer is both indexed and queried.
    nodes_np = np.ascontiguousarray(farthest_pts[:, 0:3], dtype=np.float32)
    index = knn.build_nn_index(nodes_np)
    D, I = knn.search_nn(index, nodes_np, k=knn_k)

    # The search output is square-rooted to get distances (fabs guards against tiny
    # negative values); only the knn_k-th column is kept. Because the nodes are queried
    # against themselves, the first column is expected to be the node itself.
    D = np.sqrt(np.fabs(D))
    D = D[:, knn_k-1:knn_k]
    
    mean_list.append(np.mean(D))
    max_list.append(np.max(D))
    min_list.append(np.min(D))
    
    
if mean_list:
    # per-frame mean / min / max of that distance, averaged over all frames
    node_knn_mean = np.mean(np.asarray(mean_list))
    node_knn_min = np.mean(np.asarray(min_list))
    node_knn_max = np.mean(np.asarray(max_list))
    print('node k=%d - mean %f, max %f, min %f \n' % (knn_k, node_knn_mean, node_knn_max, node_knn_min))
else:
    print('no frames found in %s' % os.path.join(root, output_folder))

1054
node k=12 - mean 0.214459, max 0.852333, min 0.141794 



In [None]:
# how many points fall inside a ball of radius r around each sampled node
# NOTE(review): this cell was originally labelled "30cm neighborhood", but r is 0.8 below --
# confirm which value is intended.
root = '/ssd/dataset/SceneNN-DS-compact'
phase = 'test'
output_folder = 'frames_' + phase

sampler = FarthestSampler()

frame_dir = os.path.join(root, output_folder)
file_names = os.listdir(frame_dir)
print(len(file_names))

r = 0.8
within_r_num_list = []
# Frames are addressed as 0.npy .. (N-1).npy, N being the number of directory entries.
for i in range(len(file_names)):
    pc_np = np.load(os.path.join(frame_dir, '%d.npy'%i))[:, 0:3]  # Nx3

    # Nodes: farthest-point sample drawn from a random half of the frame.
    half_count = int(pc_np.shape[0]/2)
    choice_idx = np.random.choice(pc_np.shape[0], half_count, replace=False)
    farthest_pts = sampler.sample(pc_np[choice_idx], 512)  # Mx3

    # Distance from every node to every point of the full frame, via broadcasting.
    diff_MxNx3 = farthest_pts[:, np.newaxis, :] - pc_np[np.newaxis, :, :]
    dist_np = np.linalg.norm(diff_MxNx3, axis=2)
    ball_mask = dist_np < r  # MxN

    # average number of points per node ball for this frame
    within_r_num_list.append(np.sum(ball_mask) / farthest_pts.shape[0])

#     if i > 100:
#         break

within_r_num_np = np.asarray(within_r_num_list)
print(np.mean(within_r_num_np))
print(np.max(within_r_num_np))
print(np.min(within_r_num_np))

In [None]:
root = '/ssd/dataset/SceneNN-DS-compact'
phase = 'train'
output_folder = 'frames_' + phase

# Bound to `npz_data` (not `data`) so the `torch.utils.data as data` import is not
# shadowed; the `with` block closes the archive once the arrays have been read.
with np.load(os.path.join(root, 'data_' + phase + '.npz'), mmap_mode=None) as npz_data:
    pairs_np = npz_data['pairs']
    offsets = npz_data['offsets']
    icp_np = npz_data['icp']

# quick look at the pair table and at the sizes of the offset / icp arrays
print(pairs_np)
print(offsets.shape)
print(icp_np.shape)