# Notebook for identifying and removing bottlenecks from ICET 

In [1]:
from vedo import *
import os
from ipyvtklink.viewer import ViewInteractiveWidget
import pykitti
import numpy as np
import tensorflow as tf
import time

#limit GPU memory ------------------------------------------------
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
if gpus:
  try:
    memlim = 12*1024
    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memlim)])
  except RuntimeError as e:
    print(e)
#-----------------------------------------------------------------
# tf.config.set_visible_devices([], 'GPU') #run on CPU only -- seems to actually execute main parts of code faster here...

from tensorflow.math import sin, cos, tan
import tensorflow_probability as tfp
from ICET_spherical import ICET
from utils import R_tf
from metpy.calc import lat_lon_grid_deltas

%load_ext autoreload
%autoreload 2
%autosave 180
# %matplotlib notebook

# %%bash
# # python -m cProfile scan_match.py
# python scan_match.py

2022-12-06 15:53:47.488622: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-06 15:53:47.584271: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-12-06 15:53:47.959506: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/derm/anaconda3/envs/py39/lib/python3.9/site-packages/cv2/../../lib64:
2022-12-06 15:53:47.959560: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_p

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2022-12-06 15:53:48.511589: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-06 15:53:48.511761: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-06 15:53:48.511891: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-06 15:53:48.847811: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-06 15:53:48.847985: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from S

Autosaving every 180 seconds


In [23]:
basepath = '/media/derm/06EF-127D2/KITTI'
# sequence = '03' #forest
sequence = '09' #trees and small town
dataset = pykitti.odometry(basepath, sequence)
velo1 = dataset.get_velo(400)
c1 = velo1[:,:3]
velo2 = dataset.get_velo(401)
c2 = velo2[:,:3]

# fn1 = "/home/derm/ASAR/v3/spherical_paper/MC_trajectories/scene1_scan13.txt"
# c1 = np.loadtxt(fn1)
# fn2 = "/home/derm/ASAR/v3/spherical_paper/MC_trajectories/scene1_scan14.txt"
# c2 = np.loadtxt(fn2)

it = ICET(cloud1 = c1, cloud2 = c2, fid = 50, niter = 5, 
           draw = False, group = 2, RM = False, DNN_filter = False)
ViewInteractiveWidget(it.plt.window)

Ground truth poses are not avaialble for sequence 09.

 converting to spherical took 0.019266366958618164 
 total:  0.019269704818725586

 getting bounds took 0.019212722778320312 seconds

 took  0.02342844009399414 seconds to get points in cluster

 getting spherical grid 0.06996917724609375 
 total:  0.08925962448120117

 fit_gaussian for scan 1 0.015787124633789062 
 total:  0.10505485534667969

 took  0.014899492263793945 seconds to get points in cluster

 took  0.015195131301879883 seconds to get points in cluster

 took  0.014623403549194336 seconds to get points in cluster

 got U and L cluster 0.05681180953979492 
 total:  0.16188287734985352

 ~~~~~~~~~~~~~~ 
 transforming scan2 0.0020759105682373047 
 total:  0.1639714241027832 
 ~~~~~~~~~~~~~~

 took  0.023812532424926758 seconds to get points in cluster

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.03985929489135742 
 total:  0.203843355178833 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor([ 0.2872482   0.021178

AttributeError: 'ICET' object has no attribute 'plt'

# get_points_in_cluster()

In [756]:
def gpc_old(cloud, occupied_spikes, bounds):
    """ returns ragged tensor containing the indices of points in <cloud> in each cluster 
        cloud = point cloud tensor
        occupied_spikes = tensor containing idx of spikes corresponding to bounds
        bounds = tensor containing min and max radius for each occupied spike
    """
    st = time.time()
    fid_theta = 50
    fid_phi = 50//3
    thetamin = -np.pi
    thetamax = np.pi #-  2*np.pi/self.fid_theta
    phimin =  3*np.pi/8
    phimax = 7*np.pi/8

    edges_phi = tf.linspace(phimin, phimax, fid_phi) #was this for regular cells
    bins_phi = tfp.stats.find_bins(cloud[:,2], edges_phi)

    edges_theta = tf.linspace(thetamin, thetamax, fid_theta + 1)
    bins_theta = tfp.stats.find_bins(cloud[:,1], edges_theta)

    spike_idx = tf.cast(bins_theta*(fid_phi-1) + bins_phi, tf.int32)
#     print(spike_idx[:50])
    
    #get idx of spike for each applicable point
    cond1 = spike_idx == occupied_spikes[:,None] #match spike IDs
    cond2 = cloud[:,0] < tf.cast(bounds[:,1][:,None], tf.float32) #closer than max bound
    cond3 = cloud[:,0] > tf.cast(bounds[:,0][:,None], tf.float32) #further than min bound
#     #this is the most computationally expensive part ---------
#     before = time.time()
#     cond1 = tf.math.equal(spike_idx, occupied_spikes[:,None])  #find where spike_idx matches spike ID
# #     print("\n took ", time.time() - before , "seconds to do cond 1" )
#     #---------------------------------------------------------
    # cond2 = tf.math.less(cloud[:,0], bounds[:,1][:,None]) #closer than max bound
    # cond3 = tf.math.greater(cloud[:,0], bounds[:,0][:,None]) #further than min bound

    inside1 = tf.where(tf.math.reduce_all(tf.Variable([cond1, cond2, cond3]), axis = 0))
    numPtsPerCluster = tf.math.bincount(tf.cast(inside1[:,0], tf.int32))
    inside1 = tf.RaggedTensor.from_value_rowids(inside1[:,1], inside1[:,0])

    print("\n took ", time.time() -st , "seconds to get points in cluster with old method" )
    return(inside1, numPtsPerCluster)

In [816]:
def gpc_new(cloud, occupied_spikes, bounds):
    """New method of finding which voxel each point in a scan falls into"""
    
    st = time.time()

    fid_theta = 50
    fid_phi = 50//3
    
    thetamin = -np.pi
    thetamax = np.pi
    phimin =  3*np.pi/8
    phimax = 7*np.pi/8

    edges_phi = tf.linspace(phimin, phimax, fid_phi) #was this for regular cells
    bins_phi = tfp.stats.find_bins(cloud[:,2], edges_phi)

    edges_theta = tf.linspace(thetamin, thetamax, fid_theta + 1)
    bins_theta = tfp.stats.find_bins(cloud[:,1], edges_theta)

    spike_idx = tf.cast(bins_theta*(fid_phi-1) + bins_phi, tf.int32)
    print("\n spike_idx \n", spike_idx[:])
    
    #first get ragged tensor grouping all points by their respective horiz/vertial angular bins
    #re-arrange all spike idx in ascending order
    spike_idx_ascending_idx = tf.argsort(spike_idx)
    spike_idx_ascending = tf.gather(spike_idx, spike_idx_ascending_idx)
    print("\n spike_idx_ascending \n", spike_idx_ascending)
    
    u, u_idx = tf.unique(spike_idx_ascending)
#     print("\n u \n", u[:10], "\n", tf.shape(u))
#     print("\n u_idx \n", u_idx[:100], "\n", tf.shape(u_idx))
    
    original_index = tf.range(len(u_idx))
#     print("\n original_index \n", original_index) 
    sorted_idx = tf.gather(original_index, spike_idx_ascending_idx)
    print("\n sorted_idx \n", sorted_idx) 

    #correclty clusterd by radial bounds (BUT in wrong order)
    inside1 = tf.RaggedTensor.from_value_rowids(sorted_idx, u_idx)
    
    print("\n occupied_spikes \n", occupied_spikes)
    occupied_in_ascending_order_idx = tf.argsort(occupied_spikes)
    print("\n occupied_in_ascending_order_idx \n", occupied_in_ascending_order_idx )
    
    inside1 = tf.gather(inside1, occupied_in_ascending_order_idx)
    
    # then remove points that are too far inside or outside radial bounds of each cell    
    print("\n took ", time.time() -st , "seconds to get points in cluster with new method" )
#     inside1 = None
    numPtsPerCluster = None
    return(inside1, numPtsPerCluster)

In [828]:
#old method takes ~0.04s (slow)
old_in1, old_npc = gpc_old(it.cloud2_tensor_spherical, it.occupied_spikes, it.bounds)
# print(it.occupied_spikes)
print("\n old_in1 \n", old_in1[0][:100])
# print("\n old_in1 \n", tf.shape(old_in1))
print("\n ------------------------------ \n")

#new method (should be significantly faster)
new_in1, new_npc = gpc_new(it.cloud2_tensor_spherical, it.occupied_spikes, it.bounds)
print("\n new_in1 \n", new_in1[0][:100])
# print("\n new_in1 \n", tf.shape(new_in1))


 took  0.04666256904602051 seconds to get points in cluster with old method

 old_in1 
 tf.Tensor(
[   25    93   526   610   629   647   668   686  1229  1850  2160  2182
  2312  2453  2464  3386  3644  3730  4224  4474  5563  6136  6866  6915
  7141  7331  7844  8392  8835  8998  9442  9467  9700  9786 10310 10427
 10717 11405 11760 11848 12154 12369 12378 12617 12821 13034 13927 14437
 14546 14815 17100 17295 17607 17691 18299 18303 18566 19482 19844 19876
 20034 20398 20863 21944 22520 22894 23440 24024 24028 24076 24088 24517
 25455 25703 26031 26068 26190 26304 26382 26510 26580 27113 27610 27632
 27697 27821 28203 28337 28952 29708 30075 30109 30113 30721 30776 30886
 31613 32265 32428 32597], shape=(100,), dtype=int64)

 ------------------------------ 


 spike_idx 
 tf.Tensor([199 574 440 ... 695 319 184], shape=(124598,), dtype=int32)

 spike_idx_ascending 
 tf.Tensor([  3   3   3 ... 741 741 742], shape=(124598,), dtype=int32)

 sorted_idx 
 tf.Tensor([    23     29     89 

In [829]:
plt1 = Plotter(N = 1, axes = 4, bg = (1, 1, 1), interactive = True)
disp = []

pts_old = tf.gather(it.cloud2_tensor_OG, old_in1[0][:100]).numpy()
pts_new = tf.gather(it.cloud2_tensor_OG, new_in1[0][:100]).numpy()

disp.append(Points(pts_old, c = 'red', r = 6, alpha = 1))
disp.append(Points(pts_new, c = 'blue', r = 6, alpha = 1))
disp.append(Points(it.cloud2_tensor_OG, c = 'black', r = 3, alpha = 1))

plt1.show(disp, "debug indexing for fast voxel search")
ViewInteractiveWidget(plt1.window)

ViewInteractiveWidget(height=1043, layout=Layout(height='auto', width='100%'), width=1280)

In [777]:
#test TF ragged from value row IDs
values = tf.random.uniform([6])
print("values \n", values)
# row_ids = tf.constant([1,2,3,4,0,5], dtype = tf.int32)
row_ids = tf.constant([0,5,2,3,4,5], dtype = tf.int32)

row_id_indices_sorted = tf.argsort(row_ids)
row_ids_sorted = tf.gather(row_ids, row_id_indices_sorted)
values_sorted = tf.gather(values, row_id_indices_sorted)
print(values_sorted)

rag = tf.RaggedTensor.from_value_rowids(values_sorted, row_ids_sorted)
print("\n rag \n", rag)

print("\n test \n", tf.gather(rag, tf.constant([2, 2, 3])))

values 
 tf.Tensor([0.29603457 0.7836784  0.6381947  0.5451517  0.35780048 0.7451891 ], shape=(6,), dtype=float32)
tf.Tensor([0.29603457 0.6381947  0.5451517  0.35780048 0.7836784  0.7451891 ], shape=(6,), dtype=float32)

 rag 
 <tf.RaggedTensor [[0.29603457], [], [0.6381947], [0.5451517], [0.35780048],
 [0.7836784, 0.7451891]]>

 test 
 <tf.RaggedTensor [[0.6381947],
 [0.6381947],
 [0.5451517]]>


# get_cluster()

In [None]:
def gt2(rads, thresh = 0.5, mnp = 100):
    """testing new method of finding radial bins for spherical voxels"""
    
    before = time.time()

    max_buffer = 0.2 

    if len(tf.shape(rads)) < 2:
        rads = rads[:,None]

    OG_rads = rads #hold on to OG rads
    #replace all zeros in rads (result of converting ragged -> standard tensor) with some arbitrarily large value
    mask = tf.cast(tf.math.equal(rads, 0), tf.float32)*1000
    rads = rads + mask
    # print(rads)

    #sort in ascending order for each column in tensor
    top_k = tf.math.top_k(tf.transpose(rads), k = tf.shape(rads)[0])
#     print("\n top_k \n", top_k[1])
    rads = tf.transpose(tf.gather(tf.transpose(rads), top_k[1], batch_dims = 1))
    rads = tf.reverse(rads, axis = tf.constant([0]))
#     print("rads \n", rads)

    # calculate the forward difference between neighboring points
    z = tf.zeros([1, tf.shape(rads)[1].numpy()])
    shifted = tf.concat((rads[1:], z), axis = 0)
    diff = shifted - rads

    # #find where difference jumps
    jumps = tf.where(diff > thresh)
#     print("\n jumps \n", jumps) #[idx of jump, which spike is jumping]

    #----------------------------------------------------------------------
    #not sure if actually needed...
    #get indexes of all used spikes
    used = jumps[:,1][None,:]
    # print("used", used)
    biggest = tf.math.reduce_max(used, axis = 1).numpy()[0]
    # print("biggest", biggest)
    all_spikes = tf.cast(tf.linspace(0,biggest,biggest+1), tf.int64)[None,:] #list all spikes total
    # print("all_spikes", all_spikes)

    #find differnce
    missing = tf.sets.difference(all_spikes, used).values[None,:]
    # print("\n missing", missing)
    # z = tf.zeros(tf.shape(missing), dtype = tf.int64) #wrong...
    # z = 51*tf.ones(tf.shape(missing), dtype = tf.int64) #wrong...
    # print("z", z)

    #z should be this...
    # print("\n OG_rads", OG_rads)
    # ends = tf.math.argmax(OG_rads, axis = 0) #wrong -> not max arg, last nonzero argument!!
    zero = tf.constant(0, dtype = tf.float32)
    ends = tf.math.reduce_sum(tf.cast(tf.not_equal(OG_rads, zero), tf.int64), axis = 0) #correct
    # print("\n ends", ends)

    test = tf.gather(ends, missing[0])  #get index of last element of missing jump section
    # print("\n test", test)
    z = test[None,:]
    z -= 2 #fixes indexing bug
    # print("z", z)

    missing = tf.transpose(tf.concat((z, missing), axis = 0))
    # print(missing)

    jumps = tf.concat((jumps, missing), axis = 0) #concat missing stuff back at the end of jumps
#     print("\n jumps after fix", jumps)
    #----------------------------------------------------------------------
    
    print("\n jumps: \n", jumps)
    
    #find where the first large cluster occurs in each spike
   

    
    bounds = None

    return(bounds, jumps)

In [None]:
from utils import get_cluster, get_cluster_fast
# print("rads: \n", it.rads)

s = time.time()
bounds_old = get_cluster(it.rads, mnp = it.min_num_pts)
print("\n took", time.time() - s, " s with old method \n")
print("\n bounds_old: \n", bounds_old[:10])
print(np.shape(bounds_old))

s = time.time()
# bounds_new, jumps = gt2(it.rads, mnp = it.min_num_pts)
bounds_new = get_cluster_fast(it.rads, mnp = it.min_num_pts)
print("\n bounds_new: \n", bounds_new[:10])
print(" \n took", time.time() - s, " s with new method")

In [None]:
#identifying location of jumps without looping
print("old slow soln shape: \n", tf.shape(bounds_old)) #want to produce this same shape!!!
# print("\n bounds_old: \n", bounds_old[:10])

bounds_new, jumps = gt2(it.rads, mnp = it.min_num_pts)

#get all radial measurements
#(temp-- already done inside function)-----------------------------
mask = tf.cast(tf.math.equal(it.rads, 0), tf.float32)*1000
rads = it.rads + mask
#sort in ascending order for each column in tensor
top_k = tf.math.top_k(tf.transpose(rads), k = tf.shape(rads)[0])
rads = tf.transpose(tf.gather(tf.transpose(rads), top_k[1], batch_dims = 1))
rads = tf.reverse(rads, axis = tf.constant([0]))
# print("\n rads: \n", rads[:10])
# print("\n rads: \n", np.shape(rads))
# print("\n it.rads \n", it.rads)
#------------------------------------------------------------------

# print("\n jumps: \n", tf.shape(jumps))
# print("\n jumps: \n", jumps) #[idx of jump, which spike is jumping]

# y, idx = tf.unique(jumps[:,0]) #was this
jumps_temp = tf.gather(jumps, tf.argsort(jumps[:,1]), axis=0) #reorder based on index
y, idx = tf.unique(jumps_temp[:,1]) #test
print("\n y \n", y[:15], "\n", tf.shape(y), "\n \n idx \n", idx[:15], "\n", tf.shape(idx))
# print("\n jumps_temp \n", jumps_temp[:15])
# print("\n jumps[:,_]: \n", jumps[:,0])

# get ragged tensor containing indices where jumps occur inside each wedge shaped voxel
# jumps_rag = tf.RaggedTensor.from_value_rowids(jumps[:,1], idx) #WAS THIS --wrong!!

jumps_rag = tf.RaggedTensor.from_value_rowids(jumps_temp[:,0], jumps_temp[:,1]) #TEST - should be this??
# jumps_rag = tf.RaggedTensor.from_value_rowids(jumps[:,1], jumps[:,0]) #TEST
# print("\n jumps_rag \n", jumps_rag[:15])
print("\n rads[0,_] \n", rads[:30,0])


# append 0 to beginning of each ragged elemet of jumps_rag
zeros = tf.zeros(tf.shape(jumps_rag)[0])[:,None]
zeros = tf.cast(tf.RaggedTensor.from_tensor(zeros), tf.int64)
jumps_rag = tf.concat([zeros.with_row_splits_dtype(tf.int64), jumps_rag.with_row_splits_dtype(tf.int64)], axis = 1)
print("\n jumps_rag \n", jumps_rag[:15])
# print("\n jumps_rag \n", jumps_rag.to_tensor())

#get num points between each jump 
npts_between_jumps = tf.experimental.numpy.diff(jumps_rag.to_tensor())
# print("\n npts_between_jumps:\n ",npts_between_jumps[:10,:10])
# print("\n npts_between_jumps:\n ",npts_between_jumps)

#flag spikes where all npts_between_jumps are less than mnp
biggest_jump = tf.math.reduce_max(npts_between_jumps, axis = 1)
# print("\n biggest_jump \n", biggest_jump)
mnp = 100 #minumum number of points per cluster (defined in ICET class)
good_clusters = tf.cast(tf.math.greater(biggest_jump, mnp), tf.int32)
# good_clusters = tf.RaggedTensor.from_value_rowids(good_clusters, y).to_tensor()[:,0]  #fill in skipped indices
print("\n good_clusters (hold on to this for later) \n", good_clusters)

#get idx within jumps_rag corresponding to first sufficiently large jump
big_enough = tf.cast(tf.math.greater(npts_between_jumps, 100), tf.int32)
# print(big_enough[:10])
first_big_enough = tf.math.argmax(big_enough, axis = 1)
print("\n first_big_enough: \n", first_big_enough)
# print("\n first_big_enough: \n", first_big_enough)

print("\n everything looks good up to this point :)")

In [None]:
#get inner and outer (simple way-- just use radial measurements of inner and outermost points in cluster)
#get index of radial measurements that defines inner bounds of voxel 
inner_idx = tf.gather(jumps_rag.to_tensor(), first_big_enough, batch_dims=1) + 1
# print("\n inner_idx: \n", inner_idx, "\n")
inner  = tf.gather(tf.transpose(rads), inner_idx, batch_dims=1)
# print("\n inner: \n", inner)

outer_idx = tf.gather(jumps_rag.to_tensor(), first_big_enough + 1, batch_dims=1)
# print("\n outer_idx: \n", outer_idx, "\n")
outer  = tf.gather(tf.transpose(rads), outer_idx, batch_dims=1)
# print("\n outer: \n", outer)

# bounds = np.array([inner, outer]).T
bounds = tf.concat((inner[:,None], outer[:,None]), axis = 1)
bounds = tf.cast(good_clusters[:,None], tf.float32) * bounds
print(bounds[:10])

In [None]:
#get inner and outer as described in spherical paper
#  (max half distance betweeen last in cluster and first point outside cluster)

max_buffer = 3.0

inner_idx = tf.gather(jumps_rag.to_tensor(), first_big_enough, batch_dims=1) # + 1 #DEBUG -- do we need +1 here??
inner_radii  = tf.gather(tf.transpose(rads), inner_idx, batch_dims=1)
#get radial distance of closest point on near side of cluster
next_inner_idx = tf.gather(jumps_rag.to_tensor(), first_big_enough-1, batch_dims=1)
next_inner_radii = tf.gather(tf.transpose(rads), next_inner_idx, batch_dims=1) 

# print(first_big_enough)
# print(inner_idx[:15])
# print(next_inner_idx[:15])
# print(inner_radii[:15])
# print(next_inner_radii[:15])
# print(test[:15])

#will be zero when inner idx occurs on first element of spike, otherwise correct soln
inner_skip_dist = inner_radii - next_inner_radii
# print("before: \n",inner_skip_dist[:15])
#of these nonzero distances, some are smaller than max_buffer -> leave as is, all else set to max_buffer
too_big = tf.cast(tf.math.less(inner_skip_dist*2, max_buffer), tf.float32)
# print(too_big[:15])
inner_skip_dist = inner_skip_dist*too_big + (1-too_big)*max_buffer
# print("after: \n",inner_skip_dist[:15])
temp = tf.cast(tf.math.equal(inner_skip_dist, 0), tf.float32)*max_buffer #set all others to max_buffer
# print(temp[:15])
# print(- inner_skip_dist - temp)
inner = inner_radii - inner_skip_dist - temp

#good up to here-----------------


outer_idx = tf.gather(jumps_rag.to_tensor(), first_big_enough + 1, batch_dims=1) - 1
outer_radii  = tf.gather(tf.transpose(rads), outer_idx, batch_dims=1)
next_outer_idx = tf.gather(jumps_rag.to_tensor(), first_big_enough + 1, batch_dims=1) +1
next_outer_radii = tf.gather(tf.transpose(rads), next_outer_idx, batch_dims=1) 

# print(outer_idx[:15])
# print(next_outer_idx[:15])
print(outer_radii[:15])
print(next_outer_radii[:15])

outer_skip_dist = next_outer_radii - outer_radii
# print(outer_skip_dist[:15])
too_big = tf.cast(tf.math.less(outer_skip_dist*2, max_buffer), tf.float32)
print(too_big[:15])

outer_skip_dist = outer_skip_dist*too_big + (1-too_big)*max_buffer
print(outer_skip_dist[:15])

outer = outer_radii + outer_skip_dist
print(outer[:15])

# fit_gaussian()

In [None]:
def fg2(cloud, rag, npts):
    """new method of fitting gaussian to better handle ragged input data"""
    numSamples = 3
    
    coords = tf.gather(cloud, rag)
    mu = tf.math.reduce_mean(coords, axis = 1)[:,None]
#     mu = tf.math.reduce_mean(coords, axis = 1) #old
#     print(mu)

#   TODO: try randomly sampling 30 points from each ragged cell, use reduced num pts to calculate covariance
#     subsampled = tf.map_fn(sample, it.inside2) #works but SLOW
#     subsampled = tf.map_fn(sample, it.inside2, parallel_iterations=True)
#     subsampled = tf.gather(rag,tf.range(tf.shape(rag)[0]))[:numSamples] #wrong
#     print(subsampled)

    xpos = tf.gather(cloud[:,0], rag)
    ypos = tf.gather(cloud[:,1], rag)
    zpos = tf.gather(cloud[:,2], rag)
#     c = tfp.stats.covariance(xpos.to_tensor(), ypos.to_tensor())

#     print(xpos)
    idx = tf.range(30)
    xpos = tf.gather(xpos, idx, axis = 1)
    ypos = tf.gather(ypos, idx, axis = 1)
    zpos = tf.gather(zpos, idx, axis = 1)
    print(xpos)

    xx = tf.math.reduce_sum(tf.math.square(xpos - mu[:,:,0] ), axis = 1)/npts
    yy = tf.math.reduce_sum(tf.math.square(ypos - mu[:,:,1] ), axis = 1)/npts
    zz = tf.math.reduce_sum(tf.math.square(zpos - mu[:,:,2] ), axis = 1)/npts
    xy = tf.math.reduce_sum( (xpos - mu[:,:,0])*(ypos - mu[:,:,1]), axis = 1)/npts  #+
    xz = tf.math.reduce_sum( (xpos - mu[:,:,0])*(zpos - mu[:,:,2]), axis = 1)/npts #-
    yz = tf.math.reduce_sum( (ypos - mu[:,:,1])*(zpos - mu[:,:,2]), axis = 1)/npts #-

    sigma = tf.Variable([xx, xy, xz,
                        xy, yy, yz,
                        xz, yz, zz]) 
    sigma = tf.reshape(tf.transpose(sigma), (tf.shape(sigma)[1] ,3,3))
        
#     mu = None
    return(mu, sigma)

@tf.function
def sample(x, samples=3):
  """https://stackoverflow.com/questions/71073873/sample-from-ragged-tensor"""  
  length = tf.shape(x)[0]
#   was this
#   x = tf.cond(tf.less_equal(length, samples), lambda: x, lambda: tf.gather(x, tf.random.shuffle(tf.range(length))[:samples]))
 
#   test
#   x = tf.cond(tf.less_equal(length, samples), lambda: x, lambda: tf.gather(x, tf.range(length))[:samples])
  x = tf.gather(x,tf.range(length))[:samples]

    
  return x

In [None]:
s = time.time()
mu2, sigma2 = it.fit_gaussian(it.cloud2_tensor, it.inside2, tf.cast(it.npts2, tf.float32))
print("\n took", time.time() - s, " s with old method")

s = time.time()
mu2, sigma2 = fg2(it.cloud2_tensor, it.inside2, tf.cast(it.npts2, tf.float32))
print(" \n took", time.time() - s, " s with new method")

# print(it.npts2)
# print(it.inside2)

In [None]:
# vect = it.inside2
vect = tf.ragged.constant([[],[1,2,3,4],[5,4,3,2,1],[6],[99],[7,8,9,10,11,12,13]])
# print(tf.shape(vect)[0])
print("vect", vect)
c = tf.map_fn(sample, vect)
# print(c)

#wrong
# test = tf.gather(vect,tf.range(tf.shape(vect)[0]))[:3]
idx = tf.range(3)
print("\n idx", idx)
test = tf.gather(vect, idx , axis = 1)
print("\n test", test) #NOTE: indices with too few elements produce unexpected behavior
                        #that doesn't matter since they get suppressed anyways
    
vec2 = tf.random.categorical(vect, 2)
    