# Notebook for identifying and removing bottlenecks from ICET implementation

In [3]:
from vedo import *
import os
from ipyvtklink.viewer import ViewInteractiveWidget
import pykitti
import numpy as np
import tensorflow as tf
import time

#limit GPU memory ------------------------------------------------
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
if gpus:
  try:
    memlim = 4*1024
    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memlim)])
  except RuntimeError as e:
    print(e)
#-----------------------------------------------------------------
# tf.config.set_visible_devices([], 'GPU') #run on CPU only -- seems to actually execute main parts of code faster here...

from tensorflow.math import sin, cos, tan
import tensorflow_probability as tfp
from ICET_spherical import ICET
from utils import R_tf
from metpy.calc import lat_lon_grid_deltas

%load_ext autoreload
%autoreload 2
%autosave 180
# %matplotlib notebook

# %%bash
# # python -m cProfile scan_match.py
# python scan_match.py

[]


Autosaving every 180 seconds


In [53]:
# basepath = '/media/derm/06EF-127D1/KITTI'
# # sequence = '03' #forest
# sequence = '09' #trees and small town
# dataset = pykitti.odometry(basepath, sequence)
# velo1 = dataset.get_velo(400)
# c1 = velo1[:,:3]
# velo2 = dataset.get_velo(401)
# c2 = velo2[:,:3]

fn1 = "/home/derm/ASAR/v3/spherical_paper/MC_trajectories/scene1_scan13.txt"
c1 = np.loadtxt(fn1)
fn2 = "/home/derm/ASAR/v3/spherical_paper/MC_trajectories/scene1_scan14.txt"
c2 = np.loadtxt(fn2)

it = ICET(cloud1 = c1, cloud2 = c2, fid = 50, niter = 9, 
           draw = False, group = 2, RM = True, DNN_filter = False)


 loading model took 4.76837158203125e-07 
 total:  5.7220458984375e-06

 shuffling and converting to tensor took  0.006495237350463867 
 total:  0.006583690643310547

 converting to spherical took 0.0327756404876709 
 total:  0.03940153121948242

 getting cluster took 0.5056931972503662 seconds !!!

 fit_gaussian for scan 1 0.029498577117919922 
 total:  0.9042644500732422

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.25449275970458984 
 total:  1.2374465465545654 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor(
[-1.0480183e-09  3.4007790e-06  3.6277578e-07 -4.4495103e-08
 -4.2737582e-08  4.9248877e-07], shape=(6,), dtype=float32)

 ~~~~~~~~~~~~~~ 
 correcting solution estimate 0.022118091583251953 
 total:  1.2596476078033447 
 ~~~~~~~~~~~~~~

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.27145862579345703 
 total:  1.5315909385681152 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor(
[-1.0503055e-09  4.1004641e-06  2.7179695e-07 -4.4458101e-08
 -3.6679204e-08  7.3

# get_cluster()

In [54]:
def gt2(rads, thresh = 0.5, mnp = 100):
    """testing new method of finding radial bins for spherical voxels"""
    
    before = time.time()

    max_buffer = 0.2 

    if len(tf.shape(rads)) < 2:
        rads = rads[:,None]

    OG_rads = rads #hold on to OG rads
    #replace all zeros in rads (result of converting ragged -> standard tensor) with some arbitrarily large value
    mask = tf.cast(tf.math.equal(rads, 0), tf.float32)*1000
    rads = rads + mask
    # print(rads)

    #sort in ascending order for each column in tensor
    top_k = tf.math.top_k(tf.transpose(rads), k = tf.shape(rads)[0])
#     print("\n top_k \n", top_k[1])
    rads = tf.transpose(tf.gather(tf.transpose(rads), top_k[1], batch_dims = 1))
    rads = tf.reverse(rads, axis = tf.constant([0]))
    print("rads \n", rads)

    # calculate the forward difference between neighboring points
    z = tf.zeros([1, tf.shape(rads)[1].numpy()])
    shifted = tf.concat((rads[1:], z), axis = 0)
    diff = shifted - rads
    # diff = tf.math.abs(rads - shifted) #debug 6/9/22
#     print("\n diff \n", diff)

    # #find where difference jumps
    jumps = tf.where(diff > thresh)
#     print("\n jumps \n", jumps) #[idx of jump, which spike is jumping]

    #----------------------------------------------------------------------
    #not sure if actually needed...
    #get indexes of all used spikes
    used = jumps[:,1][None,:]
    # print("used", used)
    biggest = tf.math.reduce_max(used, axis = 1).numpy()[0]
    # print("biggest", biggest)
    all_spikes = tf.cast(tf.linspace(0,biggest,biggest+1), tf.int64)[None,:] #list all spikes total
    # print("all_spikes", all_spikes)

    #find differnce
    missing = tf.sets.difference(all_spikes, used).values[None,:]
    # print("\n missing", missing)
    # z = tf.zeros(tf.shape(missing), dtype = tf.int64) #wrong...
    # z = 51*tf.ones(tf.shape(missing), dtype = tf.int64) #wrong...
    # print("z", z)

    #z should be this...
    # print("\n OG_rads", OG_rads)
    # ends = tf.math.argmax(OG_rads, axis = 0) #wrong -> not max arg, last nonzero argument!!
    zero = tf.constant(0, dtype = tf.float32)
    ends = tf.math.reduce_sum(tf.cast(tf.not_equal(OG_rads, zero), tf.int64), axis = 0) #correct
    # print("\n ends", ends)

    test = tf.gather(ends, missing[0])  #get index of last element of missing jump section
    # print("\n test", test)
    z = test[None,:]
    z -= 2 #fixes indexing bug
    # print("z", z)

    missing = tf.transpose(tf.concat((z, missing), axis = 0))
    # print(missing)

    jumps = tf.concat((jumps, missing), axis = 0) #concat missing stuff back at the end of jumps
#     print("\n jumps after fix", jumps)
    #----------------------------------------------------------------------
    
    print("\n jumps: \n", jumps.numpy())
    
    #find where the first large cluster occurs in each spike
   

    
    bounds = None

    return(bounds, jumps)

In [55]:
from utils import get_cluster
# print("rads: \n", it.rads)

s = time.time()
bounds_old = get_cluster(it.rads, mnp = it.min_num_pts)
print("\n took", time.time() - s, " s with old method \n")
print("\n bounds_old: \n", bounds_old)
print(np.shape(bounds_old))

s = time.time()
bounds_new, jumps = gt2(it.rads, mnp = it.min_num_pts)
print(" \n took", time.time() - s, " s with new method")


 getting cluster took 0.626060962677002 seconds !!!

 took 0.626514196395874  s with old method 


 bounds_old: 
 tf.Tensor(
[[ 6.24612904  8.17313671]
 [13.42310524 13.85848236]
 [17.12698936 19.22462273]
 [ 8.49700451  9.26582336]
 [ 4.87625551  6.12153435]
 [ 4.87632322  6.12142706]
 [19.32097054 22.42539024]
 [ 0.          0.        ]
 [ 6.24606848  8.17309666]
 [ 4.87628937  6.12145662]
 [ 6.24609518  8.17315769]
 [27.43743706 35.64250565]
 [ 8.80649471 12.60198975]
 [13.21931648 13.52005005]
 [15.29999828 17.19395065]
 [ 7.50367165  8.1603651 ]
 [ 8.41508961 12.00569153]
 [ 4.87628412  6.12150955]
 [ 4.87625456  6.12142181]
 [ 8.80651188 12.04703331]
 [ 0.          0.        ]
 [22.47119331 27.26955795]
 [ 6.24612474  8.17305088]
 [ 0.          0.        ]
 [15.3000164  16.33851814]
 [ 8.80652142 12.00578022]
 [ 6.24617672  8.55318737]
 [13.43597794 13.95995331]
 [14.59835339 15.70939255]
 [ 4.87631273  6.12150145]
 [ 4.87630606  6.1213932 ]
 [ 6.2460742   8.55319595]
 [ 8.41507

In [70]:
#identifying location of jumps without looping
print("old slow soln shape: \n", tf.shape(bounds_old)) #want to produce this same shape!!!
# print("\n bounds_old: \n", bounds_old[:10])

#get all radial measurements
#(temp-- already done inside function)-----------------------------
mask = tf.cast(tf.math.equal(it.rads, 0), tf.float32)*1000
rads = it.rads + mask
#sort in ascending order for each column in tensor
top_k = tf.math.top_k(tf.transpose(rads), k = tf.shape(rads)[0])
rads = tf.transpose(tf.gather(tf.transpose(rads), top_k[1], batch_dims = 1))
rads = tf.reverse(rads, axis = tf.constant([0]))
# print("\n rads: \n", rads[:10])
# print("\n it.rads \n", it.rads)
#------------------------------------------------------------------

# print("\n jumps: \n", tf.shape(jumps))
# print("\n jumps: \n",jumps[:30])

y, idx = tf.unique(jumps[:,0])
# print("\n y \n", y, "\n \n idx", idx)
# print("\n jumps \n", jumps)

# get ragged tensor containing indices where jumps occur sin each wedge shaped voxel
jumps_rag = tf.RaggedTensor.from_value_rowids(jumps[:,1], idx)
# append 0 to beginning of each ragged elemet of jumps_rag
zeros = tf.zeros(tf.shape(jumps_rag)[0])[:,None]
zeros = tf.cast(tf.RaggedTensor.from_tensor(zeros), tf.int64)
jumps_rag = tf.concat([zeros.with_row_splits_dtype(tf.int64), jumps_rag.with_row_splits_dtype(tf.int64)], axis = 1)
print("\n jumps_rag \n", jumps_rag[:10])
# print("\n jumps_rag \n", tf.shape(jumps_rag))

#get num points between each jump 
npts_between_jumps = tf.experimental.numpy.diff(jumps_rag.to_tensor())
print("\n npts_between_jumps:\n ",npts_between_jumps[:10,:10])
# print("\n npts_between_jumps:\n ",npts_between_jumps)

#flag spikes where all npts_between_jumps are less than mnp
biggest_jump = tf.math.reduce_max(npts_between_jumps, axis = 1)
print("\n biggest_jump \n", biggest_jump)
mnp = 50 #minumum number of points per cluster (defined in ICET class)
no_good_clusters = tf.cast(tf.math.less(biggest_jump, mnp), tf.int32)
print("\n no_good_clusters \n", no_good_clusters)

#get idx within jumps_rag corresponding to first sufficiently large jump
big_enough = tf.cast(tf.math.greater(npts_between_jumps, 100), tf.int32)
# print(big_enough[:10])
first_big_enough = tf.math.argmax(big_enough, axis = 1)
print("\n first_big_enough: \n", first_big_enough[:15])
# print("\n first_big_enough: \n", first_big_enough)

old slow soln shape: 
 tf.Tensor([245   2], shape=(2,), dtype=int32)

 jumps_rag 
 <tf.RaggedTensor [[0, 243, 244], [0, 243, 244], [0, 243, 244], [0, 133],
 [0, 7, 20, 23, 41, 64, 71, 79, 100, 105, 112, 115, 136, 147, 153, 192, 195,
  213, 215, 218, 239, 241]                                                  ,
 [0, 7, 20, 23, 24, 41, 64, 71, 79, 100, 105, 136, 147, 153, 163, 170, 192,
  195, 213, 215, 218, 239, 241]                                            ,
 [0, 7, 20, 23, 24, 41, 64, 71, 79, 100, 105, 136, 153, 163, 170, 195, 213,
  218, 239, 241]                                                           ,
 [0, 183], [0, 94, 110, 148],
 [0, 7, 20, 23, 24, 41, 64, 71, 79, 94, 100, 105, 110, 136, 148, 153, 163,
  170, 195, 213, 218, 239, 241]                                           ]>

 npts_between_jumps:
  tf.Tensor(
[[ 243    1 -244    0    0    0    0    0    0    0]
 [ 243    1 -244    0    0    0    0    0    0    0]
 [ 243    1 -244    0    0    0    0    0    0    0]
 [ 133 

In [73]:
#(continuted)
#------------------
#get inner and outer (temp simple way-- just use radial measurements of inner and outermost points in cluster)
inner = tf.gather(jumps_rag.to_tensor(), first_big_enough, batch_dims = 1)
print("\n inner: \n", inner[:10])
# print("\n inner: \n", inner)

#infill zero elements throughout (use tf.ragged.from_value_rowids keyed by y)
inner = tf.RaggedTensor.from_value_rowids(inner, y).to_tensor()[:,0]
#add zeros to end of bounds to get to same number of total voxels as OG_rads 
print(tf.shape(rads)[0])
print(tf.shape(inner))
# inner = tf.pad(inner, [[0,tf.shape(rads)[1]-len(inner)]]) #old
inner = tf.pad(inner, [[0,tf.shape(rads)[0]-len(inner)]]) #new
# print("\n inner \n", inner)
# print("\n inner \n", tf.shape(inner))

#concat idx and y, use gather_nd instead of converting to ragged and back?? 
idx = tf.concat((tf.cast(tf.range(len(inner))[:,None], tf.int64), inner[:,None]), axis = 1) #wrong?
# idx1 = tf.concat((inner[:,None], tf.cast(tf.range(len(inner))[:,None], tf.int64)), axis = 1) #test
print("\n idx \n", idx)
inside_bound = tf.gather_nd(rads, idx1)
print("\n inside_bound \n", inside_bound[:15])

#repeat for outside bound
outer = tf.gather(jumps_rag.to_tensor(), first_big_enough +1, batch_dims = 1)
outer = tf.RaggedTensor.from_value_rowids(outer, y).to_tensor()[:,0]
outer = tf.pad(outer, [[0,tf.shape(rads)[1]-len(outer)]]) #DEBUG-- make sure I'm using correct dimension of tf.shape(rads)
idx2 = tf.concat((outer[:,None], tf.cast(tf.range(len(outer))[:,None], tf.int64)), axis = 1) #test
outside_bound = tf.gather_nd(rads, idx2)
print("\n outside_bound \n", outside_bound[:15])

#------------------

# #test-----------------
# #infill zero elements throughout (use tf.ragged.from_value_rowids keyed by y)
# first_big_enough = tf.RaggedTensor.from_value_rowids(first_big_enough, y).to_tensor()[:,0]
# #add zeros to end of bounds to get to same number of total voxels as OG_rads 
# first_big_enough = tf.pad(first_big_enough, [[0,tf.shape(rads)[1]-len(first_big_enough)]])
# print("\n first_big_enough: \n", first_big_enough[:15])

# # inner_idx = tf.gather(jumps_rag.to_tensor(), first_big_enough)
# # idx = tf.concat((tf.cast(tf.range(len(inner))[:,None], tf.int64), inner[:,None]), axis = 1)
# #--------------------

#TODO add voxel length padding 
#  (max half distance betweeen last in cluster and first point outside cluster)


 inner: 
 tf.Tensor([0 0 0 0 0 0 0 0 0 0], shape=(10,), dtype=int64)
tf.Tensor(540, shape=(), dtype=int32)
tf.Tensor([539], shape=(1,), dtype=int32)

 idx 
 tf.Tensor(
[[  0   0]
 [  1   0]
 [  2   0]
 ...
 [537   0]
 [538   0]
 [539   0]], shape=(540, 2), dtype=int64)


2022-11-22 15:18:59.091895: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at gather_nd_op.cc:46 : INVALID_ARGUMENT: indices[539] = [0, 539] does not index into param shape [540,245], node name: GatherNd


InvalidArgumentError: {{function_node __wrapped__GatherNd_device_/job:localhost/replica:0/task:0/device:CPU:0}} indices[539] = [0, 539] does not index into param shape [540,245], node name: GatherNd [Op:GatherNd]

In [None]:
#multi-dimensional indexing test
# a = tf.random.uniform([3,3])
# print(a)
# idx = tf.constant([[1,1],[2,2]])
# b = tf.gather_nd(a, idx)
# print(b)

#test adding zeros to start of each ragged tensor
# print(tf.shape(jumps_rag))
zeros = tf.zeros(tf.shape(jumps_rag)[0])[:,None]
zeros = tf.cast(tf.RaggedTensor.from_tensor(zeros), tf.int64)
# print(tf.shape(zeros))
# print(tf.shape(jumps_rag))
test = tf.concat([zeros.with_row_splits_dtype(tf.int64), jumps_rag.with_row_splits_dtype(tf.int64)], axis = 1)
print(test)

# fit_gaussian()

In [6]:
def fg2(cloud, rag, npts):
    """new method of fitting gaussian to better handle ragged input data"""
    numSamples = 3
    
    coords = tf.gather(cloud, rag)
    mu = tf.math.reduce_mean(coords, axis = 1)[:,None]
#     mu = tf.math.reduce_mean(coords, axis = 1) #old
#     print(mu)

#   TODO: try randomly sampling 30 points from each ragged cell, use reduced num pts to calculate covariance
#     subsampled = tf.map_fn(sample, it.inside2) #works but SLOW
#     subsampled = tf.map_fn(sample, it.inside2, parallel_iterations=True)
#     subsampled = tf.gather(rag,tf.range(tf.shape(rag)[0]))[:numSamples] #wrong
#     print(subsampled)

    xpos = tf.gather(cloud[:,0], rag)
    ypos = tf.gather(cloud[:,1], rag)
    zpos = tf.gather(cloud[:,2], rag)
#     c = tfp.stats.covariance(xpos.to_tensor(), ypos.to_tensor())

#     print(xpos)
    idx = tf.range(30)
    xpos = tf.gather(xpos, idx, axis = 1)
    ypos = tf.gather(ypos, idx, axis = 1)
    zpos = tf.gather(zpos, idx, axis = 1)
    print(xpos)

    xx = tf.math.reduce_sum(tf.math.square(xpos - mu[:,:,0] ), axis = 1)/npts
    yy = tf.math.reduce_sum(tf.math.square(ypos - mu[:,:,1] ), axis = 1)/npts
    zz = tf.math.reduce_sum(tf.math.square(zpos - mu[:,:,2] ), axis = 1)/npts
    xy = tf.math.reduce_sum( (xpos - mu[:,:,0])*(ypos - mu[:,:,1]), axis = 1)/npts  #+
    xz = tf.math.reduce_sum( (xpos - mu[:,:,0])*(zpos - mu[:,:,2]), axis = 1)/npts #-
    yz = tf.math.reduce_sum( (ypos - mu[:,:,1])*(zpos - mu[:,:,2]), axis = 1)/npts #-

    sigma = tf.Variable([xx, xy, xz,
                        xy, yy, yz,
                        xz, yz, zz]) 
    sigma = tf.reshape(tf.transpose(sigma), (tf.shape(sigma)[1] ,3,3))
        
#     mu = None
    return(mu, sigma)

@tf.function
def sample(x, samples=3):
  """https://stackoverflow.com/questions/71073873/sample-from-ragged-tensor"""  
  length = tf.shape(x)[0]
#   was this
#   x = tf.cond(tf.less_equal(length, samples), lambda: x, lambda: tf.gather(x, tf.random.shuffle(tf.range(length))[:samples]))
 
#   test
#   x = tf.cond(tf.less_equal(length, samples), lambda: x, lambda: tf.gather(x, tf.range(length))[:samples])
  x = tf.gather(x,tf.range(length))[:samples]

    
  return x

In [None]:
s = time.time()
mu2, sigma2 = it.fit_gaussian(it.cloud2_tensor, it.inside2, tf.cast(it.npts2, tf.float32))
print("\n took", time.time() - s, " s with old method")

s = time.time()
mu2, sigma2 = fg2(it.cloud2_tensor, it.inside2, tf.cast(it.npts2, tf.float32))
print(" \n took", time.time() - s, " s with new method")

# print(it.npts2)
# print(it.inside2)

In [9]:
# vect = it.inside2
vect = tf.ragged.constant([[],[1,2,3,4],[5,4,3,2,1],[6],[99],[7,8,9,10,11,12,13]])
# print(tf.shape(vect)[0])
print("vect", vect)
c = tf.map_fn(sample, vect)
# print(c)

#wrong
# test = tf.gather(vect,tf.range(tf.shape(vect)[0]))[:3]
idx = tf.range(3)
print("\n idx", idx)
test = tf.gather(vect, idx , axis = 1)
print("\n test", test) #NOTE: indices with too few elements produce unexpected behavior
                        #that doesn't matter since they get suppressed anyways
    
vec2 = tf.random.categorical(vect, 2)
    

vect <tf.RaggedTensor [[], [1, 2, 3, 4], [5, 4, 3, 2, 1], [6], [99], [7, 8, 9, 10, 11, 12, 13]]>

 idx tf.Tensor([0 1 2], shape=(3,), dtype=int32)

 test tf.Tensor(
[[ 1  2  3]
 [ 1  2  3]
 [ 5  4  3]
 [ 6 99  7]
 [99  7  8]
 [ 7  8  9]], shape=(6, 3), dtype=int32)


ValueError: TypeError: object of type 'RaggedTensor' has no len()
