# Notebook for identifying and removing bottlenecks from ICET implementation

In [1]:
from vedo import *
import os
from ipyvtklink.viewer import ViewInteractiveWidget
import pykitti
import numpy as np
import tensorflow as tf
import time

#limit GPU memory ------------------------------------------------
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
if gpus:
  try:
    memlim = 4*1024
    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memlim)])
  except RuntimeError as e:
    print(e)
#-----------------------------------------------------------------
# tf.config.set_visible_devices([], 'GPU') #run on CPU only -- seems to actually execute main parts of code faster here...

from tensorflow.math import sin, cos, tan
import tensorflow_probability as tfp
from ICET_spherical import ICET
from utils import R_tf
from metpy.calc import lat_lon_grid_deltas

%load_ext autoreload
%autoreload 2
%autosave 180
# %matplotlib notebook

# %%bash
# # python -m cProfile scan_match.py
# python scan_match.py

2022-11-20 14:36:08.471606: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-20 14:36:09.087791: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-20 14:36:10.084863: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/derm/anaconda3/envs/py39/lib/python3.9/site-packages/cv2/../../lib64:
2022-11-20 14:36:10.084926: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_p

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2022-11-20 14:36:11.870839: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-20 14:36:11.872523: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-20 14:36:11.872804: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-20 14:36:11.873008: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

Autosaving every 180 seconds


In [9]:
basepath = '/media/derm/06EF-127D1/KITTI'
# sequence = '03' #forest
sequence = '09' #trees and small town
dataset = pykitti.odometry(basepath, sequence)
velo1 = dataset.get_velo(400)
c1 = velo1[:,:3]
velo2 = dataset.get_velo(401)
c2 = velo2[:,:3]

it = ICET(cloud1 = c1, cloud2 = c2, fid = 70, niter = 9, 
           draw = False, group = 2, RM = True, DNN_filter = False)

Ground truth poses are not avaialble for sequence 09.

 loading model took 4.76837158203125e-07 
 total:  5.0067901611328125e-06

 shuffling and converting to tensor took  0.006960153579711914 
 total:  0.006984710693359375

 converting to spherical took 0.016175270080566406 
 total:  0.023174524307250977

 getting cluster took 1.4108836650848389 seconds !!!

 fit_gaussian for scan 1 0.01434469223022461 
 total:  1.5166802406311035

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.06150174140930176 
 total:  1.6386260986328125 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor([ 0.30780596  0.01272369  0.00220542 -0.00058785  0.0013874  -0.00494641], shape=(6,), dtype=float32)

 ~~~~~~~~~~~~~~ 
 correcting solution estimate 0.016361474990844727 
 total:  1.655013084411621 
 ~~~~~~~~~~~~~~

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.05533647537231445 
 total:  1.7106268405914307 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor(
[ 6.6496092e-01  8.5625611e-04  8.4208352e-

# get_cluster()

In [185]:
def gt2(rads, thresh = 0.5, mnp = 100):
    """testing new method of finding radial bins for spherical voxels"""
    
    before = time.time()

    max_buffer = 0.2 

    if len(tf.shape(rads)) < 2:
        rads = rads[:,None]

    OG_rads = rads #hold on to OG rads
    #replace all zeros in rads (result of converting ragged -> standard tensor) with some arbitrarily large value
    mask = tf.cast(tf.math.equal(rads, 0), tf.float32)*1000
    rads = rads + mask
    # print(rads)

    #sort in ascending order for each column in tensor
    top_k = tf.math.top_k(tf.transpose(rads), k = tf.shape(rads)[0])
#     print("\n top_k \n", top_k[1])
    rads = tf.transpose(tf.gather(tf.transpose(rads), top_k[1], batch_dims = 1))
    rads = tf.reverse(rads, axis = tf.constant([0]))
    print("rads \n", rads)

    # calculate the forward difference between neighboring points
    z = tf.zeros([1, tf.shape(rads)[1].numpy()])
    shifted = tf.concat((rads[1:], z), axis = 0)
    diff = shifted - rads
    # diff = tf.math.abs(rads - shifted) #debug 6/9/22
#     print("\n diff \n", diff)

    # #find where difference jumps
    jumps = tf.where(diff > thresh)
#     print("\n jumps \n", jumps) #[idx of jump, which spike is jumping]

    #----------------------------------------------------------------------
    #not sure if actually needed...
    #get indexes of all used spikes
    used = jumps[:,1][None,:]
    # print("used", used)
    biggest = tf.math.reduce_max(used, axis = 1).numpy()[0]
    # print("biggest", biggest)
    all_spikes = tf.cast(tf.linspace(0,biggest,biggest+1), tf.int64)[None,:] #list all spikes total
    # print("all_spikes", all_spikes)

    #find differnce
    missing = tf.sets.difference(all_spikes, used).values[None,:]
    # print("\n missing", missing)
    # z = tf.zeros(tf.shape(missing), dtype = tf.int64) #wrong...
    # z = 51*tf.ones(tf.shape(missing), dtype = tf.int64) #wrong...
    # print("z", z)

    #z should be this...
    # print("\n OG_rads", OG_rads)
    # ends = tf.math.argmax(OG_rads, axis = 0) #wrong -> not max arg, last nonzero argument!!
    zero = tf.constant(0, dtype = tf.float32)
    ends = tf.math.reduce_sum(tf.cast(tf.not_equal(OG_rads, zero), tf.int64), axis = 0) #correct
    # print("\n ends", ends)

    test = tf.gather(ends, missing[0])  #get index of last element of missing jump section
    # print("\n test", test)
    z = test[None,:]
    z -= 2 #fixes indexing bug
    # print("z", z)

    missing = tf.transpose(tf.concat((z, missing), axis = 0))
    # print(missing)

    jumps = tf.concat((jumps, missing), axis = 0) #concat missing stuff back at the end of jumps
#     print("\n jumps after fix", jumps)
    #----------------------------------------------------------------------
    
    print("\n jumps: \n", jumps.numpy())
    
    #find where the first large cluster occurs in each spike
   

    
    bounds = None

    return(bounds, jumps)

In [186]:
from utils import get_cluster
# print("rads: \n", it.rads)

s = time.time()
bounds_old = get_cluster(it.rads, mnp = it.min_num_pts)
print("\n took", time.time() - s, " s with old method \n")

s = time.time()
bounds_new, jumps = gt2(it.rads, mnp = it.min_num_pts)
print(" \n took", time.time() - s, " s with new method")


 took 1.4740512371063232  s with old method 

rads 
 tf.Tensor(
[[   6.308901     4.503432     3.6884313 ...    4.6837316    5.6261888
    54.913895 ]
 [   6.4553094    4.5076594    9.501441  ...    4.6909585 1000.
  1000.       ]
 [   6.456663     4.5077553    9.515896  ... 1000.        1000.
  1000.       ]
 ...
 [1000.        1000.        1000.        ... 1000.        1000.
  1000.       ]
 [1000.        1000.        1000.        ... 1000.        1000.
  1000.       ]
 [1000.        1000.        1000.        ... 1000.        1000.
  1000.       ]], shape=(506, 536), dtype=float32)

 jumps: 
 [[  0   2]
 [  0 163]
 [  0 283]
 ...
 [501 118]
 [502 162]
 [504 162]]
 
 took 0.006463766098022461  s with new method


In [240]:
#identifying location of jumps without looping
print("\n jumps: \n", tf.shape(jumps))
# print("\n jumps: \n",jumps[:30])
print(tf.shape(bounds_old))

y, idx = tf.unique(jumps[:,0])
# print("\n y \n", y, "\n \n idx", idx)

#get ragged tensor containing indices where jumps occur in each wedge shaped voxel
jumps_rag = tf.RaggedTensor.from_value_rowids(jumps[:,1], idx)
print("\n jumps_rag \n", jumps_rag[:10])
# print("\n jumps_rag \n", tf.shape(jumps_rag))

#get num points between each jump 
npts_between_jumps = tf.experimental.numpy.diff(jumps_rag.to_tensor())
print("\n npts_between_jumps:\n ",npts_between_jumps[:10])

#get idx within jumps_rag corresponding to first sufficiently large jump
big_enough = tf.cast(tf.math.greater(npts_between_jumps, 100), tf.int32)
# print(big_enough[:10])
first_big_enough = tf.math.argmax(test, axis = 1)
print("\n first_big_enough: \n", first_big_enough[:10])

#get all radial measurements
#(temp-- already done inside function)-----------------------------
mask = tf.cast(tf.math.equal(it.rads, 0), tf.float32)*1000
rads = it.rads + mask
#sort in ascending order for each column in tensor
top_k = tf.math.top_k(tf.transpose(rads), k = tf.shape(rads)[0])
rads = tf.transpose(tf.gather(tf.transpose(rads), top_k[1], batch_dims = 1))
rads = tf.reverse(rads, axis = tf.constant([0]))
print("\n rads: \n", rads)
#-----------------------------------------------------------

#get inner and outer (temp simple way-- just use radial measurements of inner and outermost points in cluster)
inner = tf.gather(jumps_rag.to_tensor(), first_big_enough, batch_dims = 1)
print("\n inner: \n", inner[:10])
# inner_rads = tf.gather(rads, inner, batch_dims = 0) #wrong
# print("\n inner rads: \n", inner_rads)

# outer = tf.gather(jumps_rag.to_tensor(), first_big_enough+1, batch_dims = 1)
# print("\n outer: \n", outer)

#TODO add padding 
#  (max half distance betweeen last in cluster and first point outside cluster)


 jumps: 
 tf.Tensor([1404    2], shape=(2,), dtype=int32)
tf.Tensor([536   2], shape=(2,), dtype=int32)

 jumps_rag 
 <tf.RaggedTensor [[2, 163, 283, 392, 409, 486, 492, 501, 511, 520, 524, 534, 535],
 [164, 249, 354, 485, 492, 506, 532, 533], [116, 140, 334, 450, 502],
 [93, 281, 283, 450, 523, 526], [121, 283, 374, 528],
 [31, 70, 141, 520, 523], [167, 283, 527, 529], [132, 528], [20, 273, 486],
 [283, 478, 497, 502, 518, 519, 531]]>

 npts_between_jumps:
  tf.Tensor(
[[ 161  120  109   17   77    6    9   10    9    4   10    1]
 [  85  105  131    7   14   26    1 -533    0    0    0    0]
 [  24  194  116   52 -502    0    0    0    0    0    0    0]
 [ 188    2  167   73    3 -526    0    0    0    0    0    0]
 [ 162   91  154 -528    0    0    0    0    0    0    0    0]
 [  39   71  379    3 -523    0    0    0    0    0    0    0]
 [ 116  244    2 -529    0    0    0    0    0    0    0    0]
 [ 396 -528    0    0    0    0    0    0    0    0    0    0]
 [ 253  213 -486    

# fit_gaussian()

In [6]:
def fg2(cloud, rag, npts):
    """new method of fitting gaussian to better handle ragged input data"""
    numSamples = 3
    
    coords = tf.gather(cloud, rag)
    mu = tf.math.reduce_mean(coords, axis = 1)[:,None]
#     mu = tf.math.reduce_mean(coords, axis = 1) #old
#     print(mu)

#   TODO: try randomly sampling 30 points from each ragged cell, use reduced num pts to calculate covariance
#     subsampled = tf.map_fn(sample, it.inside2) #works but SLOW
#     subsampled = tf.map_fn(sample, it.inside2, parallel_iterations=True)
#     subsampled = tf.gather(rag,tf.range(tf.shape(rag)[0]))[:numSamples] #wrong
#     print(subsampled)

    xpos = tf.gather(cloud[:,0], rag)
    ypos = tf.gather(cloud[:,1], rag)
    zpos = tf.gather(cloud[:,2], rag)
#     c = tfp.stats.covariance(xpos.to_tensor(), ypos.to_tensor())

#     print(xpos)
    idx = tf.range(30)
    xpos = tf.gather(xpos, idx, axis = 1)
    ypos = tf.gather(ypos, idx, axis = 1)
    zpos = tf.gather(zpos, idx, axis = 1)
    print(xpos)

    xx = tf.math.reduce_sum(tf.math.square(xpos - mu[:,:,0] ), axis = 1)/npts
    yy = tf.math.reduce_sum(tf.math.square(ypos - mu[:,:,1] ), axis = 1)/npts
    zz = tf.math.reduce_sum(tf.math.square(zpos - mu[:,:,2] ), axis = 1)/npts
    xy = tf.math.reduce_sum( (xpos - mu[:,:,0])*(ypos - mu[:,:,1]), axis = 1)/npts  #+
    xz = tf.math.reduce_sum( (xpos - mu[:,:,0])*(zpos - mu[:,:,2]), axis = 1)/npts #-
    yz = tf.math.reduce_sum( (ypos - mu[:,:,1])*(zpos - mu[:,:,2]), axis = 1)/npts #-

    sigma = tf.Variable([xx, xy, xz,
                        xy, yy, yz,
                        xz, yz, zz]) 
    sigma = tf.reshape(tf.transpose(sigma), (tf.shape(sigma)[1] ,3,3))
        
#     mu = None
    return(mu, sigma)

@tf.function
def sample(x, samples=3):
  """https://stackoverflow.com/questions/71073873/sample-from-ragged-tensor"""  
  length = tf.shape(x)[0]
#   was this
#   x = tf.cond(tf.less_equal(length, samples), lambda: x, lambda: tf.gather(x, tf.random.shuffle(tf.range(length))[:samples]))
 
#   test
#   x = tf.cond(tf.less_equal(length, samples), lambda: x, lambda: tf.gather(x, tf.range(length))[:samples])
  x = tf.gather(x,tf.range(length))[:samples]

    
  return x

In [None]:
s = time.time()
mu2, sigma2 = it.fit_gaussian(it.cloud2_tensor, it.inside2, tf.cast(it.npts2, tf.float32))
print("\n took", time.time() - s, " s with old method")

s = time.time()
mu2, sigma2 = fg2(it.cloud2_tensor, it.inside2, tf.cast(it.npts2, tf.float32))
print(" \n took", time.time() - s, " s with new method")

# print(it.npts2)
# print(it.inside2)

In [9]:
# vect = it.inside2
vect = tf.ragged.constant([[],[1,2,3,4],[5,4,3,2,1],[6],[99],[7,8,9,10,11,12,13]])
# print(tf.shape(vect)[0])
print("vect", vect)
c = tf.map_fn(sample, vect)
# print(c)

#wrong
# test = tf.gather(vect,tf.range(tf.shape(vect)[0]))[:3]
idx = tf.range(3)
print("\n idx", idx)
test = tf.gather(vect, idx , axis = 1)
print("\n test", test) #NOTE: indices with too few elements produce unexpected behavior
                        #that doesn't matter since they get suppressed anyways
    
vec2 = tf.random.categorical(vect, 2)
    

vect <tf.RaggedTensor [[], [1, 2, 3, 4], [5, 4, 3, 2, 1], [6], [99], [7, 8, 9, 10, 11, 12, 13]]>

 idx tf.Tensor([0 1 2], shape=(3,), dtype=int32)

 test tf.Tensor(
[[ 1  2  3]
 [ 1  2  3]
 [ 5  4  3]
 [ 6 99  7]
 [99  7  8]
 [ 7  8  9]], shape=(6, 3), dtype=int32)


ValueError: TypeError: object of type 'RaggedTensor' has no len()
