# Notebook for identifying and removing bottlenecks from ICET implementation

In [1]:
from vedo import *
import os
from ipyvtklink.viewer import ViewInteractiveWidget
import pykitti
import numpy as np
import tensorflow as tf
import time

#limit GPU memory ------------------------------------------------
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
if gpus:
  try:
    memlim = 4*1024
    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memlim)])
  except RuntimeError as e:
    print(e)
#-----------------------------------------------------------------
# tf.config.set_visible_devices([], 'GPU') #run on CPU only -- seems to actually execute main parts of code faster here...

from tensorflow.math import sin, cos, tan
import tensorflow_probability as tfp
from ICET_spherical import ICET
from utils import R_tf
from metpy.calc import lat_lon_grid_deltas

%load_ext autoreload
%autoreload 2
%autosave 180
# %matplotlib notebook

# %%bash
# # python -m cProfile scan_match.py
# python scan_match.py

2022-11-21 09:31:16.403426: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-21 09:31:17.027722: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-21 09:31:18.019421: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/derm/anaconda3/envs/py39/lib/python3.9/site-packages/cv2/../../lib64:
2022-11-21 09:31:18.019499: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_p

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2022-11-21 09:31:19.833191: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-21 09:31:19.834829: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-21 09:31:19.835099: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-21 09:31:19.835518: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

Autosaving every 180 seconds


In [9]:
basepath = '/media/derm/06EF-127D1/KITTI'
# sequence = '03' #forest
sequence = '09' #trees and small town
dataset = pykitti.odometry(basepath, sequence)
velo1 = dataset.get_velo(400)
c1 = velo1[:,:3]
velo2 = dataset.get_velo(401)
c2 = velo2[:,:3]

it = ICET(cloud1 = c1, cloud2 = c2, fid = 70, niter = 9, 
           draw = False, group = 2, RM = True, DNN_filter = False)

Ground truth poses are not avaialble for sequence 09.

 loading model took 4.76837158203125e-07 
 total:  4.291534423828125e-06

 shuffling and converting to tensor took  0.030080080032348633 
 total:  0.0301055908203125

 converting to spherical took 0.015475988388061523 
 total:  0.04559779167175293

 fit_gaussian for scan 1 0.01750493049621582 
 total:  1.5329134464263916

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.0571894645690918 
 total:  1.64686918258667 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor([ 0.33976105  0.0124645   0.00270582 -0.00057757  0.00145208 -0.00518778], shape=(6,), dtype=float32)

 ~~~~~~~~~~~~~~ 
 correcting solution estimate 0.01694178581237793 
 total:  1.6638340950012207 
 ~~~~~~~~~~~~~~

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.05123162269592285 
 total:  1.7153396606445312 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor(
[ 6.9391894e-01 -1.3333373e-03  8.4738331e-03 -4.3932843e-04
  1.7295798e-03  1.7378689e-03], shape=(6,)

# get_cluster()

In [412]:
def gt2(rads, thresh = 0.5, mnp = 100):
    """testing new method of finding radial bins for spherical voxels"""
    
    before = time.time()

    max_buffer = 0.2 

    if len(tf.shape(rads)) < 2:
        rads = rads[:,None]

    OG_rads = rads #hold on to OG rads
    #replace all zeros in rads (result of converting ragged -> standard tensor) with some arbitrarily large value
    mask = tf.cast(tf.math.equal(rads, 0), tf.float32)*1000
    rads = rads + mask
    # print(rads)

    #sort in ascending order for each column in tensor
    top_k = tf.math.top_k(tf.transpose(rads), k = tf.shape(rads)[0])
#     print("\n top_k \n", top_k[1])
    rads = tf.transpose(tf.gather(tf.transpose(rads), top_k[1], batch_dims = 1))
    rads = tf.reverse(rads, axis = tf.constant([0]))
    print("rads \n", rads)

    # calculate the forward difference between neighboring points
    z = tf.zeros([1, tf.shape(rads)[1].numpy()])
    shifted = tf.concat((rads[1:], z), axis = 0)
    diff = shifted - rads
    # diff = tf.math.abs(rads - shifted) #debug 6/9/22
#     print("\n diff \n", diff)

    # #find where difference jumps
    jumps = tf.where(diff > thresh)
#     print("\n jumps \n", jumps) #[idx of jump, which spike is jumping]

    #----------------------------------------------------------------------
    #not sure if actually needed...
    #get indexes of all used spikes
    used = jumps[:,1][None,:]
    # print("used", used)
    biggest = tf.math.reduce_max(used, axis = 1).numpy()[0]
    # print("biggest", biggest)
    all_spikes = tf.cast(tf.linspace(0,biggest,biggest+1), tf.int64)[None,:] #list all spikes total
    # print("all_spikes", all_spikes)

    #find differnce
    missing = tf.sets.difference(all_spikes, used).values[None,:]
    # print("\n missing", missing)
    # z = tf.zeros(tf.shape(missing), dtype = tf.int64) #wrong...
    # z = 51*tf.ones(tf.shape(missing), dtype = tf.int64) #wrong...
    # print("z", z)

    #z should be this...
    # print("\n OG_rads", OG_rads)
    # ends = tf.math.argmax(OG_rads, axis = 0) #wrong -> not max arg, last nonzero argument!!
    zero = tf.constant(0, dtype = tf.float32)
    ends = tf.math.reduce_sum(tf.cast(tf.not_equal(OG_rads, zero), tf.int64), axis = 0) #correct
    # print("\n ends", ends)

    test = tf.gather(ends, missing[0])  #get index of last element of missing jump section
    # print("\n test", test)
    z = test[None,:]
    z -= 2 #fixes indexing bug
    # print("z", z)

    missing = tf.transpose(tf.concat((z, missing), axis = 0))
    # print(missing)

    jumps = tf.concat((jumps, missing), axis = 0) #concat missing stuff back at the end of jumps
#     print("\n jumps after fix", jumps)
    #----------------------------------------------------------------------
    
    print("\n jumps: \n", jumps.numpy())
    
    #find where the first large cluster occurs in each spike
   

    
    bounds = None

    return(bounds, jumps)

In [415]:
from utils import get_cluster
# print("rads: \n", it.rads)

s = time.time()
bounds_old = get_cluster(it.rads, mnp = it.min_num_pts)
print("\n took", time.time() - s, " s with old method \n")
print("\n bounds_old: \n", bounds_old)
print(np.shape(bounds_old))

s = time.time()
bounds_new, jumps = gt2(it.rads, mnp = it.min_num_pts)
print(" \n took", time.time() - s, " s with new method")


 took 1.0185918807983398  s with old method 


 bounds_old: 
 tf.Tensor(
[[ 8.48445892 12.37514305]
 [ 7.32219505 10.72554016]
 [ 5.35522413  6.61872005]
 ...
 [ 0.          0.        ]
 [ 0.          0.        ]
 [ 0.          0.        ]], shape=(536, 2), dtype=float64)
(536, 2)
rads 
 tf.Tensor(
[[   8.449831     7.2839036    5.336023  ...   27.126928     3.7713213
    54.913895 ]
 [   8.484459     7.322195     5.355224  ...   27.1396       3.779652
  1000.       ]
 [   8.487444     7.3482585    5.3561454 ...   27.14838      3.8077207
  1000.       ]
 ...
 [1000.        1000.        1000.        ... 1000.        1000.
  1000.       ]
 [1000.        1000.        1000.        ... 1000.        1000.
  1000.       ]
 [1000.        1000.        1000.        ... 1000.        1000.
  1000.       ]], shape=(506, 536), dtype=float32)

 jumps: 
 [[  0  45]
 [  0 162]
 [  0 269]
 ...
 [501 102]
 [502 287]
 [504 287]]
 
 took 0.006579399108886719  s with new method


In [458]:
#identifying location of jumps without looping
print("old slow soln shape: \n", tf.shape(bounds_old)) #want to produce this same shape!!!
print("\n bounds_old: \n", bounds_old[:15])

#get all radial measurements
#(temp-- already done inside function)-----------------------------
mask = tf.cast(tf.math.equal(it.rads, 0), tf.float32)*1000
rads = it.rads + mask
#sort in ascending order for each column in tensor
top_k = tf.math.top_k(tf.transpose(rads), k = tf.shape(rads)[0])
rads = tf.transpose(tf.gather(tf.transpose(rads), top_k[1], batch_dims = 1))
rads = tf.reverse(rads, axis = tf.constant([0]))
# print("\n rads: \n", rads[:10])
# print("\n it.rads \n", it.rads)
#------------------------------------------------------------------

# print("\n jumps: \n", tf.shape(jumps))
# print("\n jumps: \n",jumps[:30])

y, idx = tf.unique(jumps[:,0])
# print("\n y \n", y, "\n \n idx", idx)
# print("\n jumps \n", jumps)

# get ragged tensor containing indices where jumps occur in each wedge shaped voxel
jumps_rag = tf.RaggedTensor.from_value_rowids(jumps[:,1], idx)
# append 0 to beginning of each ragged elemet of jumps_rag
jumps_rag = tf.concat([zeros.with_row_splits_dtype(tf.int64), jumps_rag.with_row_splits_dtype(tf.int64)], axis = 1)
print("\n jumps_rag \n", jumps_rag[:15])

#get num points between each jump 
npts_between_jumps = tf.experimental.numpy.diff(jumps_rag.to_tensor())
# print("\n npts_between_jumps:\n ",npts_between_jumps[:15])
# print("\n npts_between_jumps:\n ",npts_between_jumps)

#get idx within jumps_rag corresponding to first sufficiently large jump
big_enough = tf.cast(tf.math.greater(npts_between_jumps, 100), tf.int32)
# print(big_enough[:10])
first_big_enough = tf.math.argmax(big_enough, axis = 1)
print("\n first_big_enough: \n", first_big_enough[:15])
# print("\n first_big_enough: \n", first_big_enough)

#------------------
#get inner and outer (temp simple way-- just use radial measurements of inner and outermost points in cluster)
inner = tf.gather(jumps_rag.to_tensor(), first_big_enough, batch_dims = 1)
print("\n inner: \n", inner[:10])
# print("\n inner: \n", inner)

#infill zero elements throughout (use tf.ragged.from_value_rowids keyed by y)
inner = tf.RaggedTensor.from_value_rowids(inner, y).to_tensor()[:,0]
#add zeros to end of bounds to get to same number of total voxels as OG_rads 
inner = tf.pad(inner, [[0,tf.shape(rads)[1]-len(inner)]]) #DEBUG-- make sure I'm using correct dimension of tf.shape(rads)
# print("\n inner \n", inner)
# print("\n inner \n", tf.shape(inner))

#concat idx and y, use gather_nd instead of converting to ragged and back?? 
# idx = tf.concat((tf.cast(tf.range(len(inner))[:,None], tf.int64), inner[:,None]), axis = 1) #wrong?
idx1 = tf.concat((inner[:,None], tf.cast(tf.range(len(inner))[:,None], tf.int64)), axis = 1) #test
print("\n idx \n", idx)
inside_bound = tf.gather_nd(rads, idx1)
print("\n inside_bound \n", inside_bound[:15])


#repeat for outside bound
outer = tf.gather(jumps_rag.to_tensor(), first_big_enough +1, batch_dims = 1)
outer = tf.RaggedTensor.from_value_rowids(outer, y).to_tensor()[:,0]
outer = tf.pad(outer, [[0,tf.shape(rads)[1]-len(outer)]]) #DEBUG-- make sure I'm using correct dimension of tf.shape(rads)
idx2 = tf.concat((outer[:,None], tf.cast(tf.range(len(outer))[:,None], tf.int64)), axis = 1) #test
outside_bound = tf.gather_nd(rads, idx2)
print("\n outside_bound \n", outside_bound[:15])

#------------------

# #test-----------------
# #infill zero elements throughout (use tf.ragged.from_value_rowids keyed by y)
# first_big_enough = tf.RaggedTensor.from_value_rowids(first_big_enough, y).to_tensor()[:,0]
# #add zeros to end of bounds to get to same number of total voxels as OG_rads 
# first_big_enough = tf.pad(first_big_enough, [[0,tf.shape(rads)[1]-len(first_big_enough)]])
# print("\n first_big_enough: \n", first_big_enough[:15])

# # inner_idx = tf.gather(jumps_rag.to_tensor(), first_big_enough)
# # idx = tf.concat((tf.cast(tf.range(len(inner))[:,None], tf.int64), inner[:,None]), axis = 1)
# #--------------------

#TODO add voxel length padding 
#  (max half distance betweeen last in cluster and first point outside cluster)

old slow soln shape: 
 tf.Tensor([536   2], shape=(2,), dtype=int32)

 bounds_old: 
 tf.Tensor(
[[ 8.48445892 12.37514305]
 [ 7.32219505 10.72554016]
 [ 5.35522413  6.61872005]
 [ 5.20210648  6.24694252]
 [ 0.          0.        ]
 [ 4.89096785  5.85317039]
 [ 4.64766264  5.13235998]
 [ 4.55753613  5.00266695]
 [11.36053562 18.66326904]
 [11.01212311 18.64307213]
 [ 3.32399344  3.54074168]
 [ 3.50027776  5.51918268]
 [ 6.24643803  7.73354149]
 [ 8.13937759 10.06496334]
 [ 6.92522478  9.03021431]], shape=(15, 2), dtype=float64)

 jumps_rag 
 <tf.RaggedTensor [[0, 45, 162, 269, 316, 376, 379, 448, 486, 500, 513, 524, 531, 535],
 [0, 13, 255, 349, 385, 389, 493, 500, 523], [0, 87, 242, 321, 384, 411],
 [0, 242, 269, 281, 427, 532, 534], [0, 269, 407, 425, 514],
 [0, 44, 329, 524, 529, 534], [0, 104, 164, 269, 517], [0, 426, 514],
 [0, 263, 444, 513], [0, 254, 269, 384, 481, 490, 528, 533],
 [0, 311, 456, 513, 520], [0, 349, 514], [0, 349, 437, 513, 516, 523],
 [0, 349, 384, 523, 530], [0,

In [429]:
#multi-dimensional indexing test
# a = tf.random.uniform([3,3])
# print(a)
# idx = tf.constant([[1,1],[2,2]])
# b = tf.gather_nd(a, idx)
# print(b)

#test adding zeros to start of each ragged tensor
# print(tf.shape(jumps_rag))
zeros = tf.zeros(tf.shape(jumps_rag)[0])[:,None]
zeros = tf.cast(tf.RaggedTensor.from_tensor(zeros), tf.int64)
# print(tf.shape(zeros))
# print(tf.shape(jumps_rag))
test = tf.concat([zeros.with_row_splits_dtype(tf.int64), jumps_rag.with_row_splits_dtype(tf.int64)], axis = 1)
print(test)

<tf.RaggedTensor [[0, 0, 45, 162, 269, 316, 376, 379, 448, 486, 500, 513, 524, 531, 535],
 [0, 0, 13, 255, 349, 385, 389, 493, 500, 523],
 [0, 0, 87, 242, 321, 384, 411], [0, 0, 242, 269, 281, 427, 532, 534],
 [0, 0, 269, 407, 425, 514], [0, 0, 44, 329, 524, 529, 534],
 [0, 0, 104, 164, 269, 517], [0, 0, 426, 514], [0, 0, 263, 444, 513],
 [0, 0, 254, 269, 384, 481, 490, 528, 533], [0, 0, 311, 456, 513, 520],
 [0, 0, 349, 514], [0, 0, 349, 437, 513, 516, 523],
 [0, 0, 349, 384, 523, 530], [0, 0, 271, 385], [0, 0, 172],
 [0, 0, 329, 426, 518, 526], [0, 0, 168, 417, 457, 491], [0, 0, 504],
 [0, 0, 278, 306], [0, 0, 168, 278, 385, 412, 481], [0, 0, 150],
 [0, 0, 199], [0, 0, 254, 278], [0, 0, 98, 136, 529],
 [0, 0, 278, 490, 508], [0, 0, 364, 384, 399],
 [0, 0, 254, 364, 384, 481, 521], [0, 0, 254, 278, 384, 433],
 [0, 0, 49, 147, 150, 239, 243, 384, 432, 481],
 [0, 0, 29, 58, 119, 228, 384, 412, 448, 513, 516, 522],
 [0, 0, 54, 84, 117, 272, 384, 385, 515], [0, 0, 270, 384, 498, 516, 519]

# fit_gaussian()

In [6]:
def fg2(cloud, rag, npts):
    """new method of fitting gaussian to better handle ragged input data"""
    numSamples = 3
    
    coords = tf.gather(cloud, rag)
    mu = tf.math.reduce_mean(coords, axis = 1)[:,None]
#     mu = tf.math.reduce_mean(coords, axis = 1) #old
#     print(mu)

#   TODO: try randomly sampling 30 points from each ragged cell, use reduced num pts to calculate covariance
#     subsampled = tf.map_fn(sample, it.inside2) #works but SLOW
#     subsampled = tf.map_fn(sample, it.inside2, parallel_iterations=True)
#     subsampled = tf.gather(rag,tf.range(tf.shape(rag)[0]))[:numSamples] #wrong
#     print(subsampled)

    xpos = tf.gather(cloud[:,0], rag)
    ypos = tf.gather(cloud[:,1], rag)
    zpos = tf.gather(cloud[:,2], rag)
#     c = tfp.stats.covariance(xpos.to_tensor(), ypos.to_tensor())

#     print(xpos)
    idx = tf.range(30)
    xpos = tf.gather(xpos, idx, axis = 1)
    ypos = tf.gather(ypos, idx, axis = 1)
    zpos = tf.gather(zpos, idx, axis = 1)
    print(xpos)

    xx = tf.math.reduce_sum(tf.math.square(xpos - mu[:,:,0] ), axis = 1)/npts
    yy = tf.math.reduce_sum(tf.math.square(ypos - mu[:,:,1] ), axis = 1)/npts
    zz = tf.math.reduce_sum(tf.math.square(zpos - mu[:,:,2] ), axis = 1)/npts
    xy = tf.math.reduce_sum( (xpos - mu[:,:,0])*(ypos - mu[:,:,1]), axis = 1)/npts  #+
    xz = tf.math.reduce_sum( (xpos - mu[:,:,0])*(zpos - mu[:,:,2]), axis = 1)/npts #-
    yz = tf.math.reduce_sum( (ypos - mu[:,:,1])*(zpos - mu[:,:,2]), axis = 1)/npts #-

    sigma = tf.Variable([xx, xy, xz,
                        xy, yy, yz,
                        xz, yz, zz]) 
    sigma = tf.reshape(tf.transpose(sigma), (tf.shape(sigma)[1] ,3,3))
        
#     mu = None
    return(mu, sigma)

@tf.function
def sample(x, samples=3):
  """https://stackoverflow.com/questions/71073873/sample-from-ragged-tensor"""  
  length = tf.shape(x)[0]
#   was this
#   x = tf.cond(tf.less_equal(length, samples), lambda: x, lambda: tf.gather(x, tf.random.shuffle(tf.range(length))[:samples]))
 
#   test
#   x = tf.cond(tf.less_equal(length, samples), lambda: x, lambda: tf.gather(x, tf.range(length))[:samples])
  x = tf.gather(x,tf.range(length))[:samples]

    
  return x

In [None]:
s = time.time()
mu2, sigma2 = it.fit_gaussian(it.cloud2_tensor, it.inside2, tf.cast(it.npts2, tf.float32))
print("\n took", time.time() - s, " s with old method")

s = time.time()
mu2, sigma2 = fg2(it.cloud2_tensor, it.inside2, tf.cast(it.npts2, tf.float32))
print(" \n took", time.time() - s, " s with new method")

# print(it.npts2)
# print(it.inside2)

In [9]:
# vect = it.inside2
vect = tf.ragged.constant([[],[1,2,3,4],[5,4,3,2,1],[6],[99],[7,8,9,10,11,12,13]])
# print(tf.shape(vect)[0])
print("vect", vect)
c = tf.map_fn(sample, vect)
# print(c)

#wrong
# test = tf.gather(vect,tf.range(tf.shape(vect)[0]))[:3]
idx = tf.range(3)
print("\n idx", idx)
test = tf.gather(vect, idx , axis = 1)
print("\n test", test) #NOTE: indices with too few elements produce unexpected behavior
                        #that doesn't matter since they get suppressed anyways
    
vec2 = tf.random.categorical(vect, 2)
    

vect <tf.RaggedTensor [[], [1, 2, 3, 4], [5, 4, 3, 2, 1], [6], [99], [7, 8, 9, 10, 11, 12, 13]]>

 idx tf.Tensor([0 1 2], shape=(3,), dtype=int32)

 test tf.Tensor(
[[ 1  2  3]
 [ 1  2  3]
 [ 5  4  3]
 [ 6 99  7]
 [99  7  8]
 [ 7  8  9]], shape=(6, 3), dtype=int32)


ValueError: TypeError: object of type 'RaggedTensor' has no len()
