# Notebook for identifying and removing bottlenecks from the ICET implementation

In [1]:
from vedo import *
import os
from ipyvtklink.viewer import ViewInteractiveWidget
import pykitti
import numpy as np
import tensorflow as tf
import time

# Cap how much memory TensorFlow may claim on the first GPU ---------
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
if gpus:
  try:
    # 4*1024 is passed as the memory_limit of the single virtual device on gpus[0]
    # (presumably megabytes, i.e. 4 GiB -- confirm against the TensorFlow docs)
    memlim = 4*1024
    gpu0_config = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memlim)
    tf.config.experimental.set_virtual_device_configuration(gpus[0], [gpu0_config])
  except RuntimeError as e:
    # a RuntimeError raised while configuring the device is reported, not re-raised
    print(e)
#-----------------------------------------------------------------
# tf.config.set_visible_devices([], 'GPU') #run on CPU only -- seems to actually execute main parts of code faster here...

from tensorflow.math import sin, cos, tan
import tensorflow_probability as tfp
from ICET_spherical import ICET
from utils import R_tf
from metpy.calc import lat_lon_grid_deltas

# autoreload (mode 2): re-import edited modules before each cell executes, so
# changes to imported modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2
# autosave the notebook every 180 seconds
%autosave 180
# %matplotlib notebook

# %%bash
# # python -m cProfile scan_match.py
# python scan_match.py

2022-11-29 12:59:49.842721: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-29 12:59:50.473432: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-29 12:59:51.458502: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/derm/anaconda3/envs/py39/lib/python3.9/site-packages/cv2/../../lib64:
2022-11-29 12:59:51.458583: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_p

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


2022-11-29 12:59:53.248241: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-29 12:59:53.249740: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-29 12:59:53.249938: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-29 12:59:53.250083: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

Autosaving every 180 seconds


In [2]:
# Load two consecutive scans (frames 400 and 401) of one KITTI odometry sequence
basepath = '/media/derm/06EF-127D1/KITTI'
# sequence = '03' #forest
sequence = '09' #trees and small town
dataset = pykitti.odometry(basepath, sequence)

# only the first three columns of every scan are handed to ICET
velo1, velo2 = (dataset.get_velo(frame) for frame in (400, 401))
c1, c2 = velo1[:,:3], velo2[:,:3]

# alternative input: read the two clouds from text files instead
# fn1 = "/home/derm/ASAR/v3/spherical_paper/MC_trajectories/scene1_scan13.txt"
# c1 = np.loadtxt(fn1)
# fn2 = "/home/derm/ASAR/v3/spherical_paper/MC_trajectories/scene1_scan14.txt"
# c2 = np.loadtxt(fn2)

# constructing ICET runs the registration; `it` is inspected by the cells below
icet_options = dict(fid = 50, niter = 9, draw = False,
                    group = 2, RM = True, DNN_filter = False)
it = ICET(cloud1 = c1, cloud2 = c2, **icet_options)

Ground truth poses are not avaialble for sequence 09.

 getting cluster took 0.06617212295532227 seconds !!!


2022-11-29 12:59:57.468484: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2022-11-29 12:59:57.534381: I tensorflow/core/util/cuda_solvers.cc:179] Creating GpuSolver handles for stream 0x1b45f3c0



 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.05447196960449219 
 total:  1.9408204555511475 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor([ 0.28038415  0.02009672  0.00207677 -0.00075165  0.0010415  -0.00503131], shape=(6,), dtype=float32)

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.051053762435913086 
 total:  2.5914738178253174 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor(
[ 5.6374705e-01  1.3857881e-02  6.5414929e-03 -4.1655061e-04
  1.4584193e-03 -1.9518519e-03], shape=(6,), dtype=float32)

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.04572796821594238 
 total:  2.653541088104248 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor(
[ 7.5490296e-01  8.4367171e-03  1.0045056e-02 -5.0080096e-04
  1.7461479e-03  4.5170775e-04], shape=(6,), dtype=float32)

 ~~~~~~~~~~~~~~ 
 fit_gaussian for scan 2 0.04253649711608887 
 total:  2.716052770614624 
 ~~~~~~~~~~~~~~

 estimated solution vector X: 
 tf.Tensor(
[ 8.5301769e-01  9.3586408e-03  1.1339078e-02 

# get_cluster()

In [3]:
def gt2(rads, thresh = 0.5, mnp = 100):
    """Locate radial "jumps" in every column (spike) of `rads` without Python loops.

    Work-in-progress replacement for the radial binning of spherical voxels.
    Only the jump locations are computed so far; turning them into bounds is
    done by the notebook cells that follow.

    Args:
        rads: float32 tensor of radial measurements with one column per spike.
            Exact zeros are treated as padding (left over from converting a
            ragged tensor to a dense one). A 1-D input is handled as a single
            column.
        thresh: minimum difference between two neighbouring sorted radii that
            counts as a jump.
        mnp: minimum number of points per cluster. Not used yet; kept so the
            existing calls `gt2(rads, mnp = ...)` keep working.

    Returns:
        (bounds, jumps): `bounds` is always None for now. `jumps` is an int64
        tensor of [row index in the column-sorted radii, spike index] pairs.
        Spikes without any jump get one synthetic row
        [number of non-zero entries - 2, spike index] appended at the end, so
        every spike appears at least once.
    """
    if len(tf.shape(rads)) < 2:
        rads = rads[:,None]

    OG_rads = rads #hold on to OG rads
    #replace all zeros in rads (result of converting ragged -> standard tensor) with some arbitrarily large value
    pad_mask = tf.cast(tf.math.equal(rads, 0), tf.float32)*1000
    rads = rads + pad_mask

    #sort in ascending order for each column in tensor
    rads = tf.sort(rads, axis = 0)

    # calculate the forward difference between neighboring points
    # (the appended row of zeros makes the last difference negative, so the
    #  final row can never be reported as a jump)
    n_spikes = tf.shape(rads)[1]
    z = tf.zeros([1, n_spikes])
    shifted = tf.concat((rads[1:], z), axis = 0)
    diff = shifted - rads

    #find where difference jumps
    jumps = tf.where(diff > thresh) #[idx of jump, which spike is jumping]

    #----------------------------------------------------------------------
    #make sure spikes that contain no jump at all still show up in the result
    used = jumps[:,1][None,:]
    # list every spike, not just 0..largest spike that has a jump: otherwise
    # jump-free spikes past that index are lost (and an empty `jumps` crashes)
    all_spikes = tf.range(tf.cast(n_spikes, tf.int64), dtype = tf.int64)[None,:]

    #find difference
    missing = tf.sets.difference(all_spikes, used).values[None,:]

    # number of real (non-padding) measurements in every spike
    # (not argmax: we want the position of the last nonzero entry)
    zero = tf.constant(0, dtype = tf.float32)
    ends = tf.math.reduce_sum(tf.cast(tf.not_equal(OG_rads, zero), tf.int64), axis = 0)

    #row index used for the synthetic jump of each missing spike
    last_idx = tf.gather(ends, missing[0])[None,:]
    last_idx -= 2 #fixes indexing bug

    missing = tf.transpose(tf.concat((last_idx, missing), axis = 0))

    jumps = tf.concat((jumps, missing), axis = 0) #concat missing stuff back at the end of jumps
    #----------------------------------------------------------------------

    print("\n jumps: \n", jumps)

    #TODO: find where the first large cluster occurs in each spike
    bounds = None

    return(bounds, jumps)

In [4]:
from utils import get_cluster, get_cluster_fast
# print("rads: \n", it.rads)

# Benchmark the two clustering implementations on the same input.
# Each timer is stopped right after its call returns, so neither measurement
# includes the cost of formatting and printing the result.
s = time.time()
bounds_old = get_cluster(it.rads, mnp = it.min_num_pts)
elapsed_old = time.time() - s
print("\n took", elapsed_old, " s with old method \n")
print("\n bounds_old: \n", bounds_old[:10])
print(np.shape(bounds_old))

s = time.time()
# bounds_new, jumps = gt2(it.rads, mnp = it.min_num_pts)
bounds_new = get_cluster_fast(it.rads, mnp = it.min_num_pts)
elapsed_new = time.time() - s
print("\n bounds_new: \n", bounds_new[:10])
print(" \n took", elapsed_new, " s with new method")


 getting cluster took 0.44474339485168457 seconds !!!

 took 0.445101261138916  s with old method 


 bounds_old: 
 tf.Tensor(
[[ 5.1231842   7.03590822]
 [15.22507858 29.69450188]
 [ 6.36735344  8.2538538 ]
 [ 4.2986064   5.04593325]
 [ 7.83050394  9.93404961]
 [11.98708725 13.45140171]
 [17.63828278 23.86518097]
 [ 4.85813999  6.34545469]
 [ 4.84403372  5.59854412]
 [ 8.26793957 12.90435886]], shape=(10, 2), dtype=float64)
(247, 2)

 getting cluster took 0.016243457794189453 seconds !!!

 bounds_new: 
 tf.Tensor(
[[ 5.123184   7.035908 ]
 [15.225079  29.694502 ]
 [ 6.3673534  8.253854 ]
 [ 4.2986064  5.0459332]
 [ 7.830504   9.93405  ]
 [11.987087  13.451402 ]
 [17.638283  23.865181 ]
 [ 4.85814    6.3454547]
 [ 4.8440337  5.598544 ]
 [ 8.26794   12.904359 ]], shape=(10, 2), dtype=float32)
 
 took 0.016746044158935547  s with new method


In [6]:
#identifying location of jumps without looping
print("old slow soln shape: \n", tf.shape(bounds_old)) #want to produce this same shape!!!
# print("\n bounds_old: \n", bounds_old[:10])

# gt2 returns (bounds, jumps) and its bounds entry is still None, so only the
# jumps are kept; bounds_new from the timing cell is no longer overwritten
jumps = gt2(it.rads, mnp = it.min_num_pts)[1]

#get all radial measurements
#(temp-- already done inside function)-----------------------------
# zeros in it.rads are padding: adding 1000 moves them behind every real value
mask = tf.cast(tf.math.equal(it.rads, 0), tf.float32)*1000
rads = it.rads + mask
#sort in ascending order for each column in tensor
rads = tf.sort(rads, axis = 0)
# print("\n rads: \n", rads[:10])
# print("\n rads: \n", np.shape(rads))
# print("\n it.rads \n", it.rads)
#------------------------------------------------------------------

# print("\n jumps: \n", tf.shape(jumps))
# print("\n jumps: \n", jumps) #[idx of jump, which spike is jumping]

# reorder the [idx of jump, spike] rows so that all rows of one spike are adjacent.
# stable = True keeps the jump indices of a spike in the order gt2 produced them,
# which the diff() further down depends on
jumps_temp = tf.gather(jumps, tf.argsort(jumps[:,1], stable = True), axis=0)
y, idx = tf.unique(jumps_temp[:,1])
print("\n y \n", y[:15], "\n", tf.shape(y), "\n \n idx \n", idx[:15], "\n", tf.shape(idx))
# print("\n jumps_temp \n", jumps_temp[:15])

# get ragged tensor containing indices where jumps occur inside each wedge shaped voxel
# (values = jump indices, row ids = spike numbers)
jumps_rag = tf.RaggedTensor.from_value_rowids(jumps_temp[:,0], jumps_temp[:,1])
# print("\n jumps_rag \n", jumps_rag[:15])
print("\n rads[0,_] \n", rads[:30,0])


# append 0 to beginning of each ragged element of jumps_rag
zeros = tf.zeros(tf.shape(jumps_rag)[0])[:,None]
zeros = tf.cast(tf.RaggedTensor.from_tensor(zeros), tf.int64)
jumps_rag = tf.concat([zeros.with_row_splits_dtype(tf.int64), jumps_rag.with_row_splits_dtype(tf.int64)], axis = 1)
print("\n jumps_rag \n", jumps_rag[:15])
# print("\n jumps_rag \n", jumps_rag.to_tensor())

#get num points between each jump
# (to_tensor() pads short rows with 0, so differences past the last real jump
#  come out as zero or negative)
npts_between_jumps = tf.experimental.numpy.diff(jumps_rag.to_tensor())
# print("\n npts_between_jumps:\n ",npts_between_jumps[:10,:10])

#flag spikes where all npts_between_jumps are less than mnp
biggest_jump = tf.math.reduce_max(npts_between_jumps, axis = 1)
# print("\n biggest_jump \n", biggest_jump)
mnp = 100 #minimum number of points per cluster (defined in ICET class)
good_clusters = tf.cast(tf.math.greater(biggest_jump, mnp), tf.int32)
# good_clusters = tf.RaggedTensor.from_value_rowids(good_clusters, y).to_tensor()[:,0]  #fill in skipped indices
print("\n good_clusters (hold on to this for later) \n", good_clusters)

#get idx within jumps_rag corresponding to first sufficiently large jump
# (same threshold as good_clusters; argmax returns the first position holding a 1)
big_enough = tf.cast(tf.math.greater(npts_between_jumps, mnp), tf.int32)
# print(big_enough[:10])
first_big_enough = tf.math.argmax(big_enough, axis = 1)
print("\n first_big_enough: \n", first_big_enough)

print("\n everything looks good up to this point :)")

old slow soln shape: 
 tf.Tensor([247   2], shape=(2,), dtype=int32)

 jumps: 
 tf.Tensor(
[[  0  42]
 [  0  74]
 [  0 168]
 ...
 [865 129]
 [868 129]
 [869 129]], shape=(793, 2), dtype=int64)

 y 
 tf.Tensor([ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14], shape=(15,), dtype=int64) 
 tf.Tensor([247], shape=(1,), dtype=int32) 
 
 idx 
 tf.Tensor([0 1 1 1 1 1 1 1 1 1 1 1 2 3 4], shape=(15,), dtype=int32) 
 tf.Tensor([793], shape=(1,), dtype=int32)

 rads[0,_] 
 tf.Tensor(
[5.1146927 5.123184  5.1250353 5.127008  5.127362  5.1277847 5.1285396
 5.1299496 5.1313977 5.131942  5.1328163 5.134753  5.1356616 5.135729
 5.137458  5.1384068 5.1388755 5.1390233 5.1399364 5.1399913 5.1417584
 5.1420884 5.142145  5.1428895 5.144201  5.1453676 5.1464715 5.146884
 5.1485653 5.1491933], shape=(30,), dtype=float32)

 jumps_rag 
 <tf.RaggedTensor [[0, 512], [0, 790, 800, 804, 806, 808, 809, 810, 811, 831, 836, 837],
 [0, 461], [0, 288], [0, 624], [0, 124, 126, 127, 469], [0, 41, 123, 529],
 [0, 499], [0, 

In [62]:
# Simple bounds: take the radial measurements of the innermost and outermost
# points of the first sufficiently large cluster of every spike.
jumps_dense = jumps_rag.to_tensor()
rads_by_spike = tf.transpose(rads)

# inner edge: one sorted sample past the jump that opens the cluster
inner_idx = tf.gather(jumps_dense, first_big_enough, batch_dims=1) + 1
# print("\n inner_idx: \n", inner_idx, "\n")
inner  = tf.gather(rads_by_spike, inner_idx, batch_dims=1)
# print("\n inner: \n", inner)

# outer edge: the sample at the jump that closes the cluster
outer_idx = tf.gather(jumps_dense, first_big_enough + 1, batch_dims=1)
# print("\n outer_idx: \n", outer_idx, "\n")
outer  = tf.gather(rads_by_spike, outer_idx, batch_dims=1)
# print("\n outer: \n", outer)

# one [inner, outer] row per spike; rows of spikes without a good cluster are zeroed
bounds = tf.stack((inner, outer), axis = 1)
bounds = bounds * tf.cast(good_clusters[:,None], tf.float32)
print(bounds[:10])

tf.Tensor(
[[ 5.123184   7.035908 ]
 [15.225079  29.694502 ]
 [ 6.3673534  8.253854 ]
 [ 4.2986064  5.0459332]
 [ 7.830504   9.93405  ]
 [11.987087  13.451402 ]
 [17.638283  23.865181 ]
 [ 4.85814    6.3454547]
 [ 4.8440337  5.598544 ]
 [ 8.26794   12.904359 ]], shape=(10, 2), dtype=float32)


In [152]:
#get inner and outer as described in spherical paper
#  (max half distance between last in cluster and first point outside cluster)
# NOTE(review): the code below uses the full gap (not half of it) whenever
# 2*gap < max_buffer -- confirm which of the two is intended.

max_buffer = 3.0

# both are needed by every gather below, so build them once
jumps_dense = jumps_rag.to_tensor()
rads_by_spike = tf.transpose(rads)

#--- inner bound ------------------------------------------------------
inner_idx = tf.gather(jumps_dense, first_big_enough, batch_dims=1) # + 1 #DEBUG -- do we need +1 here??
inner_radii  = tf.gather(rads_by_spike, inner_idx, batch_dims=1)
#get radial distance of closest point on near side of cluster
# clamp at 0: for spikes whose cluster is the first one, first_big_enough-1 is -1,
# which tf.gather rejects on CPU (on GPU it silently yields 0). Column 0 of
# jumps_dense is the prepended 0, so the clamped lookup gives the same result
# as the GPU behaviour.
prev_jump = tf.maximum(first_big_enough - 1, 0)
next_inner_idx = tf.gather(jumps_dense, prev_jump, batch_dims=1)
next_inner_radii = tf.gather(rads_by_spike, next_inner_idx, batch_dims=1)

#will be zero when inner idx occurs on first element of spike, otherwise correct soln
inner_skip_dist = inner_radii - next_inner_radii
#of these nonzero distances, some are smaller than max_buffer -> leave as is, all else set to max_buffer
# within_buffer is 1 where the gap is small enough to be used as-is
within_buffer = tf.cast(tf.math.less(inner_skip_dist*2, max_buffer), tf.float32)
inner_skip_dist = inner_skip_dist*within_buffer + (1-within_buffer)*max_buffer
zero_gap_buffer = tf.cast(tf.math.equal(inner_skip_dist, 0), tf.float32)*max_buffer #set all others to max_buffer
inner = inner_radii - inner_skip_dist - zero_gap_buffer

#good up to here-----------------

#--- outer bound ------------------------------------------------------
closing_jump = tf.gather(jumps_dense, first_big_enough + 1, batch_dims=1)
outer_idx = closing_jump - 1
outer_radii  = tf.gather(rads_by_spike, outer_idx, batch_dims=1)
next_outer_idx = closing_jump + 1
next_outer_radii = tf.gather(rads_by_spike, next_outer_idx, batch_dims=1)

# print(outer_idx[:15])
# print(next_outer_idx[:15])
print(outer_radii[:15])
print(next_outer_radii[:15])

# a next radius of 1000 is padding, so that gap always exceeds max_buffer
outer_skip_dist = next_outer_radii - outer_radii
within_buffer = tf.cast(tf.math.less(outer_skip_dist*2, max_buffer), tf.float32)
print(within_buffer[:15])

outer_skip_dist = outer_skip_dist*within_buffer + (1-within_buffer)*max_buffer
print(outer_skip_dist[:15])

outer = outer_radii + outer_skip_dist
print(outer[:15])

tf.Tensor(
[ 7.033653  29.642231   8.244221   5.0440245  9.91175   13.440241
 23.513298   6.3405857  5.5822115 12.863779  14.787328  13.212239
 16.214926   5.4939666  5.8556833], shape=(15,), dtype=float32)
tf.Tensor(
[1000.          30.437288  1000.        1000.        1000.
   14.554577  1000.        1000.           7.4011145   15.344234
   15.426538    14.208025    34.42689   1000.        1000.       ], shape=(15,), dtype=float32)
tf.Tensor([0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0.], shape=(15,), dtype=float32)
tf.Tensor(
[3.        0.7950573 3.        3.        3.        1.114336  3.
 3.        3.        3.        0.6392107 0.9957857 3.        3.
 3.       ], shape=(15,), dtype=float32)
tf.Tensor(
[10.033653  30.437288  11.244221   8.044024  12.91175   14.554577
 26.513298   9.340586   8.5822115 15.863779  15.426538  14.208025
 19.214926   8.493967   8.855683 ], shape=(15,), dtype=float32)


# fit_gaussian()

In [None]:
def fg2(cloud, rag, npts, numSamples = 30):
    """Fit a mean and a 3x3 covariance to the points of every cell (ragged input).

    Experimental faster variant of fit_gaussian: the mean uses every point of
    a cell, the covariance only the first `numSamples` points of the cell.

    Args:
        cloud: point tensor whose columns 0, 1, 2 are read as x, y, z.
        rag: ragged tensor of row indices into `cloud`, one ragged row per cell.
        npts: number of points per cell (one value per row of `rag`), as passed
            by the caller.
        numSamples: how many leading points of each cell enter the covariance.
            Defaults to 30, the value that used to be hard-coded.

    Returns:
        (mu, sigma): `mu` is the per-cell mean with a singleton axis inserted
        after the cell axis; `sigma` has shape (number of cells, 3, 3).

    Note:
        Cells holding fewer than `numSamples` points produce unreliable
        values from the axis=1 gather; the notebook relies on such cells being
        suppressed later on.
    """
    coords = tf.gather(cloud, rag)
    mu = tf.math.reduce_mean(coords, axis = 1)[:,None]

    #   TODO: try randomly sampling points from each ragged cell instead of taking the first ones
    #     subsampled = tf.map_fn(sample, it.inside2) #works but SLOW

    # first numSamples coordinates of every cell
    idx = tf.range(numSamples)
    xpos = tf.gather(tf.gather(cloud[:,0], rag), idx, axis = 1)
    ypos = tf.gather(tf.gather(cloud[:,1], rag), idx, axis = 1)
    zpos = tf.gather(tf.gather(cloud[:,2], rag), idx, axis = 1)

    # deviations from the cell mean
    dx = xpos - mu[:,:,0]
    dy = ypos - mu[:,:,1]
    dz = zpos - mu[:,:,2]

    # normalise by the number of points that were actually summed, not by the
    # full cell size: dividing a numSamples-term sum by npts shrinks the
    # covariance by numSamples/npts
    n_used = tf.minimum(tf.cast(npts, tf.float32), tf.cast(numSamples, tf.float32))

    xx = tf.math.reduce_sum(tf.math.square(dx), axis = 1)/n_used
    yy = tf.math.reduce_sum(tf.math.square(dy), axis = 1)/n_used
    zz = tf.math.reduce_sum(tf.math.square(dz), axis = 1)/n_used
    xy = tf.math.reduce_sum(dx*dy, axis = 1)/n_used
    xz = tf.math.reduce_sum(dx*dz, axis = 1)/n_used
    yz = tf.math.reduce_sum(dy*dz, axis = 1)/n_used

    # stack the nine entries as (9, cells), then rearrange to (cells, 3, 3)
    sigma = tf.stack([xx, xy, xz,
                      xy, yy, yz,
                      xz, yz, zz])
    sigma = tf.reshape(tf.transpose(sigma), (tf.shape(xx)[0], 3, 3))

    return(mu, sigma)

@tf.function
def sample(x, samples=3):
  """Return at most the first `samples` entries of `x` along axis 0.

  Adapted from https://stackoverflow.com/questions/71073873/sample-from-ragged-tensor
  The random shuffle (and the length check) of that answer are switched off
  here, so the selection is deterministic: it is simply the leading entries.
  """
  n_entries = tf.shape(x)[0]
  every_index = tf.range(n_entries)
  # gathering every index reproduces x; the slice then keeps the leading entries
  return tf.gather(x, every_index)[:samples]

In [None]:
# Benchmark: fit_gaussian method of the ICET object vs. the fg2 prototype,
# both on scan 2. The second call overwrites mu2 / sigma2.
npts2_float = tf.cast(it.npts2, tf.float32)

s = time.time()
mu2, sigma2 = it.fit_gaussian(it.cloud2_tensor, it.inside2, npts2_float)
elapsed_old = time.time() - s
print("\n took", elapsed_old, " s with old method")

npts2_float = tf.cast(it.npts2, tf.float32)
s = time.time()
mu2, sigma2 = fg2(it.cloud2_tensor, it.inside2, npts2_float)
elapsed_new = time.time() - s
print(" \n took", elapsed_new, " s with new method")

# print(it.npts2)
# print(it.inside2)

In [None]:
# Scratch cell: try per-row sampling strategies on a small ragged tensor whose
# rows have different lengths (including one empty row).
# vect = it.inside2
vect = tf.ragged.constant([[],[1,2,3,4],[5,4,3,2,1],[6],[99],[7,8,9,10,11,12,13]])
# print(tf.shape(vect)[0])
print("vect", vect)
# option 1: apply sample() to every row through tf.map_fn
c = tf.map_fn(sample, vect)
# print(c)

#wrong
# test = tf.gather(vect,tf.range(tf.shape(vect)[0]))[:3]
# option 2: gather positions 0..2 of every row with a single axis=1 gather
idx = tf.range(3)
print("\n idx", idx)
test = tf.gather(vect, idx , axis = 1)
print("\n test", test) #NOTE: indices with too few elements produce unexpected behavior
                        #that doesn't matter since they get suppressed anyways
    
# NOTE(review): tf.random.categorical is documented as taking a 2-D tensor of
# float logits; `vect` is a ragged integer tensor -- confirm this line runs
# and is still wanted (the cell has no recorded execution).
vec2 = tf.random.categorical(vect, 2)
    