# pc

> Point Cloud data manipulation

In [None]:
#| default_exp cli/pc

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import math
import zarr
import cupy as cp
import numpy as np
from matplotlib import pyplot as plt

import dask
from dask import array as da
from dask.distributed import Client, LocalCluster

from decorrelation.pc import pc2ras, pc_union, pc_intersect, pc_diff
from decorrelation.cli.utils.logging import get_logger, log_args
from decorrelation.cli.utils.chunk_size import (get_pc_chunk_size_from_n_pc_chunk, 
                                                get_pc_chunk_size_from_pc_chunk_size, 
                                                get_pc_chunk_size_from_n_az_chunk,
                                                get_az_chunk_size_from_n_pc_chunk)

from fastcore.script import call_parse, Param

In [None]:
#| export
@log_args
def de_ras2pc(idx:str, # point cloud index
              ras:str|list, # path (in string) or list of path for raster data
              pc:str|list, # output, path (in string) or list of path for point cloud data
              pc_chunk_size:int=None, # output point chunk size
              n_pc_chunk:int=None, # output number of chunk
              hd_chunk_size:tuple|list=None, # output high dimension chunk size, tuple or list of tuple, same as input raster data by default
              log:str=None, # log file. Default: no log file
):
    '''Convert raster data to point cloud data.

    `idx` is the path of a 2-D zarr array whose two rows are the indices along
    the first two raster axes of every selected pixel. `ras`, `pc` (and
    `hd_chunk_size` when given) are either all single values or all lists;
    the lists are processed pairwise. The dask cluster started for the
    computation is always shut down, even when the computation fails.
    '''
    logger = get_logger(logfile=log)

    idx_zarr = zarr.open(idx,mode='r')
    logger.zarr_info(idx,idx_zarr)
    assert idx_zarr.ndim == 2, "idx dimentation is not 2."
    pc_chunk_size = get_pc_chunk_size_from_pc_chunk_size('idx','pc',idx_zarr.chunks[1],idx_zarr.shape[1],logger,pc_chunk_size=pc_chunk_size,n_pc_chunk=n_pc_chunk)

    logger.info('loading idx into memory.')
    # reuse the store that is already open instead of opening it a second time
    idx = idx_zarr[:]

    # normalise the single-dataset and the multi-dataset call styles to parallel lists
    if isinstance(ras,str):
        assert isinstance(pc,str)
        ras_list = [ras]; pc_list = [pc]
        if hd_chunk_size is not None:
            assert isinstance(hd_chunk_size,tuple)
            hd_chunk_size_list = [hd_chunk_size]
        else:
            hd_chunk_size_list = [None]
    else:
        assert isinstance(ras,list); assert isinstance(pc,list)
        ras_list = ras; pc_list = pc
        if hd_chunk_size is not None:
            assert isinstance(hd_chunk_size,list)
            hd_chunk_size_list = hd_chunk_size
        else:
            hd_chunk_size_list = [None]*len(ras_list)

    logger.info('starting dask local cluster.')
    # context managers guarantee that client and cluster are released on any exit path
    with LocalCluster() as cluster, Client(cluster):
        logger.info('dask local cluster started.')

        _pc_list = ()
        for ras_path, pc_path, hd_chunks in zip(ras_list,pc_list,hd_chunk_size_list):
            logger.info(f'start to slice on {ras_path}')
            ras_zarr = zarr.open(ras_path,'r'); logger.zarr_info(ras_path, ras_zarr)
            if hd_chunks is None:
                logger.info(f'hd_chunk_size not setted. Use the one from {ras_path}.')
                hd_chunks = ras_zarr.chunks[2:]
            logger.info(f'hd_chunk_size: {hd_chunks}.')

            ras_darr = da.from_zarr(ras_path,chunks=(*ras_zarr.chunks[:2],*hd_chunks)); logger.darr_info('ras',ras_darr)

            # flatten the two image axes and pick the selected pixels with one 1-D fancy index
            with dask.config.set(**{'array.slicing.split_large_chunks': False}):
                pc_darr = ras_darr.reshape(-1,*ras_darr.shape[2:])[np.ravel_multi_index((idx[0],idx[1]),dims=ras_darr.shape[:2])]

            logger.darr_info('pc', pc_darr)
            logger.info('rechunk pc data:')
            pc_darr = pc_darr.rechunk((pc_chunk_size,*pc_darr.chunksize[1:]))
            logger.darr_info('pc', pc_darr)
            # only build the graph here; everything is computed together below
            _pc = pc_darr.to_zarr(pc_path,overwrite=True,compute=False)
            logger.info(f'saving to {pc_path}.')
            _pc_list += (_pc,)

        logger.info('computing graph setted. doing all the computing.')
        da.compute(*_pc_list)

        logger.info('computing finished.')
    logger.info('dask cluster closed.')

In [None]:
#| export
#| hide
@call_parse
def console_de_ras2pc(idx:str, # point cloud index
                      ras:Param(type=str,required=True,nargs='+',help='one or more path for raster data')=None,
                      pc:Param(type=str,required=True,nargs='+',help='output, one or more path for point cloud data')=None,
                      pc_chunk_size:int=None, # output point chunk size, same as input idx by default
                      n_pc_chunk:int=None, # output number of chunk
                      hd_chunk_size:Param(type=str,nargs='+',help='''output high dimension chunk size,
                      each size should be wrapped in quotation marks and size in each dimension are seperated with ",",
                      same as input raster data by default''')=None,
                      log:str=None, # log file. Default: no log file
):
    '''Convert raster data to point cloud data'''
    def _as_chunk_tuple(text):
        # '' stands for "no high dimension"; otherwise 'a,b,...' becomes (a, b, ...)
        if len(text) == 0:
            return ()
        return tuple(int(part) for part in text.split(','))

    # translate the command line strings into the tuples expected by `de_ras2pc`
    parsed_sizes = None if hd_chunk_size is None else [_as_chunk_tuple(text) for text in hd_chunk_size]

    # a single raster is forwarded as plain values instead of one-element lists
    if len(ras)==1:
        ras, pc = ras[0], pc[0]
        if parsed_sizes is not None:
            parsed_sizes = parsed_sizes[0]

    de_ras2pc(idx,ras,pc,pc_chunk_size=pc_chunk_size,n_pc_chunk=n_pc_chunk,hd_chunk_size=parsed_sizes,log=log)

In [None]:
# Print the command line help of the `de_ras2pc` console script.
!de_ras2pc -h

usage: de_ras2pc [-h] --ras RAS [RAS ...] --pc PC [PC ...]
                 [--pc_chunk_size PC_CHUNK_SIZE] [--n_pc_chunk N_PC_CHUNK]
                 [--hd_chunk_size HD_CHUNK_SIZE [HD_CHUNK_SIZE ...]] [--log LOG]
                 idx

Convert raster data to point cloud data

positional arguments:
  idx                                   point cloud index

options:
  -h, --help                            show this help message and exit
  --ras RAS [RAS ...]                   one or more path for raster data
  --pc PC [PC ...]                      output, one or more path for point cloud
                                        data
  --pc_chunk_size PC_CHUNK_SIZE         output point chunk size, same as input
                                        idx by default
  --n_pc_chunk N_PC_CHUNK               output number of chunk
  --hd_chunk_size HD_CHUNK_SIZE [HD_CHUNK_SIZE ...]
                                        output high dimension chunk size, each
                                 

Usage:

In [None]:
# Synthetic rasters: a real 2-D image and a complex image with 3 trailing bands.
ras_data1 = np.random.rand(100,100).astype(np.float32)
ras_data2 = np.random.rand(100,100,3).astype(np.float32)+1j*np.random.rand(100,100,3).astype(np.float32)

# Draw 1000 distinct pixels, order them by flattened position and unravel to a (2, 1000) index.
idx = np.sort(np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False))
idx = np.stack(np.unravel_index(idx,shape=(100,100))).astype(np.int32)

# Expected point cloud values: the rasters sampled at the selected pixels.
pc_data1 = ras_data1[tuple(idx)]
pc_data2 = ras_data2[tuple(idx)]

# Create every zarr store and fill it right away.
idx_zarr = zarr.open('pc/idx.zarr','w',shape=idx.shape,dtype=idx.dtype,chunks=(2,200))
idx_zarr[:] = idx
ras_zarr1 = zarr.open('pc/ras1.zarr','w',shape=ras_data1.shape,dtype=ras_data1.dtype,chunks=(20,100))
ras_zarr1[:] = ras_data1
ras_zarr2 = zarr.open('pc/ras2.zarr','w',shape=ras_data2.shape,dtype=ras_data2.dtype,chunks=(20,100,1))
ras_zarr2[:] = ras_data2

In [None]:
# Single raster through the Python API; the written point cloud must equal
# the raster sampled at idx.
de_ras2pc('pc/idx.zarr','pc/ras1.zarr','pc/pc1.zarr')
pc_zarr1 = zarr.open('pc/pc1.zarr','r')
np.testing.assert_array_equal(pc_data1,pc_zarr1[:])

# Console invocation for the second raster (currently disabled).
# !de_ras2pc pc/idx.zarr --ras pc/ras2.zarr --pc pc/pc2.zarr --hd_chunk_size '1'
# pc_zarr2 = zarr.open('pc/pc2.zarr','r')
# np.testing.assert_array_equal(pc_data2,pc_zarr2[:])

# Both rasters in one call; `()` is the high dimension chunk size of the 2-D raster.
de_ras2pc('pc/idx.zarr',ras=['pc/ras1.zarr','pc/ras2.zarr'],pc=['pc/pc1.zarr','pc/pc2.zarr'],hd_chunk_size=[(),(1,)])
pc_zarr1 = zarr.open('pc/pc1.zarr','r')
pc_zarr2 = zarr.open('pc/pc2.zarr','r')
np.testing.assert_array_equal(pc_data1,pc_zarr1[:])
np.testing.assert_array_equal(pc_data2,pc_zarr2[:])

# Console invocation for both rasters (currently disabled).
# !de_ras2pc pc/idx.zarr --ras pc/ras1.zarr pc/ras2.zarr --pc pc/pc1.zarr pc/pc2.zarr --hd_chunk_size '' '1'
# pc_zarr1 = zarr.open('pc/pc1.zarr','r')
# pc_zarr2 = zarr.open('pc/pc2.zarr','r')
# np.testing.assert_array_equal(pc_data1,pc_zarr1[:])
# np.testing.assert_array_equal(pc_data2,pc_zarr2[:])

2023-10-18 16:59:24 - de_ras2pc - INFO - fetching args:
2023-10-18 16:59:24 - de_ras2pc - INFO - idx = 'pc/idx.zarr'
2023-10-18 16:59:24 - de_ras2pc - INFO - ras = 'pc/ras1.zarr'
2023-10-18 16:59:24 - de_ras2pc - INFO - pc = 'pc/pc1.zarr'
2023-10-18 16:59:24 - de_ras2pc - INFO - pc_chunk_size = None
2023-10-18 16:59:24 - de_ras2pc - INFO - n_pc_chunk = None
2023-10-18 16:59:24 - de_ras2pc - INFO - hd_chunk_size = None
2023-10-18 16:59:24 - de_ras2pc - INFO - log = None
2023-10-18 16:59:24 - de_ras2pc - INFO - fetching args done.
2023-10-18 16:59:24 - de_ras2pc - INFO - pc/idx.zarr zarray shape: (2, 1000)
2023-10-18 16:59:24 - de_ras2pc - INFO - pc/idx.zarr zarray chunks: (2, 200)
2023-10-18 16:59:24 - de_ras2pc - INFO - pc/idx.zarr zarray dtype: int32
2023-10-18 16:59:24 - de_ras2pc - INFO - automatically determine pc_chunk_size from pc_chunk_size of idx.
2023-10-18 16:59:24 - de_ras2pc - INFO - pc_chunk_size for pc: 200
2023-10-18 16:59:24 - de_ras2pc - INFO - loading idx into memory.

In [None]:
#| export
@log_args
def de_pc2ras(idx:str, # point cloud index
              pc:str|list, # path (in string) or list of path for point cloud data
              ras:str|list, # output, path (in string) or list of path for raster data
              shape:tuple, # shape of one image (nlines,width)
              az_chunk_size:int=None, # output azimuth chunk size, 
              n_az_chunk:int=None, # # output number of azimuth chunks 
              log:str=None, # log file. Default: no log file
):
    '''Convert point cloud data to raster data, filled with nan.

    `idx` is the path of a 2-D zarr array whose two rows are the line and
    column index of every point inside an image of size `shape`. `pc` and
    `ras` are either both single paths or both lists that are processed
    pairwise. Pixels without a point are set to nan. The dask cluster started
    for the computation is always shut down, even when the computation fails.
    '''
    logger = get_logger(logfile=log)

    idx_zarr = zarr.open(idx,mode='r')
    logger.info('idx dataset shape: '+str(idx_zarr.shape))
    logger.info('idx dataset chunks: '+str(idx_zarr.chunks))
    assert idx_zarr.ndim == 2, "idx dimentation is not 2."
    az_chunk_size = get_az_chunk_size_from_n_pc_chunk('idx','ras',idx_zarr.shape[1],idx_zarr.chunks[1],shape[0],logger=logger,az_chunk_size=az_chunk_size,n_az_chunk=n_az_chunk)

    logger.info('loading idx into memory.')
    # reuse the store that is already open instead of opening it a second time
    idx = idx_zarr[:]
    # position of every point in the flattened image; identical for all datasets
    flat_idx = np.ravel_multi_index((idx[0],idx[1]),dims=shape)

    if isinstance(pc,str):
        assert isinstance(ras,str)
        pc_list = [pc]; ras_list = [ras]
    else:
        assert isinstance(pc,list); assert isinstance(ras,list)
        pc_list = pc; ras_list = ras

    logger.info('starting dask local cluster.')
    # context managers guarantee that client and cluster are released on any exit path
    with LocalCluster() as cluster, Client(cluster):
        logger.info('dask local cluster started.')

        _ras_list = ()

        for ras_path, pc_path in zip(ras_list,pc_list):
            logger.info(f'start to work on {pc_path}')
            pc_zarr = zarr.open(pc_path,'r')
            logger.zarr_info(pc_path,pc_zarr)

            pc_darr = da.from_zarr(pc_path)
            logger.darr_info('pc', pc_darr)
            # work on the flattened image so that the points can be scattered with one 1-D index
            ras_darr = da.empty((shape[0]*shape[1],*pc_darr.shape[1:]),chunks = (az_chunk_size*shape[1],*pc_zarr.chunks[1:]), dtype=pc_darr.dtype)
            ras_darr[:] = np.nan
            ras_darr[flat_idx] = pc_darr
            ras_darr = ras_darr.reshape(*shape,*pc_darr.shape[1:])
            logger.info('create ras dask array')
            logger.darr_info('ras', ras_darr)
            # only build the graph here; everything is computed together below
            _ras = ras_darr.to_zarr(ras_path,overwrite=True,compute=False)
            _ras_list += (_ras,)

        logger.info('computing graph setted. doing all the computing.')
        da.compute(*_ras_list)

        logger.info('computing finished.')
    logger.info('dask cluster closed.')

In [None]:
#| export
#| hide
@call_parse
def console_de_pc2ras(idx:str, # point cloud index
                      pc:Param(type=str,required=True,nargs='+',help='one or more path for point cloud data')=None,
                      ras:Param(type=str,required=True,nargs='+',help='output, one or more path for raster data')=None,
                      shape:Param(type=str,required=True,help='shape of one image "nlines,width"')=None,
                      az_chunk_size:int=None, # output azimuth chunk size
                      n_az_chunk:int=None, # output number of azimuth chunks 
                      log:str=None, # log file. Default: no log file
):
    '''Convert point cloud data to raster data'''
    # a single dataset is forwarded as plain paths instead of one-element lists
    if len(ras)==1:
        ras, pc = ras[0], pc[0]

    # "nlines,width" given on the command line becomes a tuple of ints
    shape = tuple(int(extent) for extent in shape.split(','))

    de_pc2ras(idx,pc,ras,shape,az_chunk_size=az_chunk_size,n_az_chunk=n_az_chunk,log=log)

In [None]:
# Print the command line help of the `de_pc2ras` console script.
!de_pc2ras -h

usage: de_pc2ras [-h] --pc PC [PC ...] --ras RAS [RAS ...] --shape SHAPE
                 [--az_chunk_size AZ_CHUNK_SIZE] [--n_az_chunk N_AZ_CHUNK]
                 [--log LOG]
                 idx

Convert point cloud data to raster data

positional arguments:
  idx                            point cloud index

options:
  -h, --help                     show this help message and exit
  --pc PC [PC ...]               one or more path for point cloud data
  --ras RAS [RAS ...]            output, one or more path for raster data
  --shape SHAPE                  shape of one image "nlines,width"
  --az_chunk_size AZ_CHUNK_SIZE  output azimuth chunk size
  --n_az_chunk N_AZ_CHUNK        output number of azimuth chunks
  --log LOG                      log file. Default: no log file


Usage:

In [None]:
# Synthetic point clouds: a real 1-D one and a complex one with 3 trailing bands.
pc_data1 = np.random.rand(1000).astype(np.float32)
pc_data2 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)

# Draw 1000 distinct pixels, order them by flattened position and unravel to a (2, 1000) index.
idx = np.sort(np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False))
idx = np.stack(np.unravel_index(idx,shape=(100,100))).astype(np.int32)

# Expected rasters: nan everywhere except at the selected pixels.
ras_data1 = np.full((100,100),np.nan,dtype=np.float32)
ras_data2 = np.full((100,100,3),np.nan,dtype=np.complex64)
ras_data1[tuple(idx)] = pc_data1
ras_data2[tuple(idx)] = pc_data2

# Create every zarr store and fill it right away.
idx_zarr = zarr.open('pc/idx.zarr','w',shape=idx.shape,dtype=idx.dtype,chunks=(2,200))
idx_zarr[:] = idx
pc_zarr1 = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,))
pc_zarr1[:] = pc_data1
pc_zarr2 = zarr.open('pc/pc2.zarr','w',shape=pc_data2.shape,dtype=pc_data2.dtype,chunks=(200,1))
pc_zarr2[:] = pc_data2

In [None]:
# Single point cloud through the Python API; the written raster must equal
# the expected nan-filled raster.
de_pc2ras('pc/idx.zarr','pc/pc1.zarr','pc/ras1.zarr',shape=(100,100),az_chunk_size=20)
ras_zarr1 = zarr.open('pc/ras1.zarr','r')
np.testing.assert_array_equal(ras_data1,ras_zarr1[:])

# Single point cloud through the console script.
!de_pc2ras pc/idx.zarr/ --pc pc/pc2.zarr/ --ras pc/ras2.zarr/ --shape "100,100" --az_chunk_size 20
ras_zarr2 = zarr.open('pc/ras2.zarr','r')
np.testing.assert_array_equal(ras_data2,ras_zarr2[:])

# Both point clouds in one call through the Python API.
de_pc2ras('pc/idx.zarr',['pc/pc1.zarr','pc/pc2.zarr'],['pc/ras1.zarr','pc/ras2.zarr'],shape=(100,100),az_chunk_size=20)
ras_zarr1 = zarr.open('pc/ras1.zarr','r')
ras_zarr2 = zarr.open('pc/ras2.zarr','r')
np.testing.assert_array_equal(ras_data1,ras_zarr1[:])
np.testing.assert_array_equal(ras_data2,ras_zarr2[:])

# Both point clouds in one call through the console script.
!de_pc2ras pc/idx.zarr/ --pc pc/pc1.zarr/ pc/pc2.zarr/ --ras pc/ras1.zarr/ pc/ras2.zarr/ --shape "100,100" --az_chunk_size 20
ras_zarr1 = zarr.open('pc/ras1.zarr','r')
ras_zarr2 = zarr.open('pc/ras2.zarr','r')
np.testing.assert_array_equal(ras_data1,ras_zarr1[:])
np.testing.assert_array_equal(ras_data2,ras_zarr2[:])

2023-10-18 16:57:31 - de_pc2ras - INFO - fetching args:
2023-10-18 16:57:31 - de_pc2ras - INFO - idx = 'pc/idx.zarr'
2023-10-18 16:57:31 - de_pc2ras - INFO - pc = 'pc/pc1.zarr'
2023-10-18 16:57:31 - de_pc2ras - INFO - ras = 'pc/ras1.zarr'
2023-10-18 16:57:31 - de_pc2ras - INFO - shape = (100, 100)
2023-10-18 16:57:31 - de_pc2ras - INFO - az_chunk_size = 20
2023-10-18 16:57:31 - de_pc2ras - INFO - n_az_chunk = None
2023-10-18 16:57:31 - de_pc2ras - INFO - log = None
2023-10-18 16:57:31 - de_pc2ras - INFO - fetching args done.
2023-10-18 16:57:31 - de_pc2ras - INFO - idx dataset shape: (2, 1000)
2023-10-18 16:57:31 - de_pc2ras - INFO - idx dataset chunks: (2, 200)
2023-10-18 16:57:31 - de_pc2ras - INFO - got az_chunk_size for ras: 20
2023-10-18 16:57:31 - de_pc2ras - INFO - loading idx into memory.
2023-10-18 16:57:31 - de_pc2ras - INFO - starting dask local cluster.
2023-10-18 16:57:33 - de_pc2ras - INFO - dask local cluster started.
2023-10-18 16:57:33 - de_pc2ras - INFO - start to wor

In [None]:
#| export
@call_parse
@log_args
def de_pc_union(idx1:str, # index of the first point cloud
                idx2:str, # index of the second point cloud
                idx:str, # output, index of the union point cloud
                pc1:str|list=None, # path (in string) or list of path for the first point cloud data
                pc2:str|list=None, # path (in string) or list of path for the second point cloud data
                pc:str|list=None, #output, path (in string) or list of path for the union point cloud data
                pc_chunk_size:int=None, # chunk size in output data,optional
                n_pc_chunk:int=None, # number of chunk in output data, optional
                log:str=None, # log file. Default: no log file
):
    '''Get the union of two point cloud dataset.
    For points at their intersection, pc_data1 rather than pc_data2 is copied to the result pc_data.
    `pc_chunk_size` and `n_pc_chunk` are used to determine the final pc_chunk_size.
    If none of them are provided, the n_pc_chunk is set to n_chunk in idx1.
    The union index is always written; point cloud data are only processed when `pc1` is given.
    '''
    logger = get_logger(logfile=log)

    idx1_zarr = zarr.open(idx1,mode='r'); logger.zarr_info(idx1,idx1_zarr)
    idx2_zarr = zarr.open(idx2,mode='r'); logger.zarr_info(idx2,idx2_zarr)
    logger.info('loading idx1 and idx2 into memory.')
    idx1 = idx1_zarr[:]; idx2 = idx2_zarr[:]

    logger.info('calculate the union')
    # keep the output path before the name `idx` is reused for the index array
    idx_path = idx
    idx, inv_iidx1, inv_iidx2, iidx2 = pc_union(idx1,idx2)
    n_pc = idx.shape[1]
    logger.info(f'number of points in the union: {idx.shape[1]}')
    pc_chunk_size = get_pc_chunk_size_from_n_pc_chunk('idx1','idx',idx1_zarr.shape[1],idx1_zarr.chunks[1],n_pc,logger,pc_chunk_size=pc_chunk_size,n_pc_chunk=n_pc_chunk)
    
    idx_zarr = zarr.open(idx_path,'w',shape=idx.shape,dtype=idx.dtype,chunks=(2,pc_chunk_size))
    logger.info('write union idx')
    idx_zarr[:] = idx
    logger.info('write done')
    logger.zarr_info(idx_path, idx_zarr)
    
    if pc1 is None:
        logger.info('no point cloud data provided, exit.')
        return None

    if isinstance(pc1,str):
        assert isinstance(pc2,str); assert isinstance(pc,str)
        pc1_list = [pc1]; pc2_list = [pc2]; pc_list = [pc]
    else:
        assert isinstance(pc1,list); assert isinstance(pc2,list); assert isinstance(pc,list)
        pc1_list = pc1; pc2_list = pc2; pc_list = pc

    logger.info('starting dask local cluster.')
    # context managers guarantee that client and cluster are released on any exit path
    with LocalCluster() as cluster, Client(cluster):
        logger.info('dask local cluster started.')

        _pc_list = ()
        for pc1_path, pc2_path, pc_path in zip(pc1_list,pc2_list,pc_list):
            pc1_zarr = zarr.open(pc1_path,'r'); pc2_zarr = zarr.open(pc2_path,'r')
            logger.zarr_info(pc1_path, pc1_zarr); logger.zarr_info(pc2_path, pc2_zarr)
            pc1_darr = da.from_zarr(pc1_path); pc2_darr = da.from_zarr(pc2_path)
            logger.darr_info('pc1', pc1_darr); logger.darr_info('pc2',pc2_darr)
            logger.info('set up union pc data dask array.')
            pc_darr = da.empty((n_pc,*pc1_darr.shape[1:]),chunks = (pc_chunk_size,*pc1_darr.chunks[1:]), dtype=pc1_darr.dtype)
            logger.darr_info('pc',pc_darr)
            # all of pc1 goes to its slots; pc2 only contributes the points selected by iidx2
            pc_darr[inv_iidx1] = pc1_darr
            pc_darr[inv_iidx2] = pc2_darr[iidx2]
            # only build the graph here; everything is computed together below
            _pc = pc_darr.to_zarr(pc_path, overwrite=True,compute=False)
            _pc_list += (_pc,)

        logger.info('computing graph setted. doing all the computing.')
        da.compute(*_pc_list)

        logger.info('computing finished.')
    logger.info('dask cluster closed.')

Usage:

In [None]:
# Two synthetic complex point clouds with 3 trailing bands each.
pc_data1 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)
pc_data2 = np.random.rand(800,3).astype(np.float32)+1j*np.random.rand(800,3).astype(np.float32)

# For each cloud draw distinct pixels, order them by flattened position and unravel to (2, n).
idx1 = np.sort(np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False))
idx1 = np.stack(np.unravel_index(idx1,shape=(100,100))).astype(np.int32)

idx2 = np.sort(np.random.choice(np.arange(100*100,dtype=np.int32),size=800,replace=False))
idx2 = np.stack(np.unravel_index(idx2,shape=(100,100))).astype(np.int32)

# Expected result, built in memory with the library function.
idx, inv_iidx1, inv_iidx2, iidx2 = pc_union(idx1,idx2)

pc_data = np.empty((idx.shape[1],*pc_data1.shape[1:]),dtype=pc_data1.dtype)
pc_data[inv_iidx1] = pc_data1
pc_data[inv_iidx2] = pc_data2[iidx2]

# Create every input zarr store and fill it right away.
idx1_zarr = zarr.open('pc/idx1.zarr','w',shape=idx1.shape,dtype=idx1.dtype,chunks=(2,200))
idx1_zarr[:] = idx1
idx2_zarr = zarr.open('pc/idx2.zarr','w',shape=idx2.shape,dtype=idx2.dtype,chunks=(2,200))
idx2_zarr[:] = idx2
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,1))
pc1_zarr[:] = pc_data1
pc2_zarr = zarr.open('pc/pc2.zarr','w',shape=pc_data2.shape,dtype=pc_data2.dtype,chunks=(200,1))
pc2_zarr[:] = pc_data2

In [None]:
# First call: index only. Second call: index plus point cloud data.
de_pc_union('pc/idx1.zarr','pc/idx2.zarr','pc/idx.zarr')
de_pc_union('pc/idx1.zarr','pc/idx2.zarr','pc/idx.zarr','pc/pc1.zarr','pc/pc2.zarr','pc/pc.zarr')
idx_zarr = zarr.open('pc/idx.zarr','r')
pc_zarr = zarr.open('pc/pc.zarr','r')
# The stored results must equal the in-memory reference.
np.testing.assert_array_equal(idx_zarr[:],idx)
np.testing.assert_array_equal(pc_zarr[:],pc_data)

2023-10-18 16:52:14 - de_pc_union - INFO - fetching args:
2023-10-18 16:52:14 - de_pc_union - INFO - idx1 = 'pc/idx1.zarr'
2023-10-18 16:52:14 - de_pc_union - INFO - idx2 = 'pc/idx2.zarr'
2023-10-18 16:52:14 - de_pc_union - INFO - idx = 'pc/idx.zarr'
2023-10-18 16:52:14 - de_pc_union - INFO - pc1 = None
2023-10-18 16:52:14 - de_pc_union - INFO - pc2 = None
2023-10-18 16:52:14 - de_pc_union - INFO - pc = None
2023-10-18 16:52:14 - de_pc_union - INFO - pc_chunk_size = None
2023-10-18 16:52:14 - de_pc_union - INFO - n_pc_chunk = None
2023-10-18 16:52:14 - de_pc_union - INFO - log = None
2023-10-18 16:52:14 - de_pc_union - INFO - fetching args done.
2023-10-18 16:52:14 - de_pc_union - INFO - pc/idx1.zarr zarray shape: (2, 1000)
2023-10-18 16:52:14 - de_pc_union - INFO - pc/idx1.zarr zarray chunks: (2, 200)
2023-10-18 16:52:14 - de_pc_union - INFO - pc/idx1.zarr zarray dtype: int32
2023-10-18 16:52:14 - de_pc_union - INFO - pc/idx2.zarr zarray shape: (2, 800)
2023-10-18 16:52:14 - de_pc_uni

In [None]:
#| export
@call_parse
@log_args
def de_pc_intersect(idx1:str, # index of the first point cloud
                    idx2:str, # index of the second point cloud
                    idx:str, # output, index of the intersection point cloud
                    pc1:str|list=None, # path (in string) or list of path for the first point cloud data
                    pc2:str|list=None, # path (in string) or list of path for the second point cloud data
                    pc:str|list=None, #output, path (in string) or list of path for the intersection point cloud data
                    pc_chunk_size:int=None, # chunk size in output data,optional
                    n_pc_chunk:int=None, # number of chunk in output data, optional
                    prefer_1=True, # save pc1 on intersection to output pc dataset by default `True`. Otherwise, save data from pc2
                    log:str=None, # log file. Default: no log file
):
    '''Get the intersection of two point cloud dataset.
    `pc_chunk_size` and `n_pc_chunk` are used to determine the final pc_chunk_size.
    If none of them are provided, the n_pc_chunk is set to n_chunk in idx1.
    The intersection index is always written; point cloud data are only processed
    when `pc1` or `pc2` is given, and the one selected by `prefer_1` must be provided.
    '''
    logger = get_logger(logfile=log)

    idx1_zarr = zarr.open(idx1,mode='r'); logger.zarr_info(idx1,idx1_zarr)
    idx2_zarr = zarr.open(idx2,mode='r'); logger.zarr_info(idx2,idx2_zarr)
    logger.info('loading idx1 and idx2 into memory.')
    idx1 = idx1_zarr[:]; idx2 = idx2_zarr[:]

    logger.info('calculate the intersection')
    # keep the output path before the name `idx` is reused for the index array
    idx_path = idx
    idx, iidx1, iidx2 = pc_intersect(idx1,idx2)
    n_pc = idx.shape[1]
    logger.info(f'number of points in the intersection: {idx.shape[1]}')
    pc_chunk_size = get_pc_chunk_size_from_n_pc_chunk('idx1','idx',idx1_zarr.shape[1],idx1_zarr.chunks[1],n_pc,logger,pc_chunk_size=pc_chunk_size,n_pc_chunk=n_pc_chunk)
    
    idx_zarr = zarr.open(idx_path,'w',shape=idx.shape,dtype=idx.dtype,chunks=(2,pc_chunk_size))
    logger.info('write intersect idx')
    idx_zarr[:] = idx
    logger.info('write done')
    logger.zarr_info(idx_path, idx_zarr)

    if (pc1 is None) and (pc2 is None):
        logger.info('no point cloud data provided, exit.')
        return None

    if prefer_1:
        logger.info('select pc1 as pc_input.')
        iidx = iidx1; pc_input = pc1
    else:
        logger.info('select pc2 as pc_input.')
        iidx = iidx2; pc_input = pc2

    # fail with a readable message when only the non-preferred dataset was given
    assert pc_input is not None, 'the point cloud data selected by prefer_1 is not provided.'

    if isinstance(pc_input,str):
        assert isinstance(pc,str)
        pc_input_list = [pc_input]; pc_list = [pc]
    else:
        assert isinstance(pc_input,list); assert isinstance(pc,list)
        pc_input_list = pc_input; pc_list = pc

    logger.info('starting dask local cluster.')
    # context managers guarantee that client and cluster are released on any exit path
    with LocalCluster() as cluster, Client(cluster):
        logger.info('dask local cluster started.')

        _pc_list = ()
        for pc_input_path, pc_path in zip(pc_input_list,pc_list):
            pc_input_zarr = zarr.open(pc_input_path,'r')
            logger.zarr_info(pc_input_path,pc_input_zarr)
            pc_input_darr = da.from_zarr(pc_input_path)
            logger.darr_info('pc_input', pc_input_darr)

            logger.info('set up intersect pc data dask array.')
            pc_darr = da.empty((n_pc,*pc_input_darr.shape[1:]),chunks = (pc_chunk_size,*pc_input_darr.chunks[1:]), dtype=pc_input_darr.dtype)
            logger.darr_info('pc',pc_darr)
            pc_darr[:] = pc_input_darr[iidx]
            # only build the graph here; everything is computed together below
            _pc = pc_darr.to_zarr(pc_path, overwrite=True,compute=False)
            _pc_list += (_pc,)

        logger.info('computing graph setted. doing all the computing.')
        da.compute(*_pc_list)

        logger.info('computing finished.')
    logger.info('dask cluster closed.')

Usage:

In [None]:
# Two synthetic complex point clouds with 3 trailing bands each.
pc_data1 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)
pc_data2 = np.random.rand(800,3).astype(np.float32)+1j*np.random.rand(800,3).astype(np.float32)

# For each cloud draw distinct pixels, order them by flattened position and unravel to (2, n).
idx1 = np.sort(np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False))
idx1 = np.stack(np.unravel_index(idx1,shape=(100,100))).astype(np.int32)

idx2 = np.sort(np.random.choice(np.arange(100*100,dtype=np.int32),size=800,replace=False))
idx2 = np.stack(np.unravel_index(idx2,shape=(100,100))).astype(np.int32)

# Expected result, built in memory with the library function; data are taken from the second cloud.
idx, iidx1, iidx2 = pc_intersect(idx1,idx2)

pc_data = np.empty((idx.shape[1],*pc_data1.shape[1:]),dtype=pc_data1.dtype)
pc_data[:] = pc_data2[iidx2]

# Create every input zarr store and fill it right away.
idx1_zarr = zarr.open('pc/idx1.zarr','w',shape=idx1.shape,dtype=idx1.dtype,chunks=(2,200))
idx1_zarr[:] = idx1
idx2_zarr = zarr.open('pc/idx2.zarr','w',shape=idx2.shape,dtype=idx2.dtype,chunks=(2,200))
idx2_zarr[:] = idx2
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,1))
pc1_zarr[:] = pc_data1
pc2_zarr = zarr.open('pc/pc2.zarr','w',shape=pc_data2.shape,dtype=pc_data2.dtype,chunks=(200,1))
pc2_zarr[:] = pc_data2

In [None]:
# First call: index only. Second call: index plus data taken from pc2 (prefer_1=False).
de_pc_intersect('pc/idx1.zarr','pc/idx2.zarr','pc/idx.zarr')
de_pc_intersect('pc/idx1.zarr','pc/idx2.zarr','pc/idx.zarr',pc2='pc/pc2.zarr', pc='pc/pc.zarr',prefer_1=False)
idx_zarr = zarr.open('pc/idx.zarr','r')
pc_zarr = zarr.open('pc/pc.zarr','r')
# The stored results must equal the in-memory reference.
np.testing.assert_array_equal(idx_zarr[:],idx)
np.testing.assert_array_equal(pc_zarr[:],pc_data)

2023-10-18 16:45:03 - de_pc_intersect - INFO - fetching args:
2023-10-18 16:45:03 - de_pc_intersect - INFO - idx1 = 'pc/idx1.zarr'
2023-10-18 16:45:03 - de_pc_intersect - INFO - idx2 = 'pc/idx2.zarr'
2023-10-18 16:45:03 - de_pc_intersect - INFO - idx = 'pc/idx.zarr'
2023-10-18 16:45:03 - de_pc_intersect - INFO - pc1 = None
2023-10-18 16:45:03 - de_pc_intersect - INFO - pc2 = None
2023-10-18 16:45:03 - de_pc_intersect - INFO - pc = None
2023-10-18 16:45:03 - de_pc_intersect - INFO - pc_chunk_size = None
2023-10-18 16:45:03 - de_pc_intersect - INFO - n_pc_chunk = None
2023-10-18 16:45:03 - de_pc_intersect - INFO - prefer_1 = True
2023-10-18 16:45:03 - de_pc_intersect - INFO - log = None
2023-10-18 16:45:03 - de_pc_intersect - INFO - fetching args done.
2023-10-18 16:45:03 - de_pc_intersect - INFO - pc/idx1.zarr zarray shape: (2, 1000)
2023-10-18 16:45:03 - de_pc_intersect - INFO - pc/idx1.zarr zarray chunks: (2, 200)
2023-10-18 16:45:03 - de_pc_intersect - INFO - pc/idx1.zarr zarray dtyp

In [None]:
#| export
@call_parse
@log_args
def de_pc_diff(idx1:str, # index of the first point cloud
               idx2:str, # index of the second point cloud
               idx:str, # output, index of the diff point cloud
               pc1:str|list=None, # path (in string) or list of path for the first point cloud data
               pc:str|list=None, #output, path (in string) or list of path for the diff point cloud data
               pc_chunk_size:int=None, # chunk size in output data,optional
               n_pc_chunk:int=None, # number of chunk in output data, optional
               log:str=None, # log file. Default: no log file
              ):
    '''Get the point cloud in `idx1` that are not in `idx2`.
    `pc_chunk_size` and `n_pc_chunk` are used to determine the final pc_chunk_size.
    If none of them are provided, the n_pc_chunk is set to n_chunk in idx1.
    The diff index is always written; point cloud data are only processed when `pc1` is given.
    '''
    logger = get_logger(logfile=log)

    idx1_zarr = zarr.open(idx1,mode='r'); logger.zarr_info(idx1,idx1_zarr)
    idx2_zarr = zarr.open(idx2,mode='r'); logger.zarr_info(idx2,idx2_zarr)
    logger.info('loading idx1 and idx2 into memory.')
    idx1 = idx1_zarr[:]; idx2 = idx2_zarr[:]

    logger.info('calculate the diff.')
    # keep the output path before the name `idx` is reused for the index array
    idx_path = idx
    idx, iidx1 = pc_diff(idx1,idx2)
    n_pc = idx.shape[1]
    logger.info(f'number of points in the diff: {idx.shape[1]}')
    pc_chunk_size = get_pc_chunk_size_from_n_pc_chunk('idx1','idx',idx1_zarr.shape[1],idx1_zarr.chunks[1],n_pc,logger,pc_chunk_size=pc_chunk_size,n_pc_chunk=n_pc_chunk)
    
    idx_zarr = zarr.open(idx_path,'w',shape=idx.shape,dtype=idx.dtype,chunks=(2,pc_chunk_size))
    # the message used to say "intersect", which was misleading in the log of a diff
    logger.info('write diff idx')
    idx_zarr[:] = idx
    logger.info('write done')
    logger.zarr_info(idx_path, idx_zarr)

    if pc1 is None:
        logger.info('no point cloud data provided, exit.')
        return None

    if isinstance(pc1,str):
        assert isinstance(pc,str)
        pc1_list = [pc1]; pc_list = [pc]
    else:
        assert isinstance(pc1,list); assert isinstance(pc,list)
        pc1_list = pc1; pc_list = pc

    logger.info('starting dask local cluster.')
    # context managers guarantee that client and cluster are released on any exit path
    with LocalCluster() as cluster, Client(cluster):
        logger.info('dask local cluster started.')

        _pc_list = ()
        for pc1_path, pc_path in zip(pc1_list,pc_list):
            pc1_zarr = zarr.open(pc1_path,'r'); logger.zarr_info(pc1_path, pc1_zarr)
            pc1_darr = da.from_zarr(pc1_path); logger.darr_info('pc1', pc1_darr)
            logger.info('set up diff pc data dask array.')
            pc_darr = da.empty((n_pc,*pc1_darr.shape[1:]),chunks = (pc_chunk_size,*pc1_darr.chunks[1:]), dtype=pc1_darr.dtype)
            logger.darr_info('pc',pc_darr)
            pc_darr[:] = pc1_darr[iidx1]
            # only build the graph here; everything is computed together below
            _pc = pc_darr.to_zarr(pc_path, overwrite=True,compute=False)
            _pc_list += (_pc,)

        logger.info('computing graph setted. doing all the computing.')
        da.compute(*_pc_list)

        logger.info('computing finished.')
    logger.info('dask cluster closed.')

Usage:

In [None]:
# Synthetic complex point cloud with 3 trailing bands.
pc_data1 = np.random.rand(1000,3).astype(np.float32)+1j*np.random.rand(1000,3).astype(np.float32)

# For each cloud draw distinct pixels, order them by flattened position and unravel to (2, n).
idx1 = np.sort(np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False))
idx1 = np.stack(np.unravel_index(idx1,shape=(100,100))).astype(np.int32)

idx2 = np.sort(np.random.choice(np.arange(100*100,dtype=np.int32),size=800,replace=False))
idx2 = np.stack(np.unravel_index(idx2,shape=(100,100))).astype(np.int32)

# Expected result, built in memory with the library function.
idx, iidx1 = pc_diff(idx1,idx2)

pc_data = np.empty((idx.shape[1],*pc_data1.shape[1:]),dtype=pc_data1.dtype)
pc_data[:] = pc_data1[iidx1]

# Create every input zarr store and fill it right away.
idx1_zarr = zarr.open('pc/idx1.zarr','w',shape=idx1.shape,dtype=idx1.dtype,chunks=(2,200))
idx1_zarr[:] = idx1
idx2_zarr = zarr.open('pc/idx2.zarr','w',shape=idx2.shape,dtype=idx2.dtype,chunks=(2,200))
idx2_zarr[:] = idx2
pc1_zarr = zarr.open('pc/pc1.zarr','w',shape=pc_data1.shape,dtype=pc_data1.dtype,chunks=(200,1))
pc1_zarr[:] = pc_data1

In [None]:
# First call: index only. Second call: index plus point cloud data.
de_pc_diff('pc/idx1.zarr','pc/idx2.zarr','pc/idx.zarr')
de_pc_diff('pc/idx1.zarr','pc/idx2.zarr','pc/idx.zarr',pc1='pc/pc1.zarr', pc='pc/pc.zarr')
idx_zarr = zarr.open('pc/idx.zarr','r')
pc_zarr = zarr.open('pc/pc.zarr','r')
# The stored results must equal the in-memory reference.
np.testing.assert_array_equal(idx_zarr[:],idx)
np.testing.assert_array_equal(pc_zarr[:],pc_data)

2023-10-18 16:45:07 - de_pc_diff - INFO - fetching args:
2023-10-18 16:45:07 - de_pc_diff - INFO - idx1 = 'pc/idx1.zarr'
2023-10-18 16:45:07 - de_pc_diff - INFO - idx2 = 'pc/idx2.zarr'
2023-10-18 16:45:07 - de_pc_diff - INFO - idx = 'pc/idx.zarr'
2023-10-18 16:45:07 - de_pc_diff - INFO - pc1 = None
2023-10-18 16:45:07 - de_pc_diff - INFO - pc = None
2023-10-18 16:45:07 - de_pc_diff - INFO - pc_chunk_size = None
2023-10-18 16:45:07 - de_pc_diff - INFO - n_pc_chunk = None
2023-10-18 16:45:07 - de_pc_diff - INFO - log = None
2023-10-18 16:45:07 - de_pc_diff - INFO - fetching args done.
2023-10-18 16:45:07 - de_pc_diff - INFO - pc/idx1.zarr zarray shape: (2, 1000)
2023-10-18 16:45:07 - de_pc_diff - INFO - pc/idx1.zarr zarray chunks: (2, 200)
2023-10-18 16:45:07 - de_pc_diff - INFO - pc/idx1.zarr zarray dtype: int32
2023-10-18 16:45:07 - de_pc_diff - INFO - pc/idx2.zarr zarray shape: (2, 800)
2023-10-18 16:45:07 - de_pc_diff - INFO - pc/idx2.zarr zarray chunks: (2, 200)
2023-10-18 16:45:07 

In [None]:
#| export
@call_parse
@log_args
def de_pc_thres_ras(ras:str, # the raster image used for thresholding
                    idx:str, # output, index of selected pixels
                    min_thres:float=None, # minimum value of `ras` pixels to be selected in the output point cloud
                    max_thres:float=None, # maximum value of `ras` pixels to be selected in the output point cloud
                    pc_chunk_size:int=None, # chunk size in output data,optional
                    n_pc_chunk:int=None, # number of chunk in output data, optional
                    log:str=None, # log file. Default: no log file
                   ):
    '''Generate point cloud index based on threshold of one raster image.

    A pixel is selected when its value is within [`min_thres`, `max_thres`];
    both bounds are inclusive and a bound left as None is not applied.
    The coordinates of the selected pixels are written to `idx`.
    '''
    # `idx` is rebound to the index array below, so keep the output path separately
    idx_path = idx
    logger = get_logger(logfile=log)
    ras_zarr = zarr.open(ras, mode='r')
    logger.zarr_info(ras,ras_zarr)

    # announce the load before doing it, so the log shows where time is spent
    logger.info('loading ras into memory.')
    ras = ras_zarr[:]

    has_min = min_thres is not None
    has_max = max_thres is not None
    if has_min and has_max:
        is_pc = (ras >= min_thres)&(ras<= max_thres)
        logger.info('select pc based on min_thres and max_thres.')
    elif has_min:
        is_pc = ras >= min_thres
        logger.info('select pc based on min_thres.')
    elif has_max:
        is_pc = ras<= max_thres
        logger.info('select pc based on max_thres.')
    else:
        is_pc = np.ones_like(ras,dtype=bool)
        logger.info('no input min_thres and max_thres, select all pixels')

    # one row of coordinates per raster axis, one column per selected pixel
    idx = np.stack(np.where(is_pc)).astype(np.int32)
    n_pc = idx.shape[1]
    logger.info(f'number of selected pixels: {n_pc}.')
    pc_chunk_size = get_pc_chunk_size_from_n_az_chunk('ras','idx',ras_zarr.shape[0],ras_zarr.chunks[0],n_pc,logger=logger,pc_chunk_size=pc_chunk_size,n_pc_chunk=n_pc_chunk)
    idx_zarr = zarr.open(idx_path,'w',dtype=idx.dtype,shape=idx.shape,chunks=(2,pc_chunk_size))
    logger.info('writing idx.')
    idx_zarr[:] = idx

In [None]:
# random float32 raster and the reference selection computed in memory
ras = np.random.rand(100,100).astype(np.float32)
min_thres = 0.1; max_thres=0.5
is_pc = (ras>=min_thres) & (ras<=max_thres)
idx = np.stack(np.where(is_pc)).astype(np.int32)
# mode 'w' creates (or overwrites) the store, matching the other fixtures;
# 'rw' is not a zarr persistence mode and does not create a missing array
ras_zarr = zarr.open('pc/ras.zarr','w',shape=ras.shape,dtype=ras.dtype,chunks=(10,100))
ras_zarr[:] = ras

In [None]:
# run the thresholding on the on-disk raster, then compare the written index
# with the in-memory reference
de_pc_thres_ras('pc/ras.zarr','pc/idx.zarr',min_thres,max_thres)
idx_zarr = zarr.open('pc/idx.zarr','r')
np.testing.assert_array_equal(idx_zarr[:],idx)

2023-10-18 16:45:10 - de_pc_thres_ras - INFO - fetching args:
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - ras = 'pc/ras.zarr'
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - idx = 'pc/idx.zarr'
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - min_thres = 0.1
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - max_thres = 0.5
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - pc_chunk_size = None
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - n_pc_chunk = None
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - log = None
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - fetching args done.
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - pc/ras.zarr zarray shape: (100, 100)
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - pc/ras.zarr zarray chunks: (10, 100)
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - pc/ras.zarr zarray dtype: float32
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - loading ras into memory.
2023-10-18 16:45:10 - de_pc_thres_ras - INFO - select pc based on min_thres and max_thres.
2023-10-

In [None]:
#| export
@call_parse
@log_args
def de_pc_thres_pc(idx_in:str,# the index of input pc data
                   pc_in:str, # the point cloud data used for thresholding
                   idx:str, # output, index of selected pixels
                   min_thres:float=None, # minimum value of `pc_in` points to be selected in the output point cloud
                   max_thres:float=None, # maximum value of `pc_in` points to be selected in the output point cloud
                   pc_chunk_size:int=None, # chunk size in output data,optional
                   n_pc_chunk:int=None, # number of chunk in output data, optional
                   log:str=None, # log file. Default: no log file
                   ):
    '''Generate point cloud index based on threshold of one point cloud data.

    A point is selected when its value in `pc_in` is within
    [`min_thres`, `max_thres`]; both bounds are inclusive and a bound left as
    None is not applied. The matching columns of `idx_in` are written to `idx`.
    '''
    # `idx` is rebound to the index array below, so keep the output path separately
    idx_path = idx
    logger = get_logger(logfile=log)
    idx_in_zarr = zarr.open(idx_in,mode='r')
    logger.zarr_info(idx_in,idx_in_zarr)
    pc_in_zarr = zarr.open(pc_in, mode='r')
    logger.zarr_info(pc_in,pc_in_zarr)

    # announce each load before doing it, so the log shows where time is spent
    logger.info('loading idx_in into memory.')
    idx_in = idx_in_zarr[:]
    logger.info('loading pc_in into memory.')
    pc_in = pc_in_zarr[:]

    has_min = min_thres is not None
    has_max = max_thres is not None
    if has_min and has_max:
        is_pc = (pc_in >= min_thres)&(pc_in <= max_thres)
        logger.info('select pc based on min_thres and max_thres.')
    elif has_min:
        is_pc = pc_in >= min_thres
        logger.info('select pc based on min_thres.')
    elif has_max:
        is_pc = pc_in <= max_thres
        logger.info('select pc based on max_thres.')
    else:
        is_pc = np.ones_like(pc_in,dtype=bool)
        logger.info('no input min_thres and max_thres, select all pixels')

    # boolean mask over the point axis keeps the columns of the selected points
    idx = idx_in[:,is_pc]
    n_pc = idx.shape[1]
    logger.info(f'number of selected pixels: {n_pc}.')
    pc_chunk_size = get_pc_chunk_size_from_n_pc_chunk('idx_in','idx',idx_in_zarr.shape[1],idx_in_zarr.chunks[1],n_pc, logger, pc_chunk_size=pc_chunk_size, n_pc_chunk= n_pc_chunk)
    idx_zarr = zarr.open(idx_path,'w',dtype=idx.dtype,shape=idx.shape,chunks=(2,pc_chunk_size))
    logger.info('writing idx.')
    idx_zarr[:] = idx

Usage:

In [None]:
# 1000 random float32 values, one per point
pc_in = np.random.rand(1000).astype(np.float32)
# 1000 distinct pixels of a 100x100 grid, sorted by flat index,
# stored as stacked 2D coordinates of shape (2, 1000)
idx_in = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
idx_in.sort()
idx_in = np.stack(np.unravel_index(idx_in,shape=(100,100))).astype(np.int32)

# reference selection computed in memory with inclusive bounds
min_thres = 0.1; max_thres=0.5
is_pc = (pc_in>=min_thres) & (pc_in<=max_thres)
idx = idx_in[:,is_pc]
# persist the inputs as zarr arrays so the CLI function can read them from disk
pc_in_zarr = zarr.open('pc/pc_in.zarr','w',shape=pc_in.shape,dtype=pc_in.dtype,chunks=(100,))
idx_in_zarr = zarr.open('pc/idx_in.zarr','w',shape=idx_in.shape,dtype=idx_in.dtype,chunks=(2,100))
pc_in_zarr[:] = pc_in; idx_in_zarr[:] = idx_in

In [None]:
# run the thresholding on the on-disk point cloud, then compare the written
# index with the in-memory reference
de_pc_thres_pc('pc/idx_in.zarr','pc/pc_in.zarr','pc/idx.zarr',min_thres,max_thres)
idx_zarr = zarr.open('pc/idx.zarr','r')
np.testing.assert_array_equal(idx_zarr[:],idx)

2023-10-18 16:45:11 - de_pc_thres_pc - INFO - fetching args:
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - idx_in = 'pc/idx_in.zarr'
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - pc_in = 'pc/pc_in.zarr'
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - idx = 'pc/idx.zarr'
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - min_thres = 0.1
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - max_thres = 0.5
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - pc_chunk_size = None
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - n_pc_chunk = None
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - log = None
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - fetching args done.
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - pc/idx_in.zarr zarray shape: (2, 1000)
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - pc/idx_in.zarr zarray chunks: (2, 100)
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - pc/idx_in.zarr zarray dtype: int32
2023-10-18 16:45:11 - de_pc_thres_pc - INFO - pc/pc_in.zarr zarray shape: (1000,)
2023-10-18 16:45:11 

In [None]:
#| export
@call_parse
@log_args
def de_pc_select_data(idx_in:str, # index of the input data
                      idx:str, # index of the output data
                      pc_in:str|list, # path (in string) or list of path for the input point cloud data
                      pc:str|list, # path (in string) or list of path for the output point cloud data
                      pc_chunk_size:int=None, # chunk size in output data,optional
                      n_pc_chunk:int=None, # number of chunk in output data, optional
                      log:str=None, # log file. Default: no log file
                     ):
    '''Generate point cloud data based on its index and one point cloud data.

    Every point in `idx` must also be present in `idx_in`; otherwise an
    AssertionError is raised. `pc_in` and `pc` are either both single paths or
    lists of paths of the same length, paired element by element.
    '''
    idx_in_path = idx_in; idx_path = idx
    logger = get_logger(logfile=log)
    idx_in_zarr = zarr.open(idx_in_path,mode='r'); logger.zarr_info(idx_in_path,idx_in_zarr)
    idx_zarr = zarr.open(idx_path,mode='r'); logger.zarr_info(idx_path,idx_zarr)
    logger.info('loading idx_in and idx into memory.')
    idx_in = idx_in_zarr[:]; idx = idx_zarr[:]
    # positions of the common points within idx_in and within idx
    iidx_in, iidx = pc_intersect(idx_in,idx)[1:]
    # if the intersection covers all of idx, iidx is exactly 0..n-1
    np.testing.assert_array_equal(iidx,np.arange(iidx.shape[0]),err_msg='idx have points that are not covered by idx_in.')
    n_pc = iidx_in.shape[0]
    pc_chunk_size = get_pc_chunk_size_from_pc_chunk_size('idx','pc',idx_zarr.chunks[1],n_pc,logger,pc_chunk_size=pc_chunk_size,n_pc_chunk=n_pc_chunk)

    if isinstance(pc_in,str):
        assert isinstance(pc,str)
        pc_in_list = [pc_in]; pc_list = [pc]
    else:
        assert isinstance(pc_in,list); assert isinstance(pc,list)
        pc_in_list = pc_in; pc_list = pc
    # zip() below would otherwise silently drop the unmatched tail
    assert len(pc_in_list) == len(pc_list), 'pc_in and pc must have the same number of paths.'

    logger.info('starting dask local cluster.')
    # context managers shut down both the client and the cluster even when
    # building or computing the graph raises
    with LocalCluster() as cluster, Client(cluster) as client:
        logger.info('dask local cluster started.')

        _pc_list = ()
        for pc_in_path, pc_path in zip(pc_in_list,pc_list):
            pc_in_zarr = zarr.open(pc_in_path,'r'); logger.zarr_info(pc_in_path, pc_in_zarr)
            pc_in = da.from_zarr(pc_in_path); logger.darr_info('pc_in', pc_in)
            logger.info('set up selected pc data dask array.')
            # output keeps the trailing dimensions and their chunking, only the point axis changes
            pc = da.empty((n_pc,*pc_in.shape[1:]),chunks = (pc_chunk_size,*pc_in.chunks[1:]), dtype=pc_in.dtype)
            logger.darr_info('pc',pc)
            pc[:] = pc_in[iidx_in]
            # compute=False defers the write so all outputs are computed together below
            _pc = pc.to_zarr(pc_path, overwrite=True,compute=False)
            _pc_list += (_pc,)

        logger.info('computing graph setted. doing all the computing.')
        da.compute(*_pc_list)

        logger.info('computing finished.')
    logger.info('dask cluster closed.')

In [None]:
# 1000 random float32 values, one per input point
pc_in = np.random.rand(1000).astype(np.float32)
# 1000 distinct pixels of a 100x100 grid, sorted by flat index,
# stored as stacked 2D coordinates of shape (2, 1000)
idx_in = np.random.choice(np.arange(100*100,dtype=np.int32),size=1000,replace=False)
idx_in.sort()
idx_in = np.stack(np.unravel_index(idx_in,shape=(100,100))).astype(np.int32)

# pick 500 distinct, sorted positions among the input points and build the
# expected output index and data from them
iidx_in = np.random.choice(np.arange(1000,dtype=np.int64),size=500,replace=False); iidx_in.sort()
idx = idx_in[:,iidx_in]
pc = pc_in[iidx_in]

# persist inputs and the target index as zarr arrays for the CLI function
pc_in_zarr = zarr.open('pc/pc_in.zarr','w',shape=pc_in.shape,dtype=pc_in.dtype,chunks=(100,))
idx_in_zarr = zarr.open('pc/idx_in.zarr','w',shape=idx_in.shape,dtype=idx_in.dtype,chunks=(2,100))
idx_zarr = zarr.open('pc/idx.zarr','w',shape=idx.shape,dtype=idx.dtype,chunks=(2,100))
pc_in_zarr[:] = pc_in; idx_in_zarr[:] = idx_in; idx_zarr[:] = idx

In [None]:
# select the data on disk, then compare the written point cloud data with the
# in-memory reference
de_pc_select_data('pc/idx_in.zarr','pc/idx.zarr','pc/pc_in.zarr','pc/pc.zarr')
pc_zarr = zarr.open('pc/pc.zarr','r')
np.testing.assert_array_equal(pc_zarr[:],pc)

2023-10-18 16:45:11 - de_pc_select_data - INFO - fetching args:
2023-10-18 16:45:11 - de_pc_select_data - INFO - idx_in = 'pc/idx_in.zarr'
2023-10-18 16:45:11 - de_pc_select_data - INFO - idx = 'pc/idx.zarr'
2023-10-18 16:45:11 - de_pc_select_data - INFO - pc_in = 'pc/pc_in.zarr'
2023-10-18 16:45:11 - de_pc_select_data - INFO - pc = 'pc/pc.zarr'
2023-10-18 16:45:11 - de_pc_select_data - INFO - pc_chunk_size = None
2023-10-18 16:45:11 - de_pc_select_data - INFO - n_pc_chunk = None
2023-10-18 16:45:11 - de_pc_select_data - INFO - log = None
2023-10-18 16:45:11 - de_pc_select_data - INFO - fetching args done.
2023-10-18 16:45:11 - de_pc_select_data - INFO - pc/idx_in.zarr zarray shape: (2, 1000)
2023-10-18 16:45:11 - de_pc_select_data - INFO - pc/idx_in.zarr zarray chunks: (2, 100)
2023-10-18 16:45:11 - de_pc_select_data - INFO - pc/idx_in.zarr zarray dtype: int32
2023-10-18 16:45:11 - de_pc_select_data - INFO - pc/idx.zarr zarray shape: (2, 500)
2023-10-18 16:45:11 - de_pc_select_data - 

In [None]:
#| hide
# run nbdev's notebook-to-module export
import nbdev; nbdev.nbdev_export()