In [1]:
import numpy as np

from helper_loader import *

set_log_level("DEBUG")

In [2]:
client = Client(n_workers=4, threads_per_worker=1, memory_limit="2GB")
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 4
Total threads: 4,Total memory: 7.45 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:46403,Workers: 4
Dashboard: http://127.0.0.1:8787/status,Total threads: 4
Started: Just now,Total memory: 7.45 GiB

0,1
Comm: tcp://127.0.0.1:38191,Total threads: 1
Dashboard: http://127.0.0.1:38467/status,Memory: 1.86 GiB
Nanny: tcp://127.0.0.1:38721,
Local directory: /tmp/dask-scratch-space/worker-dgks85ib,Local directory: /tmp/dask-scratch-space/worker-dgks85ib

0,1
Comm: tcp://127.0.0.1:44389,Total threads: 1
Dashboard: http://127.0.0.1:45139/status,Memory: 1.86 GiB
Nanny: tcp://127.0.0.1:46285,
Local directory: /tmp/dask-scratch-space/worker-v7zw1wd_,Local directory: /tmp/dask-scratch-space/worker-v7zw1wd_

0,1
Comm: tcp://127.0.0.1:41717,Total threads: 1
Dashboard: http://127.0.0.1:34667/status,Memory: 1.86 GiB
Nanny: tcp://127.0.0.1:45887,
Local directory: /tmp/dask-scratch-space/worker-s5evytw1,Local directory: /tmp/dask-scratch-space/worker-s5evytw1

0,1
Comm: tcp://127.0.0.1:42171,Total threads: 1
Dashboard: http://127.0.0.1:42803/status,Memory: 1.86 GiB
Nanny: tcp://127.0.0.1:40435,
Local directory: /tmp/dask-scratch-space/worker-45f3ci_9,Local directory: /tmp/dask-scratch-space/worker-45f3ci_9


In [3]:
alignment_path = Path(
    "/home/ediun/git/histalign/projects/project_cortical_depth/93e6cae680"
)

array = build_alignment_volume(alignment_path, return_raw_array=True)

[2024-11-07 16:15:23] - [   DEBUG ] - Found cached volume. Loading from file. (histalign.backend.registration.alignment:55)


# First idea

Instead of feeding all known points to the `RBFInterpolator`, let's try to take a subset of the volume and feed it to it instead.

In [12]:
def interpolate(
    array: np.ndarray, mask: Optional[np.ndarray] = None, dask: bool = False
) -> np.ndarray:
    interpolated_array = array.copy().astype(np.float64)

    known_coordinates = np.nonzero(interpolated_array)
    known_points = np.array(known_coordinates).T
    known_points = da.from_array(known_points)

    known_values = array[known_coordinates]
    known_values = da.from_array(known_values)

    interpolator = RBFInterpolator(
        known_points,
        known_values,
        kernel="multiquadric",
        neighbors=16,
        epsilon=1,
        degree=None,
    )

    def interpolation_function(chunk: np.ndarray) -> np.ndarray:
        try:
            interpolated_data = interpolator(chunk)
        except np.linalg.LinAlgError:
            interpolated_data = np.zeros(shape=(chunk.shape[0],), dtype=np.float64)

        return interpolated_data

    if mask is not None:
        target_coordinates = np.nonzero(mask)
    else:
        target_coordinates = get_tuple_all_coordinates(interpolated_array.shape)
    target_points = np.array(target_coordinates, np.uint16).T
    if dask:
        target_points = da.from_array(target_points)

    if dask:
        interpolated_array[target_coordinates] = da.map_blocks(
            interpolation_function, target_points, drop_axis=1, dtype=np.float64
        ).compute()
    else:
        interpolated_array[target_coordinates] = interpolation_function(target_points)

    return interpolated_array

In [13]:
mask = load_structure_mask("Somatomotor areas", Resolution.MICRONS_25)

interpolated_array = interpolate(array, mask)

This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.


In [14]:
interpolated_array.shape

(528, 320, 456)

In [16]:
show(vedo.Volume(np.where(mask, interpolated_array, 0)))