In [1]:
from helper_loader import *

# Request DEBUG-level logging for this session.
# NOTE(review): `set_log_level` is not imported by name anywhere in this notebook;
# presumably it is provided by the `helper_loader` star import above — confirm.
set_log_level("DEBUG")

In [2]:
from datetime import datetime
from itertools import product
import time

import pandas as pd
from scipy.spatial import KDTree

from histalign.backend.registration.alignment import interpolate_sparse_3d_array

In [3]:
def replace_workers_default(function: Callable, value: int) -> None:
    """Overwrite the default value of ``function``'s ``workers`` parameter in place.

    This monkeypatches ``function`` itself (via ``__defaults__`` or
    ``__kwdefaults__``), so every later call that omits ``workers`` picks up
    ``value``. It is used to control parallelism of code that calls
    ``function`` without exposing a ``workers`` argument of its own.

    The parameter is located by name. If ``function`` has no parameter called
    ``workers``, the last positional default is replaced instead, which keeps
    the historical behaviour of this helper.

    Args:
        function: A plain Python function whose defaults can be reassigned.
        value: The new default to install.

    Raises:
        TypeError: If ``function`` has no default values to replace.
    """
    # A keyword-only `workers` lives in `__kwdefaults__`, not `__defaults__`.
    keyword_defaults = getattr(function, "__kwdefaults__", None)
    if keyword_defaults and "workers" in keyword_defaults:
        function.__kwdefaults__ = {**keyword_defaults, "workers": value}
        return

    positional_defaults = function.__defaults__
    if not positional_defaults:
        raise TypeError(
            f"{getattr(function, '__qualname__', function)!r} has no default "
            "values to replace"
        )

    # Defaults always belong to the trailing positional parameters, so the names
    # of the defaulted parameters are the tail of the positional argument names.
    target_index = -1  # fall back to the last default when `workers` is absent
    code = getattr(function, "__code__", None)
    if code is not None:
        positional_names = code.co_varnames[: code.co_argcount]
        defaulted_names = positional_names[
            len(positional_names) - len(positional_defaults) :
        ]
        if "workers" in defaulted_names:
            target_index = defaulted_names.index("workers")

    updated_defaults = list(positional_defaults)
    updated_defaults[target_index] = value
    function.__defaults__ = tuple(updated_defaults)

In [4]:
# Directory of the alignment project used as benchmark input.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run on another machine without editing it.
alignment_path = Path(
    "/home/ediun/git/histalign/projects/project_cortical_depth/93e6cae680"
)

# Build the alignment volume for that project; this is the array that gets
# interpolated in the benchmark below. The log output of this cell shows a cached
# volume being loaded from file rather than rebuilt.
# NOTE(review): `return_raw_array=True` presumably returns a bare array rather
# than a wrapper object — confirm against `build_alignment_volume`.
array = build_alignment_volume(alignment_path, return_raw_array=True)

# Mask for the "Isocortex" structure at `Resolution.MICRONS_25`; passed to the
# interpolation as `reference_mask` when `use_mask` is enabled.
mask = load_structure_mask("Isocortex", Resolution.MICRONS_25)

[2024-11-13 13:27:34] - [   DEBUG ] - Found cached volume. Loading from file. (histalign.backend.registration.alignment:55)


In [5]:
# Benchmark `interpolate_sparse_3d_array` over every combination of worker count,
# neighbour count, chunk size and mask usage. Each run is timed with
# `time.perf_counter` and the results table is written to CSV after every run so
# that an interrupted benchmark still leaves its partial results on disk.

# Column name -> dtype of the results table (insertion order is the column order).
result_dtypes = {
    "worker_count": int,
    "neighbours_count": int,
    "chunk_size": int,
    "use_mask": bool,
    "time": float,
}
# Start from an empty, correctly typed table so `results_dataframe` is well
# defined even if no run completes.
results_dataframe = pd.DataFrame(columns=list(result_dtypes)).astype(result_dtypes)

output_file_path = (
    f"resources/interpolation_benchmark_{datetime.now().strftime('%y_%m_%d-%H_%M')}.csv"
)
# Make sure the output directory exists up front: otherwise the first `to_csv`
# would fail only after a full (multi-minute) interpolation run.
Path(output_file_path).parent.mkdir(parents=True, exist_ok=True)

# Parameter grid. The trailing comments are the author's alternative values.
# A worker count of -1 asks `KDTree.query` to use all available CPU threads.
worker_counts = [1, -1]  # [16, -1]
neighbours_counts = [2**3, 3**3, 4**3]  # [3]
chunk_sizes = [1_000_000, 2_000_000, 4_000_000]  # [1_000_000]
use_mask_options = [True]  # [True]

# One dict per finished run; the DataFrame is rebuilt from this list instead of
# being grown with `pd.concat` inside the loop.
benchmark_records = []

# `replace_workers_default` monkeypatches `KDTree.query` globally. Remember the
# original defaults so they can be restored when the benchmark ends (or fails),
# instead of leaking the last benchmarked worker count into the rest of the
# session.
original_query_defaults = KDTree.query.__defaults__
try:
    for worker_count, neighbours_count, chunk_size, use_mask in product(
        worker_counts, neighbours_counts, chunk_sizes, use_mask_options
    ):
        # Patch before starting the clock so only the interpolation is timed.
        replace_workers_default(KDTree.query, worker_count)

        start_time = time.perf_counter()
        # NOTE(review): `use_cache=False` presumably forces a fresh computation
        # on every run — confirm against `interpolate_sparse_3d_array`.
        interpolated_array = interpolate_sparse_3d_array(
            array,
            reference_mask=mask if use_mask else None,
            neighbours=neighbours_count,
            chunk_size=chunk_size,
            use_cache=False,
        )
        end_time = time.perf_counter()

        benchmark_records.append(
            {
                "worker_count": worker_count,
                "neighbours_count": neighbours_count,
                "chunk_size": chunk_size,
                "use_mask": use_mask,
                "time": end_time - start_time,
            }
        )
        results_dataframe = pd.DataFrame(
            benchmark_records, columns=list(result_dtypes)
        ).astype(result_dtypes)

        # Checkpoint after every run; the file is overwritten with the full table.
        results_dataframe.to_csv(output_file_path)
finally:
    KDTree.query.__defaults__ = original_query_defaults

[2024-11-13 13:27:36] - [    INFO ] - Starting interpolation with parameters {kernel: multiquadric, neighbours: 8, epsilon: 1, degree: None, chunk size: 1,000,000, recursive: False}. (histalign.backend.registration.alignment:204)
[2024-11-13 13:27:36] - [    INFO ] - Interpolating chunk 1/8 (12%). (histalign.backend.registration.alignment:238)
[2024-11-13 13:28:02] - [    INFO ] - Interpolating chunk 2/8 (25%). (histalign.backend.registration.alignment:238)
[2024-11-13 13:28:50] - [    INFO ] - Interpolating chunk 3/8 (38%). (histalign.backend.registration.alignment:238)
[2024-11-13 13:29:38] - [    INFO ] - Interpolating chunk 4/8 (50%). (histalign.backend.registration.alignment:238)
[2024-11-13 13:30:20] - [    INFO ] - Interpolating chunk 5/8 (62%). (histalign.backend.registration.alignment:238)
[2024-11-13 13:30:52] - [    INFO ] - Interpolating chunk 6/8 (75%). (histalign.backend.registration.alignment:238)
[2024-11-13 13:31:14] - [    INFO ] - Interpolating chunk 7/8 (88%). (hist

In [6]:
# Display the benchmark results table (one row per timed parameter combination).
results_dataframe

Unnamed: 0,worker_count,neighbours_count,chunk_size,use_mask,time
0,1,8,1000000,True,251.440988
1,1,8,2000000,True,257.203675
2,1,8,4000000,True,253.615925
3,1,27,1000000,True,329.167434
4,1,27,2000000,True,347.489357
5,1,27,4000000,True,379.910391
6,1,64,1000000,True,883.013129
7,1,64,2000000,True,836.081551
8,1,64,4000000,True,903.279962
9,-1,8,1000000,True,35.730645


In [22]:
# Mean run time for each (worker_count, neighbours_count) pair, restricted to
# the runs that used a chunk size of 1,000,000.
(
    results_dataframe.loc[results_dataframe["chunk_size"].eq(1_000_000)]
    .groupby(["worker_count", "neighbours_count"])["time"]
    .mean()
)

worker_count  neighbours_count
-1            8                    35.730645
              27                  106.407889
              64                  802.209781
 1            8                   251.440988
              27                  329.167434
              64                  883.013129
Name: time, dtype: float64