In [1]:
import h5py
import zarr
import time
from datetime import timedelta
import dask
from dask import delayed
import dask.array as da
import numpy as np
from numcodecs import Blosc
from pathlib import Path
import dask_memusage
import gc
import warnings
import os

from dask.distributed import Client, LocalCluster, as_completed, get_worker

print("All libraries imported succesfully!")

All libraries imported succesfully!


In [14]:
# Conversion arguments
# All names below are notebook-level globals that the later cells read directly.

# NOTE(review): the two absolute paths differ in letter case ("documents/dtu/thesis"
# vs "Documents/DTU/Thesis"); this only works on a case-insensitive filesystem.
input_path = "/Users/tobiasschleiss/documents/dtu/thesis/input/small_wMB_4bin.h5"
output_path = "/Users/tobiasschleiss/Documents/DTU/Thesis/output/output2.ome.zarr"
target_chunks = (64, 64, 64)          # Zarr chunk shape (z, y, x) used for every pyramid level
dataset_path = 'exchange/data'        # path of the volume inside the HDF5 file
temp_chunk_size=(64, 512, 512)        # not referenced by any other cell visible in this notebook
max_mem_gb=15                         # total memory budget, in units of 1e9 bytes
pyramid_levels = 5                    # initial value; the level-calculation cell rebinds it
downsample_factor=2                   # per-axis reduction between consecutive pyramid levels
compression_level=3                   # passed as Blosc clevel
target_top_level_mb=100               # size threshold for the coarsest level (level-calculation cell)
safety_factor = 0.80                  # fraction of max_mem_gb that is actually budgeted

# Total usable bytes. The block-size cell later rebinds available_bytes to a
# per-block budget (divided by mem_divider), so this value is only valid until then.
available_bytes = max_mem_gb * 1e9 * safety_factor

n_workers = 8
mem_divider = 24                      # divisor applied to the budget in the block-size cell
threads_per_worker=1                  # the LocalCluster calls in this notebook hard-code 1 instead of reading this
memory_limit = available_bytes/n_workers   # per-worker budget in bytes (a float)
print(f"Mem per worker (bytes): {memory_limit}")
print(f"Mem per worker (GB): {(memory_limit/1e9)}")

Mem per worker (bytes): 1500000000.0
Mem per worker (GB): 1.5


In [3]:
# Inspect HDF5 file
# Reads only metadata (shape, dtype, chunking, size) and publishes it as notebook
# globals for the sizing cells that follow; no voxel data is loaded here.
with h5py.File(input_path, 'r') as f:
    if dataset_path not in f:
        # Fail immediately with an actionable message instead of printing and then
        # falling through to the lookup below.
        raise KeyError(
            f"Dataset '{dataset_path}' not found in {input_path}. "
            f"Available paths: {list(f.keys())}"
        )

    dataset = f[dataset_path]
    shape = dataset.shape
    dtype = dataset.dtype
    h5_chunks = dataset.chunks              # None when the dataset is stored contiguously
    data_size_gb = dataset.nbytes / (1024**3)
    data_size_mb = dataset.nbytes / (1024**2)
    dtype_size = dtype.itemsize             # bytes per voxel

    print(f"  Shape: {shape}")
    print(f"  Dtype: {dtype}")
    print(f"  Size: {data_size_gb:.2f} GB")
    print(f"  HDF5 chunks: {h5_chunks if h5_chunks else 'Contiguous'}")

  Shape: (1651, 2200, 2200)
  Dtype: float32
  Size: 29.77 GB
  HDF5 chunks: Contiguous


In [4]:
"""Calculate pyramid levels based on target top-level size"""
    
# Count how many pyramid levels are needed so that the coarsest level does not
# exceed target_top_level_mb. Every extra level divides the volume size by
# downsample_factor along each of the three axes.
shrink_per_level = downsample_factor ** 3

levels, current_size_mb = 1, data_size_mb
while current_size_mb > target_top_level_mb:
    levels += 1
    current_size_mb = current_size_mb / shrink_per_level

print(
    f"Target top level: {target_top_level_mb} MB",
    f"Recommended levels: {levels}",
    f"Actual top level: {current_size_mb:.1f} MB",
    sep="\n",
)

# Replaces the value set in the configuration cell.
pyramid_levels = levels

Target top level: 100 MB
Recommended levels: 4
Actual top level: 59.5 MB


In [5]:
# Choose the shape of the blocks that are read from HDF5 so that one block fits
# into a per-block memory budget. Result: block_shape = (block_z, y, x).
z, y, x = shape
block_z, block_y, block_x = target_chunks   # block_x is unpacked but block_shape always uses the full x extent

# Available memory given safety factor
# Rebinds the global available_bytes from the configuration cell to a much smaller
# per-block budget (total budget divided by mem_divider).
available_bytes = (max_mem_gb * safety_factor / mem_divider) * 1e9

# Debug output of the divisor in use.
print(mem_divider)

# Calculate maximum amount of Z-planes that fit in memory
bytes_per_z_plane = y * x * dtype_size
max_z_planes = int(available_bytes / bytes_per_z_plane)

if max_z_planes >= block_z:
    # Align to target_z for efficient zarr chunking
    # Use largest multiple of target_z that fits
    optimal_z = (max_z_planes // block_z) * block_z
    optimal_z = max(block_z, optimal_z)  # At least one chunk depth
    optimal_z = min(optimal_z, z)   # Don't exceed dataset
    block_z = optimal_z
else:
    # A slab of block_z full (y, x) planes does not fit: keep block_z and the full
    # x extent, and shrink the y extent instead.
    print(f"\nFull Target Z plane ({target_chunks[0]}) too large for memory")
    print("Reducing Y axis to fit block in memory")

    # Calculate max Y that fits with target Z and full X
    bytes_per_y_row = block_z * x * dtype_size
    max_y_rows = int(available_bytes / bytes_per_y_row)
    optimal_y = (max_y_rows // block_y) * block_y
    # NOTE(review): this max() can push optimal_y above max_y_rows (and above y when
    # y < block_y); there is no clamp to the dataset height on this path - confirm intended.
    optimal_y = max(block_y, optimal_y)  # At least one chunk depth
    if max_y_rows >= y/2+block_y:
        optimal_y = int(min(optimal_y, ((y/2)//block_y)*block_y+block_y))   # Don't exceed half of y + target_y
    # y no longer holds the dataset height after this line; it is the block height.
    y = optimal_y

block_shape = block_z, y, x

# Calculate actual memory usage
actual_gb = block_z * y * x * dtype_size / 1e9

print(f"\n{'='*60}")
print("Optimal Block Size Calculation")
print(f"{'='*60}")
print(f"Memory budget: {max_mem_gb:.2f} GB (using {int(safety_factor*100)}%)")
print(f"Available for block: {available_bytes/1e9:.2f} GB")
print(f"Actual block size: {actual_gb:.2f} GB")
print(f"{'='*60}")
print(f"Read chunks: {block_shape}")

24

Full Target Z plane (64) too large for memory
Reducing Y axis to fit block in memory

Optimal Block Size Calculation
Memory budget: 15.00 GB (using 80%)
Available for block: 0.50 GB
Actual block size: 0.47 GB
Read chunks: (64, 832, 2200)


In [6]:
# Stage 1 setup: re-read the HDF5 metadata, start a local dask cluster, and create
# the empty level-"0" Zarr array that the copy tasks will fill.
print("DEBUG: entering hybrid_conversion")

with h5py.File(input_path, "r") as f:
        # Same metadata as the inspection cell; re-read so this cell does not rely on it.
        dataset = f[dataset_path]
        shape = dataset.shape
        dtype = dataset.dtype
        dtype_size = dtype.itemsize
        data_size_mb = dataset.nbytes / (1024**2)

        print(f"block shape: {block_shape}")

        # block_shape comes from the block-size cell; its last entry is the full x extent.
        block_z, block_y, block_x = block_shape
        z_total, y_total, x_total = shape

        # For hybrid conversion block_z shouldn't be greater than target_z
        block_z = target_chunks[0]


# Size in bytes of one block as computed by the block-size cell (uses block_shape,
# not the block_z override above). Not referenced again in the visible cells.
read_chunks_bytes = np.prod(block_shape) * dtype_size

print(f"Number of Workers: {n_workers} memory per worker {memory_limit}")

# One single-threaded process per worker, each limited to memory_limit bytes.
cluster = LocalCluster(
    n_workers=n_workers,
    threads_per_worker=1,
    processes=True,
    memory_limit=memory_limit
)
client = Client(cluster)
print(f"Dask dashboard: {client.dashboard_link}")

# NOTE(review): hard-coded absolute directory rather than one derived from output_path.
log_path = Path("/Users/tobiasschleiss/Documents/DTU/Thesis/output/") / "memusage.csv"  # Save in output folder
# Attaches dask_memusage to the scheduler; presumably it writes worker memory samples
# to log_path - confirm against the dask_memusage documentation.
dask_memusage.install(cluster.scheduler, str(log_path))
print(f"Memory logging to: {log_path}")

# mode="w" creates the group, replacing whatever already exists at output_path.
store = zarr.NestedDirectoryStore(output_path)
root = zarr.open_group(store, mode="w")
compressor = Blosc(cname="zstd", clevel=compression_level, shuffle=Blosc.BITSHUFFLE)

# Full-resolution array; the copy tasks write into it by region.
root.create_dataset(
    "0",
    shape=shape,
    chunks=target_chunks,
    dtype=dtype,
    compressor=compressor
)

# Drop the notebook-side handles; each copy task opens the store itself.
del root, store

@dask.delayed
def copy_block(z_start, z_end, y_start, y_end, x_start, x_end):
    """Copy one box of voxels from the HDF5 input into array "0" of the Zarr output.

    The half-open index ranges [z_start, z_end), [y_start, y_end) and
    [x_start, x_end) select the same region in the source and the destination.
    Reads the globals ``input_path``, ``dataset_path`` and ``output_path``.

    Returns:
        tuple: extent of the copied region as (depth, height, width).
    """
    bounds = ((z_start, z_end), (y_start, y_end), (x_start, x_end))
    region = tuple(slice(lo, hi) for lo, hi in bounds)

    # The HDF5 file is closed again before anything is written.
    with h5py.File(input_path, "r") as h5_file:
        voxels = h5_file[dataset_path][region]

    # Every task opens its own handle on the output store.
    out_group = zarr.open_group(zarr.NestedDirectoryStore(output_path), mode="a")
    out_group["0"][region] = voxels

    return tuple(hi - lo for lo, hi in bounds)

# One delayed copy task per read block, tiling the whole volume. Edge blocks are
# clipped to the dataset extent.
tasks = [
    copy_block(
        z_start, min(z_start + block_z, z_total),
        y_start, min(y_start + block_y, y_total),
        x_start, min(x_start + block_x, x_total),
    )
    for z_start in range(0, z_total, block_z)
    for y_start in range(0, y_total, block_y)
    for x_start in range(0, x_total, block_x)
]

total_tasks = len(tasks)
print(f"\n✓ Submitting {total_tasks} tasks for parallel execution...")

start = time.time()

try:
    # Submit all tasks and get futures
    futures = client.compute(tasks)

    # Track progress
    completed = 0
    for future in as_completed(futures):
        # as_completed also yields futures that failed; result() re-raises the
        # worker exception so a failed block is never counted as copied.
        future.result()
        completed += 1
        print(f"completed blocks: {completed}")

    elapsed = time.time() - start

    # Throughput is reported in units of 1e9 bytes per second.
    total_gb = np.prod(shape) * dtype_size / 1e9

    print(f"\n✓ Complete: {elapsed:.1f}s | {total_gb/elapsed:.2f} GB/s")
finally:
    # Always release the worker processes, even when a block failed or the run
    # was interrupted.
    client.close()
    cluster.close()

DEBUG: entering hybrid_conversion
block shape: (64, 832, 2200)
Number of Workers: 8 memory per worker 1500000000.0
Dask dashboard: http://127.0.0.1:8787/status
Memory logging to: /Users/tobiasschleiss/Documents/DTU/Thesis/output/memusage.csv

✓ Submitting 78 tasks for parallel execution...


  store = zarr.NestedDirectoryStore(output_path)


completed blocks: 1
completed blocks: 2
completed blocks: 3
completed blocks: 4
completed blocks: 5
completed blocks: 6




completed blocks: 7




completed blocks: 8




completed blocks: 9
completed blocks: 10




completed blocks: 11




completed blocks: 12
completed blocks: 13




completed blocks: 14




completed blocks: 15




completed blocks: 16




completed blocks: 17
completed blocks: 18




completed blocks: 19
completed blocks: 20
completed blocks: 21




completed blocks: 22
completed blocks: 23




completed blocks: 24




completed blocks: 25




completed blocks: 26
completed blocks: 27
completed blocks: 28
completed blocks: 29
completed blocks: 30
completed blocks: 31
completed blocks: 32
completed blocks: 33
completed blocks: 34
completed blocks: 35
completed blocks: 36
completed blocks: 37
completed blocks: 38
completed blocks: 39
completed blocks: 40
completed blocks: 41




completed blocks: 42




completed blocks: 43




completed blocks: 44




completed blocks: 45
completed blocks: 46
completed blocks: 47
completed blocks: 48




completed blocks: 49




completed blocks: 50




completed blocks: 51




completed blocks: 52
completed blocks: 53




completed blocks: 54
completed blocks: 55




completed blocks: 56
completed blocks: 57




completed blocks: 58




completed blocks: 59
completed blocks: 60




completed blocks: 61




completed blocks: 62
completed blocks: 63




completed blocks: 64




completed blocks: 65




completed blocks: 66




completed blocks: 67
completed blocks: 68




completed blocks: 69
completed blocks: 70




completed blocks: 71
completed blocks: 72
completed blocks: 73
completed blocks: 74
completed blocks: 75
completed blocks: 76
completed blocks: 77
completed blocks: 78

✓ Complete: 39.0s | 0.82 GB/s


Exception in thread WorkerMemory:
ConnectionRefusedError: [Errno 61] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/site-packages/distributed/comm/core.py", line 342, in connect
    comm = await wait_for(
           ^^^^^^^^^^^^^^^
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/site-packages/distributed/utils.py", line 1930, in wait_for
    return await fut
           ^^^^^^^^^
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/site-packages/distributed/comm/tcp.py", line 560, in connect
    convert_stream_closed_error(self, e)
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/site-packages/distributed/comm/tcp.py", line 143, in convert_stream_closed_error
    raise CommClosedError(f"in {obj}: {exc.__class__.__name__}: {exc}") from exc
distributed.comm.core.CommClosedError: i

In [7]:
# Inspection level_0
# Opens array "0" of the output store as a dask array and reports its chunking,
# shape and dtype as a check on the conversion cell's output.
source = da.from_zarr(output_path, component='0')

print(f"  Source chunks: {source.chunksize}")
print(f"  Source shape: {source.shape}")
print(f"  Source dtype: {source.dtype}")

  Source chunks: (64, 64, 64)
  Source shape: (1651, 2200, 2200)
  Source dtype: float32


In [11]:
# Manual cleanup cell: closes whichever client/cluster the notebook globals
# currently point at. Its execution count (11) is higher than that of the cell
# below it (10), i.e. it was run after that cell created its cluster.
client.close()
cluster.close()

In [10]:
#!/usr/bin/env python3

import os
import time
import argparse
import numpy as np
import zarr

from dask.distributed import Client, LocalCluster, wait
from numcodecs import Blosc


# ------------------------------------------------------------
# Worker Task: Block Mean Downsampling
# ------------------------------------------------------------

def mean_downsample_block(
    source_path,
    destination_path,
    block_region,
    destination_coords,
    downsample_factor
):
    """Downsample one source block by block averaging and store the result.

    Args:
        source_path: location of the source array, opened read-only.
        destination_path: location of the destination array, opened "r+".
        block_region: index (tuple of slices) selecting the block to read.
        destination_coords: index (tuple of slices) selecting where to write.
        downsample_factor: edge length of the cubes that are averaged.

    The block is first cropped so that every axis length is a multiple of
    ``downsample_factor``; voxels beyond the last complete cube are dropped.
    The mean is cast back to the block's dtype before it is written.

    Returns:
        None
    """
    source_array = zarr.open(source_path, mode="r")
    destination_array = zarr.open(destination_path, mode="r+")

    raw = source_array[block_region]
    factor = downsample_factor

    # Number of complete cubes along z, y and x.
    nz, ny, nx = (raw.shape[axis] // factor for axis in range(3))
    cropped = raw[:nz * factor, :ny * factor, :nx * factor]

    # Split each axis into (cube index, position inside cube) and average over
    # the three "inside cube" axes.
    cubes = cropped.reshape(nz, factor, ny, factor, nx, factor)
    averaged = cubes.mean(axis=(1, 3, 5)).astype(cropped.dtype)

    destination_array[destination_coords] = averaged


# ------------------------------------------------------------
# Build One Pyramid Level
# ------------------------------------------------------------

def build_level(
    client,
    output_path,
    level,
    downsample_factor,
    target_chunks,
    compressor,
    max_in_flight=128
):
    """Create pyramid level ``level`` from level ``level - 1`` by block averaging.

    Args:
        client: dask distributed client used to submit and gather the tasks.
        output_path: directory that holds one array per level, named "0", "1", ...
        level: index of the level to create; level ``level - 1`` must exist.
        downsample_factor: per-axis reduction factor between the two levels.
        target_chunks: (z, y, x) chunk shape of the new array. One task is
            submitted per destination chunk.
        compressor: compressor passed to ``zarr.open`` for the new array.
        max_in_flight: number of submitted tasks after which the function
            blocks until that batch has finished.

    Raises:
        Exception: whatever a worker task raised; ``client.gather`` re-raises it
            here so a failed block is not silently left unwritten.
    """

    print(f"\n{'='*60}")
    print(f"LEVEL {level}: Block-Mean Downsampling")
    print(f"{'='*60}")

    source_path = os.path.join(output_path, str(level - 1))
    destination_path = os.path.join(output_path, str(level))

    # Load previous level as source
    source = zarr.open(source_path, mode="r")
    current_shape = source.shape
    # Integer division drops an incomplete trailing cube; never go below 1.
    new_shape = tuple(max(1, s // downsample_factor) for s in current_shape)

    print(f"Previous shape: {current_shape}")
    print(f"New shape: {new_shape}")

    # Create destination array (mode="w" replaces an existing array)
    zarr.open(
        destination_path,
        mode="w",
        shape=new_shape,
        chunks=target_chunks,
        dtype=source.dtype,
        compressor=compressor,
        dimension_separator="/"
    )

    chunk_z, chunk_y, chunk_x = target_chunks

    current_total_tasks = (
        (int(np.ceil(new_shape[0] / chunk_z)))*
        (int(np.ceil(new_shape[1] / chunk_y)))*
        (int(np.ceil(new_shape[2] / chunk_x)))
    )
    print(f"Total tasks for current level: {current_total_tasks}")

    print(f"Total recurring in flights: {current_total_tasks // max_in_flight}")

    level_start = time.time()

    futures = []
    completed = 0  # number of finished batches, not individual tasks

    # Iterate over output blocks
    for z_start in range(0, new_shape[0], chunk_z):
        for y_start in range(0, new_shape[1], chunk_y):
            for x_start in range(0, new_shape[2], chunk_x):

                # Block write coordinates in the new array
                destination_coords = (
                    slice(z_start, min(z_start + chunk_z, new_shape[0])),
                    slice(y_start, min(y_start + chunk_y, new_shape[1])),
                    slice(x_start, min(x_start + chunk_x, new_shape[2])),
                )

                # Matching region in the previous level, clipped to its extent
                source_start = (
                    z_start * downsample_factor,
                    y_start * downsample_factor,
                    x_start * downsample_factor,
                )
                source_end = (
                    min((z_start + chunk_z) * downsample_factor, current_shape[0]),
                    min((y_start + chunk_y) * downsample_factor, current_shape[1]),
                    min((x_start + chunk_x) * downsample_factor, current_shape[2]),
                )

                # Block to be read from the previous level
                block_region = (
                    slice(source_start[0], source_end[0]),
                    slice(source_start[1], source_end[1]),
                    slice(source_start[2], source_end[2])
                )

                # Submit the block task
                futures.append(
                    client.submit(
                        mean_downsample_block,
                        source_path,
                        destination_path,
                        block_region,
                        destination_coords,
                        downsample_factor
                    )
                )

                if len(futures) >= max_in_flight:
                    # gather() blocks until the batch is done and re-raises any
                    # task failure; the counter is reported only afterwards.
                    client.gather(futures)
                    futures = []
                    completed += 1
                    print(f"completed: {completed}")

    # Remaining partial batch
    if futures:
        client.gather(futures)

    print(f"Finished level {level} in {(time.time() - level_start):.1f}s")


# ------------------------------------------------------------
# Main Pyramid Builder
# ------------------------------------------------------------


# Build levels 1 .. pyramid_levels - 1 with block-mean downsampling.
# The context managers shut down the client and all worker processes when the
# block exits, including on an exception or a kernel interrupt, so no cluster is
# left running to collide with the next one.
with LocalCluster(
    n_workers=n_workers,
    threads_per_worker=1,
    processes=True,
    memory_limit=memory_limit
) as cluster, Client(cluster) as client:
    print(f"Dask dashboard: {client.dashboard_link}")
    print("="*60)
    print("Building OME-Zarr Multi-Resolution Pyramid (Block-Mean)")
    print("="*60)

    compressor = Blosc(
        cname="zstd",
        clevel=compression_level,
        shuffle=Blosc.BITSHUFFLE
    )

    pyramid_start = time.time()
    # Level 0 already exists; each level is derived from the one before it.
    for level in range(1, pyramid_levels):
        build_level(
            client,
            output_path,
            level,
            downsample_factor,
            target_chunks,
            compressor
        )

    print("\nTotal pyramid time: "
          f"{(time.time() - pyramid_start)/60:.2f} minutes")

Perhaps you already have a cluster running?
Hosting the HTTP server on port 62417 instead


Dask dashboard: http://127.0.0.1:62417/status
Building OME-Zarr Multi-Resolution Pyramid (Block-Mean)

LEVEL 1: Block-Mean Downsampling
Previous shape: (1651, 2200, 2200)
New shape: (825, 1100, 1100)
Total tasks for current level: 4212
Total recurring in flights: 32
completed: 1
completed: 2
completed: 3
completed: 4
completed: 5
completed: 6
completed: 7
completed: 8
completed: 9
completed: 10
completed: 11
completed: 12
completed: 13
completed: 14
completed: 15
completed: 16
completed: 17
completed: 18
completed: 19
completed: 20
completed: 21
completed: 22
completed: 23
completed: 24
completed: 25
completed: 26
completed: 27
completed: 28
completed: 29
completed: 30
completed: 31
completed: 32
Finished level 1 in 19.2s

LEVEL 2: Block-Mean Downsampling
Previous shape: (825, 1100, 1100)
New shape: (412, 550, 550)
Total tasks for current level: 567
Total recurring in flights: 4
completed: 1
completed: 2
completed: 3
completed: 4
Finished level 2 in 2.5s

LEVEL 3: Block-Mean Downsampli

In [12]:
#!/usr/bin/env python3

import os
import time
import argparse
import numpy as np
import zarr

from scipy.ndimage import gaussian_filter
from dask.distributed import Client, LocalCluster, wait
from numcodecs import Blosc


# ------------------------------------------------------------
# Worker Task
# ------------------------------------------------------------

def gaussian_downsample_block(
    src_path,
    dst_path,
    src_slices,
    dst_slices,
    sigma,
    downsample_factor,
    halo
):
    """Blur one source region, strip its halo, subsample it and store the result.

    Args:
        src_path: location of the source array, opened read-only.
        dst_path: location of the destination array, opened "r+".
        src_slices: index selecting the source region including its halo.
        dst_slices: index selecting where the result is written.
        sigma: passed to ``gaussian_filter``.
        downsample_factor: stride used when subsampling the blurred core.
        halo: three (leading, trailing) pairs giving the number of halo voxels
            to strip from the z, y and x axes after filtering.

    Returns:
        None
    """
    source_array = zarr.open(src_path, mode="r")
    destination_array = zarr.open(dst_path, mode="r+")

    # Filter the whole region, halo included, so the core sees its real neighbours.
    smoothed = gaussian_filter(source_array[src_slices], sigma=sigma)

    # Keep only the core: drop the halo voxels on both sides of every axis.
    interior = tuple(
        slice(halo[axis][0], smoothed.shape[axis] - halo[axis][1])
        for axis in range(3)
    )
    core = smoothed[interior]

    # Take every downsample_factor-th voxel, stopping at the last complete group.
    step = downsample_factor
    stops = [(core.shape[axis] // step) * step for axis in range(3)]
    subsampled = core[tuple(slice(None, stop, step) for stop in stops)]

    destination_array[dst_slices] = subsampled


# ------------------------------------------------------------
# Build One Level
# ------------------------------------------------------------

def build_level(
    client,
    output_path,
    level,
    downsample_factor,
    target_chunks,
    sigma,
    compressor,
    max_in_flight=128,
):
    """Create pyramid level ``level`` from level ``level - 1`` by Gaussian blur + subsampling.

    Note: this definition has the same name as the block-mean ``build_level``
    defined earlier in the notebook and replaces it once this cell has run.

    Args:
        client: dask distributed client used to submit and gather the tasks.
        output_path: directory that holds one array per level, named "0", "1", ...
        level: index of the level to create; level ``level - 1`` must exist.
        downsample_factor: per-axis reduction factor between the two levels.
        target_chunks: (z, y, x) chunk shape of the new array. One task is
            submitted per destination chunk.
        sigma: scalar standard deviation of the Gaussian, in source voxels.
        compressor: compressor passed to ``zarr.open`` for the new array.
        max_in_flight: number of submitted tasks after which the function
            blocks until that batch has finished.

    Raises:
        Exception: whatever a worker task raised; ``client.gather`` re-raises it
            here so a failed block is not silently left unwritten.
    """

    print(f"\n{'='*60}")
    print(f"LEVEL {level}: Gaussian Downsampling")
    print(f"{'='*60}")

    src_path = os.path.join(output_path, str(level - 1))
    dst_path = os.path.join(output_path, str(level))

    src = zarr.open(src_path, mode="r")
    current_shape = src.shape

    new_shape = tuple(max(1, s // downsample_factor) for s in current_shape)

    print(f"Previous shape: {current_shape}")
    print(f"New shape: {new_shape}")

    # Create destination array (mode="w" replaces an existing array)
    zarr.open(
        dst_path,
        mode="w",
        shape=new_shape,
        chunks=target_chunks,
        dtype=src.dtype,
        compressor=compressor,
        dimension_separator="/",
    )

    # gaussian_filter is called with its default truncate=4.0, i.e. a kernel radius
    # of int(4.0 * sigma + 0.5) voxels. The halo must be at least that wide,
    # otherwise voxels next to a block border are blurred with reflected data
    # instead of their real neighbours and the blocks show seams.
    halo_width = int(4.0 * sigma + 0.5)

    futures = []

    level_start = time.time()

    chunk_z, chunk_y, chunk_x = target_chunks

    for z0 in range(0, new_shape[0], chunk_z):
        for y0 in range(0, new_shape[1], chunk_y):
            for x0 in range(0, new_shape[2], chunk_x):

                dst_slices = (
                    slice(z0, min(z0 + chunk_z, new_shape[0])),
                    slice(y0, min(y0 + chunk_y, new_shape[1])),
                    slice(x0, min(x0 + chunk_x, new_shape[2])),
                )

                # Compute corresponding source region, clipped to the source extent
                src_start = (
                    z0 * downsample_factor,
                    y0 * downsample_factor,
                    x0 * downsample_factor,
                )
                src_end = (
                    min((z0 + chunk_z) * downsample_factor, current_shape[0]),
                    min((y0 + chunk_y) * downsample_factor, current_shape[1]),
                    min((x0 + chunk_x) * downsample_factor, current_shape[2]),
                )

                # Add halo: grow the region on both sides of every axis, clipped
                # at the array border, and record how much was actually added so
                # the worker can strip exactly that amount again.
                src_slices = []
                halo = []

                for dim in range(3):
                    start = max(0, src_start[dim] - halo_width)
                    end = min(current_shape[dim], src_end[dim] + halo_width)

                    left_halo = src_start[dim] - start
                    right_halo = end - src_end[dim]

                    src_slices.append(slice(start, end))
                    halo.append((left_halo, right_halo))

                futures.append(
                    client.submit(
                        gaussian_downsample_block,
                        src_path,
                        dst_path,
                        tuple(src_slices),
                        dst_slices,
                        sigma,
                        downsample_factor,
                        tuple(halo),
                    )
                )

                if len(futures) >= max_in_flight:
                    # gather() blocks until the batch is done and re-raises any
                    # task failure.
                    client.gather(futures)
                    futures = []

    # Remaining partial batch
    if futures:
        client.gather(futures)

    print(f"Finished level {level} in {(time.time() - level_start):.1f}s")


# ------------------------------------------------------------
# Main Pyramid Builder
# ------------------------------------------------------------



# Standard deviation of the Gaussian applied before subsampling, in source voxels.
sigma = 1.0

# Build levels 1 .. pyramid_levels - 1 with Gaussian downsampling.
# The context managers shut down the client and all worker processes when the
# block exits, including on an exception or a kernel interrupt.
with LocalCluster(
    n_workers=n_workers,
    threads_per_worker=1,
    processes=True,
    memory_limit=memory_limit,
) as cluster, Client(cluster) as client:

    print(f"Dask dashboard: {client.dashboard_link}")
    print()
    print("=" * 60)
    print("Building OME-Zarr Multi-Resolution Pyramid (Gaussian)")
    print("=" * 60)

    compressor = Blosc(
        cname="zstd",
        clevel=compression_level,
        shuffle=Blosc.BITSHUFFLE,
    )

    pyramid_start = time.time()

    # Level 0 already exists; each level is derived from the one before it.
    for level in range(1, pyramid_levels):

        build_level(
            client,
            output_path,
            level,
            downsample_factor,
            target_chunks,
            sigma,
            compressor,
        )

    print("\nTotal pyramid time: "
          f"{(time.time() - pyramid_start)/60:.2f} minutes")

Perhaps you already have a cluster running?
Hosting the HTTP server on port 62484 instead


Dask dashboard: http://127.0.0.1:62484/status

Building OME-Zarr Multi-Resolution Pyramid (Gaussian)

LEVEL 1: Gaussian Downsampling
Previous shape: (1651, 2200, 2200)
New shape: (825, 1100, 1100)


ple_block-4fc6354ce0e1b4840bbff92d42e960ef')" coro=<Worker.execute() done, defined at /opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/site-packages/distributed/worker_state_machine.py:3607>> ended with CancelledError
2026-02-12 17:46:31,674 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/site-packages/distributed/comm/tcp.py", line 226, in read
    frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/site-packages/distributed/worker.py", line 1273, in heartbeat
    response = await retry_operation(
               ^^^^^^^^^^^^^^^^^

KeyboardInterrupt: 

2026-02-12 17:46:33,687 - distributed.nanny - ERROR - Worker process died unexpectedly
2026-02-12 17:46:33,688 - distributed.nanny - ERROR - Worker process died unexpectedly
Process Dask Worker process (from Nanny):
Process Dask Worker process (from Nanny):
Traceback (most recent call last):
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/asyncio/base_events.py", line 691, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/site-packages/distributed/nanny.py", line 985, in run
    await worker.finished()
  File "/opt/homebrew/Caskroom/miniforge/base/envs/jlab/lib/python3.12/site-packages/distributed/core.py", line 494, in finished
    await self._event_finished.wait()
  

In [20]:
# Stage 3: attach the OME-Zarr "multiscales" attribute to the root group and
# print a per-level summary of the arrays found in the store.
banner = "=" * 60

print("Stage 3: Write OME-Zarr Metadata")

# ===== ADD OME-ZARR METADATA =====
print(banner)
print("Adding OME-Zarr Metadata")
print(banner)

store = zarr.NestedDirectoryStore(output_path)
root = zarr.open_group(store, mode="a")  # append mode

# One entry per level; level n is scaled by downsample_factor ** n on every axis.
datasets = []
for level in range(pyramid_levels):
    scale_factor = downsample_factor ** level
    isotropic_scale = [float(scale_factor)] * 3  # z, y, x
    datasets.append(
        dict(
            path=str(level),
            coordinateTransformations=[dict(type='scale', scale=isotropic_scale)],
        )
    )

# Three spatial axes in z, y, x order.
axes = [
    dict(name=axis_name, type='space', unit='micrometer')
    for axis_name in ('z', 'y', 'x')
]

multiscale_entry = {
    'version': '0.4',
    'name': 'pyramid',
    'axes': axes,
    'datasets': datasets,
    'type': 'mean',  # Downsampling method
    'metadata': {
        'description': 'Multi-resolution pyramid',
        'method': 'block mean downsampling'
    }
}
root.attrs['multiscales'] = [multiscale_entry]

print("DONE")
print("\nPyramid Summary:")
print("-" * 60)

for level in range(pyramid_levels):
    arr = zarr.open(store, mode='r')[str(level)]
    # Uncompressed size in units of 1e9 bytes.
    size_gb = np.prod(arr.shape) * arr.dtype.itemsize / 1e9
    print(f"  Level {level}: shape={arr.shape}, chunks={arr.chunks}, size={size_gb:.2f} GB")

Stage 3: Write OME-Zarr Metadata
Adding OME-Zarr Metadata
DONE

Pyramid Summary:
------------------------------------------------------------
  Level 0: shape=(1651, 2200, 2200), chunks=(64, 64, 64), size=31.96 GB
  Level 1: shape=(825, 1100, 1100), chunks=(64, 64, 64), size=3.99 GB
  Level 2: shape=(412, 550, 550), chunks=(64, 64, 64), size=0.50 GB
  Level 3: shape=(206, 275, 275), chunks=(64, 64, 64), size=0.06 GB
  Level 4: shape=(103, 137, 137), chunks=(64, 64, 64), size=0.01 GB
