In [2]:
from pathlib import Path
import re
import numpy as np
import dask.array as da
from dask import delayed, compute
from czifile import imread
from ome_zarr.io import parse_url
from ome_zarr.writer import write_image, write_labels
from ome_zarr.scale import Scaler
from tqdm.auto import tqdm
from dask.diagnostics import ProgressBar

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Iterative OME-Zarr writer (no Dask): CZIs -> (C,Z,Y,X), T<=100, forward-fill segs
# Creates:
#   <out_zarr>/
#     image/0         (T,C,Z,Y,X)
#     image/.zattrs   (NGFF multiscales, axes, scales)
#     labels/seg/0    (T,Z,Y,X)
#     labels/seg/.zattrs (NGFF multiscales for labels)
#     labels/.zattrs  (label registry)

from pathlib import Path
import re
import numpy as np
from czifile import imread
import zarr
from ome_zarr.io import parse_url
from tqdm.auto import tqdm

def _index_files_by_time(directory, pattern):
    """Return {time_index: path} for files in `directory` whose name matches `pattern`.

    The time index is the integer value of the first capture group of
    `pattern`. Entries are visited in sorted order so that, if two files
    map to the same index, the winner is deterministic (last in sort order).
    """
    rx = re.compile(pattern)
    found = {}
    for p in sorted(directory.iterdir()):
        if not p.is_file():
            continue
        m = rx.search(p.name)
        if m:
            found[int(m.group(1))] = p
    return found


def save_ome_zarr_iterative(
    img_dir,
    seg_dir,
    out_zarr,
    filename_regex=r"\((\d+)\)\.czi$",
    seg_regex=r"\((\d+)\)\.npy$",
    voxel_size_um=(1.0, 1.0, 1.0),   # (Z, Y, X) in µm
    time_step=None,                  # seconds between frames; None to omit Δt
    chunks=(1, 1, 16, 512, 512),     # (T,C,Z,Y,X) chunks; C-chunk will be clamped
    T_limit=100
):
    """Write a CZI time series plus label volumes into one Zarr store, frame by frame.

    Each image file is read with ``czifile.imread`` and squeezed; the result
    must be 4-D and is interpreted as (C, Z, Y, X). Each segmentation file is
    loaded with ``np.load`` and must be 3-D (Z, Y, X). Only one frame is held
    in memory at a time.

    Layout written under ``out_zarr``::

        image/0        (T, C, Z, Y, X), dtype of the probed image
        labels/seg/0   (T, Z, Y, X), uint32

    NOTE(review): the NGFF spec nests ``labels`` inside the image group; this
    function keeps the existing sibling layout (``image`` next to ``labels``)
    and points the label's ``image-label.source`` at ``../../image``.

    Parameters
    ----------
    img_dir, seg_dir : path-like
        Directories scanned (non-recursively) for image / segmentation files.
    out_zarr : path-like
        Destination store. Any existing content is overwritten, but only
        after the inputs have been validated.
    filename_regex, seg_regex : str
        Patterns searched in each file name; capture group 1 is the integer
        time index used as the T position.
    voxel_size_um : sequence of 3 numbers
        (Z, Y, X) voxel size in micrometers, written as the scale transform.
    time_step : number or None
        Seconds between frames. When falsy, the time scale is 1 and the
        time axis carries no unit.
    chunks : sequence of 5 ints
        (T, C, Z, Y, X) chunk shape. The T chunk is always forced to 1 (one
        write per frame); the remaining entries are clamped to the array
        extent.
    T_limit : int or None
        Maximum number of time points written; ``None`` means no limit.
        Files whose index is >= the resulting T are ignored.

    Returns
    -------
    str
        The store path as reported by ``parse_url``.

    Raises
    ------
    RuntimeError
        If no image file matches ``filename_regex``.
    ValueError
        If an image or label volume does not have the expected shape.

    Notes
    -----
    Time indices without an image file are filled with zeros. Time indices
    without a segmentation file reuse the most recent earlier segmentation
    (forward fill), or zeros if none has been seen yet.
    """
    img_dir, seg_dir = Path(img_dir), Path(seg_dir)

    # ---- collect files by time index (before touching the output store, so
    #      a bad directory/regex cannot wipe an existing store)
    img_by_t = _index_files_by_time(img_dir, filename_regex)
    seg_by_t = _index_files_by_time(seg_dir, seg_regex)
    if not img_by_t:
        raise RuntimeError("No image files matched.")

    # ---- probe shape/dtype (C,Z,Y,X) from the earliest frame
    probe = np.squeeze(imread(str(img_by_t[min(img_by_t)])))
    if probe.ndim != 4:
        raise ValueError(f"Expected (C,Z,Y,X); got {probe.shape}")
    C, Z, Y, X = map(int, probe.shape)
    img_dtype = probe.dtype
    del probe  # do not keep a full frame alive during the write loop

    # ---- time range (indices are used directly as T positions)
    n_frames = max(img_by_t) + 1
    T = n_frames if T_limit is None else min(int(T_limit), n_frames)

    # ---- chunking: one T per chunk, everything else clamped to the extent
    _, cck, zck, yck, xck = chunks
    cck = max(1, min(int(cck), C))
    zck = max(1, min(int(zck), Z))
    yck = max(1, min(int(yck), Y))
    xck = max(1, min(int(xck), X))
    chunks = (1, cck, zck, yck, xck)   # per-T write

    # ---- open the store only now that inputs are known to be usable
    url = parse_url(str(Path(out_zarr)), mode="w")
    if url is None:
        raise RuntimeError(f"Could not open {out_zarr} for writing.")
    root = zarr.group(store=url.store, overwrite=True)

    # ---- create groups & arrays
    img_grp = root.require_group("image")
    img_arr = img_grp.create_dataset(
        "0", shape=(T, C, Z, Y, X),
        chunks=chunks, dtype=img_dtype, overwrite=True
    )
    lbl_root = root.require_group("labels")
    seg_grp  = lbl_root.require_group("seg")
    seg_arr  = seg_grp.create_dataset(
        "0", shape=(T, Z, Y, X),
        chunks=(1, zck, yck, xck), dtype=np.uint32, overwrite=True
    )

    # ---- write NGFF metadata
    # Plain floats so numpy scalars in voxel_size_um stay JSON-serialisable.
    z_um, y_um, x_um = (float(v) for v in voxel_size_um)
    t_scale = float(time_step) if time_step else 1.0
    base_scale_img = [t_scale, 1.0, z_um, y_um, x_um]  # (t,c,z,y,x)
    base_scale_lbl = [t_scale, z_um, y_um, x_um]       # (t,z,y,x)

    # The time axis is always typed; the unit is only claimed when Δt is known.
    t_axis = {"name":"t","type":"time"}
    if time_step:
        t_axis["unit"] = "second"
    img_axes = [
        t_axis,
        {"name":"c","type":"channel"},
        {"name":"z","type":"space","unit":"micrometer"},
        {"name":"y","type":"space","unit":"micrometer"},
        {"name":"x","type":"space","unit":"micrometer"},
    ]
    seg_axes = [a for a in img_axes if a["name"]!="c"]

    img_grp.attrs["multiscales"] = [{
        "version":"0.4",
        "name":"image",
        "axes": img_axes,
        "datasets":[{"path":"0","coordinateTransformations":[{"type":"scale","scale":base_scale_img}]}]
    }]
    seg_grp.attrs["multiscales"] = [{
        "version":"0.4",
        "name":"seg",
        "axes": seg_axes,
        "datasets":[{"path":"0","coordinateTransformations":[{"type":"scale","scale":base_scale_lbl}]}]
    }]
    # Marks the group as a label image and links it to its intensity image.
    seg_grp.attrs["image-label"] = {
        "version":"0.4",
        "source":{"image":"../../image"},
    }
    # The label registry is a list of group paths, not of dicts.
    lbl_root.attrs["labels"] = ["seg"]

    # ---- iterative write with forward-fill for segmentations
    frame_shape = (C, Z, Y, X)
    label_shape = (Z, Y, X)
    prev_seg = None
    for t in tqdm(range(T), desc="Writing T"):
        # image
        img_path = img_by_t.get(t)
        if img_path is None:
            img_arr[t, :, :, :, :] = 0
        else:
            arr = np.squeeze(imread(str(img_path)))         # (C,Z,Y,X)
            if arr.shape != frame_shape:
                raise ValueError(
                    f"{img_path} -> {arr.shape}, expected {frame_shape}"
                )
            if arr.dtype != img_dtype:
                arr = arr.astype(img_dtype, copy=False)
            img_arr[t, :, :, :, :] = arr

        # labels (forward-fill)
        seg_path = seg_by_t.get(t)
        if seg_path is not None:
            lab = np.load(str(seg_path))                    # (Z,Y,X)
            if lab.shape != label_shape:
                raise ValueError(
                    f"{seg_path} -> {lab.shape}, expected {label_shape}"
                )
            lab = lab.astype(np.uint32, copy=False)
            prev_seg = lab
        elif prev_seg is None:
            lab = np.zeros(label_shape, dtype=np.uint32)
        else:
            lab = prev_seg
        seg_arr[t, :, :, :] = lab

    return str(url.path)

In [None]:
# Run the conversion; the returned store path is the cell's displayed output.
save_ome_zarr_iterative(
    img_dir=r'Z:\The_Holy_de_Broglies\Becca\zeisslightsheet7\20250814',
    seg_dir=r'Z:\The_Holy_de_Broglies\Becca\zeisslightsheet7\20250814\labels',
    out_zarr=r'Z:\The_Holy_de_Broglies\Becca\zeisslightsheet7\zarr_store',
    filename_regex=r"\((\d+)\)\.czi$",   # capture group 1 = time index
    seg_regex=r"\((\d+)\)\.npy$",        # capture group 1 = time index
    voxel_size_um=(0.457, 0.12, 0.12),   # (Z, Y, X) voxel size in µm
    time_step=300,                       # seconds between frames
    chunks=(1, 1, 16, 512, 512),         # (T, C, Z, Y, X)
)