## Writing ome.zarr data from a CZI image file

* Read the CZI image and its metadata into a 6D array
* Reduce dimensionality to a 5D array by selecting a single scene
* Write the array into an OME-ZARR file

In [None]:
# Detect whether the notebook runs in Google Colab: the `google.colab`
# package can only be imported inside a Colab runtime.
try:
  import google.colab  # noqa: F401  (imported only to probe availability)
  IN_COLAB = True
except ImportError:
  # Catch only the "package not available" case. A bare `except:` would also
  # swallow KeyboardInterrupt/SystemExit and hide unrelated failures.
  IN_COLAB = False

In [None]:
if IN_COLAB:
  # Install dependencies (only done on Colab): upgrade pip first, then
  # install the packages that the import cell below relies on.
  # The leading `!` runs each line as a shell command from the notebook.
  ! pip install --upgrade pip
  ! pip install czitools
  ! pip install ome-zarr
  ! pip install ngff-zarr[validate,dask-image]

In [None]:
from czitools.read_tools import read_tools
from czitools.metadata_tools import czi_metadata as czimd
from czitools.metadata_tools.czi_metadata import CziMetadata
import ngff_zarr as nz
from pathlib import Path
import dask.array as da
import zarr
import os
import requests
import ome_zarr.writer
import ome_zarr.format
from ome_zarr.io import parse_url
from typing import Union, Optional
import shutil
import numpy as np
import xarray as xr
from czitools.utils import logging_tools
from importlib.metadata import version

# Notebook-wide logger obtained from the czitools logging helper
logger = logging_tools.set_logging()

# show currently used version of NGFF specification
ngff_version = ome_zarr.format.CurrentFormat().version
logger.info(f"Using ngff format version: {ngff_version}")
# Log the versions of the libraries used for writing, to make runs reproducible
logger.info(f"ZARR Version: {zarr.__version__}")
logger.info(f"NGFF-ZARR Version: {nz.__version__}")
# Version of the installed `ome-zarr` distribution, read via importlib.metadata
logger.info(f"OME-ZARR Version: {version('ome-zarr')}")

In [None]:
def get_display(metadata: CziMetadata, channel_index: int) -> tuple[float, float, float]:
    """
    Extract display range settings for a specific channel from CZI metadata.

    The normalized display limits stored in ``metadata.channelinfo.clims`` are
    scaled by the channel's maximum intensity (``metadata.maxvalue_list``) to
    obtain the display window in intensity units.

    Args:
        metadata: CziMetadata object containing channel information and display settings
        channel_index: Zero-based index of the channel to extract display settings for

    Returns:
        tuple[float, float, float]: A tuple containing:
            - lower: Minimum intensity value for display window
            - higher: Maximum intensity value for display window
            - maxvalue: Absolute maximum intensity value for the channel

    Raises:
        IndexError: If ``channel_index`` is also out of range for
            ``metadata.maxvalue_list``, i.e. not even the fallback can be computed.

    Note:
        If the display limits cannot be indexed (``IndexError``), the function
        falls back to 0 as minimum and the channel's maximum value as both the
        display maximum and absolute maximum.
    """

    # Try to read the display settings embedded in the CZI file
    try:
        # clims holds normalized limits (0.0-1.0) per channel
        limits = metadata.channelinfo.clims[channel_index]

        # Absolute maximum intensity value for this channel
        maxvalue = metadata.maxvalue_list[channel_index]

        # Scale the normalized limits to actual intensity values
        lower = np.round(limits[0] * maxvalue, 0)
        higher = np.round(limits[1] * maxvalue, 0)

    except IndexError:
        # Fallback when display settings are missing or inaccessible
        print("Calculation from display setting from CZI failed. Use 0-Max instead.")
        lower = 0
        # Read the maximum from the same attribute as the main path
        # (`maxvalue_list`). The previous `metadata.maxvalue` lookup did not
        # match the attribute used everywhere else in this function.
        higher = metadata.maxvalue_list[channel_index]
        maxvalue = higher

    return lower, higher, maxvalue

In [None]:
def _build_omero_channels(metadata: CziMetadata) -> list:
    """Build the per-channel OMERO rendering entries (color, label, window) from CZI metadata."""
    channels_list = []

    for ch_index in range(metadata.image.SizeC):
        # Calculate display range (min/max intensity values) from CZI metadata
        lower, higher, maxvalue = get_display(metadata, ch_index)

        channels_list.append(
            {
                # Skip the first 3 characters of the stored color string and
                # keep the remaining hex color code
                "color": metadata.channelinfo.colors[ch_index][3:],
                "label": metadata.channelinfo.names[ch_index],  # Display name
                "active": True,  # Channel visible by default
                "window": {
                    # Absolute minimum of the value range. This must not be
                    # the display start, otherwise viewers cannot move the
                    # lower limit below it.
                    "min": 0,
                    "start": lower,  # Display window start
                    "end": higher,  # Display window end
                    "max": maxvalue,  # Absolute maximum value
                },
            }
        )

    return channels_list


def write_omezarr(
    array5d: Union[np.ndarray, xr.DataArray, da.Array],
    zarr_path: Union[str, Path],
    metadata: CziMetadata,
    overwrite: bool = False,
) -> Optional[str]:
    """
    Write an array with up to 5 dimensions to OME-ZARR using ome-zarr-py.

    The image data is written first, then OMERO rendering metadata (channel
    colors, labels and display windows) derived from the CZI metadata is
    attached to the root group.

    Args:
        array5d: Input array with up to 5 dimensions. Can be a numpy array,
                xarray DataArray or dask Array. The last two dimensions are
                expected to be Y and X.
        zarr_path: Path where the OME-ZARR file should be written. Can be a
                  string or Path object.
        metadata: Metadata object containing information about the image.
        overwrite: If True, remove existing file at zarr_path before writing.
                  If False and file exists, return None without writing.
                  Default is False.

    Returns:
        The ``zarr_path`` that was passed in if writing succeeded. None if the
        array has more than 5 dimensions, if the target exists and
        ``overwrite`` is False, or if the target location could not be opened.

    Raises:
        Nothing is raised for the cases listed under Returns. Errors from
        removing an existing store or from the underlying writers propagate.

    Examples:
        >>> array, mdata = read_tools.read_6darray("image.czi", use_xarray=True)
        >>> result = write_omezarr(array[0, ...], "output.ome.zarr", mdata, overwrite=True)
        >>> print(f"Written to: {result}")

    Notes:
        - Chunks span one full YX plane each; all leading dimensions use a
          chunk size of 1 (e.g. (1, 1, 1, Y, X) for a 5D array).
        - If the array has a string ``axes`` attribute (as set on the xarray
          returned by the CZI reader), its first character (Scene) is dropped
          and the rest is lower-cased. Otherwise the trailing part of "tczyx"
          matching the number of dimensions is used.
        - Uses the current NGFF (Next Generation File Format) specification.
    """

    # Validate input array dimensions - OME-ZARR supports up to 5D
    ndim = len(array5d.shape)
    if ndim > 5:
        print("Input array as more than 5 dimensions.")
        return None

    # Handle existing files based on overwrite parameter
    if Path(zarr_path).exists():
        if not overwrite:
            # Exit early if file exists and overwrite is disabled
            print(f"File already exists at {zarr_path}. Set overwrite=True to remove.")
            return None
        # Remove existing zarr store completely
        shutil.rmtree(zarr_path)

    # Display the NGFF specification version being used
    ngff_version = ome_zarr.format.CurrentFormat().version
    print(f"Using ngff format version: {ngff_version}")

    # Initialize zarr store and create root group
    location = parse_url(zarr_path, mode="w")
    if location is None:
        # parse_url signals an unusable location by returning None
        print(f"Could not open {zarr_path} for writing.")
        return None
    root = zarr.group(store=location.store, overwrite=overwrite)

    # Determine the axes string: prefer the one attached to the array,
    # otherwise fall back to the trailing part of the default order.
    axes_attr = getattr(array5d, "axes", None)
    if isinstance(axes_attr, str):
        axes = axes_attr[1:].lower()  # Skip first axis (Scene) and convert to lowercase
    else:
        axes = "tczyx"[5 - ndim:]

    # One chunk per YX plane, derived from the shape so that plain numpy and
    # dask arrays (which have no .Y/.X accessors) work as well.
    plane_shape = tuple(int(size) for size in array5d.shape[-2:])
    chunks = (1,) * (ndim - 2) + plane_shape

    # Write the main image data to zarr
    ome_zarr.writer.write_image(
        image=array5d,
        group=root,
        axes=axes,
        storage_options=dict(chunks=chunks),
    )

    # Add OMERO metadata for proper visualization in compatible viewers
    ome_zarr.writer.add_metadata(
        root,
        {
            "omero": {
                "name": metadata.filename,  # Original filename for reference
                "channels": _build_omero_channels(metadata),  # Channel display configurations
            }
        },
    )

    return zarr_path

In [None]:
# Try to find the folder with the example data; otherwise download and unpack
# it from GitHub.

# Folder containing the input data (relative to the current working directory)
INPUT_FOLDER = 'data/' if IN_COLAB else '../../data/'

# Path to the data on GitHub
GITHUB_IMAGES_PATH = "https://raw.githubusercontent.com/sebi06/czitools/main/data.zip"

# Download data
if not os.path.isdir(INPUT_FOLDER):
    import io
    import zipfile

    # Prefer an archive that is already present locally
    compressed_data = './data.zip'
    if not os.path.isfile(compressed_data):
        # The whole body is needed in memory anyway, so no streaming. The
        # timeout keeps the notebook from hanging on a stalled connection.
        response = requests.get(GITHUB_IMAGES_PATH, timeout=60)
        # Fail with a clear HTTP error instead of handing an error page to ZipFile
        response.raise_for_status()
        compressed_data = io.BytesIO(response.content)

    with zipfile.ZipFile(compressed_data, 'r') as zip_accessor:
        zip_accessor.extractall('./')

In [None]:
# Locate the example CZI file; the OME-ZARR store is placed right next to it.
if IN_COLAB:
    filepath = os.path.join(os.getcwd(), "data/CellDivision_T3_Z5_CH2_X240_Y170.czi")
else:
    # The data folder sits two levels above the working directory
    defaultdir = os.path.join(Path(os.getcwd()).resolve().parents[1], "data")
    filepath = os.path.join(defaultdir, "CellDivision_T3_Z5_CH2_X240_Y170.czi")

# filepath is absolute in both branches, so the store path can be built directly:
# swap the ".czi" suffix for ".ome.zarr"
zarr_path = Path(filepath[:-4] + ".ome.zarr")

logger.info(zarr_path)

# Start from a clean state: delete a store left over from an earlier run
remove = True
if remove and zarr_path.exists():
    shutil.rmtree(zarr_path)

In [None]:
# Approach 1: write the OME-ZARR with the write_omezarr() helper defined above.
show_napari: bool = True  # Whether to display the result in napari viewer
scene_id: int = 0  # Index of the scene to extract from the 6D array

# Read the CZI file and return a 6D array with dimension order STCZYX(A)
array, mdata = read_tools.read_6darray(filepath, use_xarray=True)
# Select a single scene, which drops the leading scene dimension
array = array[scene_id, ...]
# Target store: same location as the CZI, ".czi" replaced by ".ome.zarr"
zarr_path1: Path = Path(str(filepath)[:-4] + ".ome.zarr")

print(f"Array Type: {type(array)}, Shape: {array.shape}, Dtype: {array.dtype}")

# Write OME-ZARR using utility function
# (returns None instead of the path when nothing was written)
result_zarr_path1: Optional[str] = write_omezarr(array, zarr_path=str(zarr_path1), metadata=mdata, overwrite=True)
print(f"Written OME-ZARR using ome-zarr-py: {result_zarr_path1}")

In [None]:
# Open the written store in napari. This is skipped on Colab, when the viewer
# is disabled, or when the write step returned None.
if not IN_COLAB and show_napari and result_zarr_path1 is not None:
    # Imported here so napari is only required when the viewer is actually used
    import napari

    viewer: napari.Viewer = napari.Viewer()
    # Load the store through the napari-ome-zarr reader plugin
    viewer.open(result_zarr_path1, plugin="napari-ome-zarr")
    napari.run()


In [None]:
def write_omezarr_ngff(
    array5d, zarr_output_path: str, metadata: CziMetadata, scale_factors: Optional[list] = None, overwrite: bool = False
) -> Optional[nz.NgffImage]:
    """
    Write a 5D array to OME-ZARR NGFF format with multi-scale pyramids.

    The array is wrapped into an NGFF image with dimensions (t, c, z, y, x) and
    the X/Y/Z scaling from the metadata. A multi-scale representation is then
    created and written with ngff-zarr.

    Parameters
    ----------
    array5d : array-like
        5D array with dimensions in order [t, c, z, y, x] representing time, channels,
        z-depth, y-coordinate, and x-coordinate respectively. For an xarray
        DataArray the wrapped array (``.data``) is written; any other array
        type is passed on as is.
    zarr_output_path : str
        File path where the OME-ZARR file will be written. Should end with '.ome.zarr'
        extension by convention.
    metadata : CziMetadata
        Metadata object containing scale information (X, Y, Z pixel sizes) and filename
        for the source image.
    scale_factors : list, optional
        List of downsampling factors for creating multi-scale pyramid levels.
        Default is None, which means [2, 4, 6].
    overwrite : bool, optional
        If True, existing files at zarr_output_path will be removed before writing.
        If False and file exists, function returns None without writing.
        Default is False.

    Returns
    -------
    image or None
        Returns the NGFF image object if successful. Returns None if the array
        has more than 5 dimensions or if the file already exists and
        overwrite=False.

    Notes
    -----
    - Creates multi-scale representations using Gaussian downsampling via dask-image
    - The image name is the metadata filename with its last four characters
      (the ".czi" suffix) replaced by ".ome.zarr"
    """

    # A list literal as default would be shared between calls, so use None as
    # the marker and create a fresh default list for every call.
    if scale_factors is None:
        scale_factors = [2, 4, 6]

    # Validate input array dimensions - OME-ZARR supports up to 5D
    if len(array5d.shape) > 5:
        print("Input array as more than 5 dimensions.")
        return None

    # Check if the output path already exists
    if Path(zarr_output_path).exists():
        if not overwrite:
            print(f"File already exists at {zarr_output_path}. Set overwrite=True to remove.")
            return None
        shutil.rmtree(zarr_output_path)

    # Only an xarray DataArray needs unwrapping. On a numpy array `.data` is a
    # raw memory buffer, not the array, so it must not be used there.
    pixel_data = array5d.data if isinstance(array5d, xr.DataArray) else array5d

    # create NGFF image from the array
    image = nz.to_ngff_image(
        pixel_data,
        dims=["t", "c", "z", "y", "x"],
        scale={"y": metadata.scale.Y, "x": metadata.scale.X, "z": metadata.scale.Z},
        name=metadata.filename[:-4] + ".ome.zarr",
    )

    # create multi-scaled, chunked data structure from the image
    multiscales = nz.to_multiscales(image, scale_factors=scale_factors, method=nz.Methods.DASK_IMAGE_GAUSSIAN)

    # write using ngff-zarr
    nz.to_ngff_zarr(zarr_output_path, multiscales)

    return image

In [None]:
# Approach 2: build the NGFF multiscale structure and write it with ngff-zarr
if IN_COLAB:
    filepath2 = os.path.join(os.getcwd(), "data/CellDivision_T3_Z5_CH2_X240_Y170.czi")
else:
    # The data folder sits two levels above the working directory
    defaultdir = os.path.join(Path(os.getcwd()).resolve().parents[1], "data")
    filepath2 = os.path.join(defaultdir, "CellDivision_T3_Z5_CH2_X240_Y170.czi")

# filepath2 is absolute in both branches; swap the ".czi" suffix for ".ome.zarr"
zarr_path2 = Path(filepath2[:-4] + ".ome.zarr")

# Wrap the array that was read earlier into an NGFF image carrying the
# dimension names and the pixel scaling from the CZI metadata
image = nz.to_ngff_image(
    array.data,
    dims=["t", "c", "z", "y", "x"],
    scale={"y": mdata.scale.Y, "x": mdata.scale.X, "z": mdata.scale.Z},
    name=mdata.filename,
)

# Derive the multi-scale pyramid levels (downsampling factors 2 and 4)
multiscales = nz.to_multiscales(image, scale_factors=[2, 4], method=nz.Methods.DASK_IMAGE_GAUSSIAN)

# Persist the pyramid with ngff-zarr and report the result
nz.to_ngff_zarr(zarr_path2, multiscales)
logger.info(f"NGFF Image: {image}")
logger.info(f"Written OME-ZARR using ngff-zarr: {zarr_path2}")