From 3386155c6849628238ad043ffc3d26e8ee6297d8 Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Mar 2025 14:14:22 +0100
Subject: [PATCH 1/2] Work around bitshuffle if not available.

---
 src/ess/nmx/nexus.py   | 56 ++++++++++++++++++++++++++++++------------
 tests/exporter_test.py |  3 ++-
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py
index 2a077f85..42a11156 100644
--- a/src/ess/nmx/nexus.py
+++ b/src/ess/nmx/nexus.py
@@ -4,10 +4,9 @@
 import pathlib
 import warnings
 from collections.abc import Callable, Generator
 from functools import partial
 from typing import Any, TypeVar
 
-import bitshuffle.h5
 import h5py
 import numpy as np
 import sciline as sl
@@ -34,7 +33,7 @@ def _create_dataset_from_var(
     name: str,
     long_name: str | None = None,
     compression: str | None = None,
-    compression_opts: int | None = None,
+    compression_opts: int | tuple[int, int] | None = None,
     chunks: tuple[int, ...] | int | bool | None = None,
     dtype: Any = None,
 ) -> h5py.Dataset:
@@ -57,22 +56,47 @@ def _create_dataset_from_var(
     return dataset
 
 
-_create_compressed_dataset = partial(
-    _create_dataset_from_var,
-    compression=bitshuffle.h5.H5FILTER,
-    compression_opts=(0, bitshuffle.h5.H5_COMPRESS_LZ4),
-)
-"""Create dataset with compression options.
+def _create_compressed_dataset(*args, **kwargs):
+    """Create dataset with compression options.
+
+    It will try to use ``bitshuffle`` for compression if available.
+    Otherwise, it will warn and fall back to ``gzip`` compression.
+    Explicit ``compression``/``compression_opts`` keyword arguments
+    override these defaults, as they did with the former ``partial``.
 
-[``Bitshuffle/LZ4``](https://github.com/kiyo-masui/bitshuffle) is used for convenience.
-Since ``Dectris`` uses it for their Nexus file compression, it is compatible with DIALS.
-``Bitshuffle/LZ4`` tends to give similar results to
-GZIP and other compression algorithms with better performance.
-A naive implementation of bitshuffle/LZ4 compression,
-shown in [issue #124](https://github.com/scipp/essnmx/issues/124),
-led to 80% file reduction (365 MB vs 1.8 GB).
+    [``Bitshuffle/LZ4``](https://github.com/kiyo-masui/bitshuffle)
+    is used for convenience.
+    Since ``Dectris`` uses it for their Nexus file compression,
+    it is compatible with DIALS.
+    ``Bitshuffle/LZ4`` tends to give similar results to
+    GZIP and other compression algorithms with better performance.
+    A naive implementation of bitshuffle/LZ4 compression,
+    shown in [issue #124](https://github.com/scipp/essnmx/issues/124),
+    led to 80% file reduction (365 MB vs 1.8 GB).
 
-"""
+    """
+    try:
+        import bitshuffle.h5
+    except ImportError:
+        warnings.warn(
+            UserWarning(
+                "Could not find the bitshuffle.h5 module from bitshuffle package. "
+                "The bitshuffle package is not installed or only partially installed. "
+                "Exporting to NeXus files with bitshuffle compression is not possible."
+            ),
+            stacklevel=2,
+        )
+        compression_filter = "gzip"
+        default_compression_opts = 4
+    else:
+        compression_filter = bitshuffle.h5.H5FILTER
+        default_compression_opts = (0, bitshuffle.h5.H5_COMPRESS_LZ4)
+
+    # Only fill in defaults so that a caller-supplied value wins instead of
+    # raising ``TypeError`` for a duplicated keyword argument.
+    kwargs.setdefault("compression", compression_filter)
+    kwargs.setdefault("compression_opts", default_compression_opts)
+    return _create_dataset_from_var(*args, **kwargs)
 
 
 def _create_root_data_entry(file_obj: h5py.File) -> h5py.Group:
diff --git a/tests/exporter_test.py b/tests/exporter_test.py
index 5b519c38..c5481985 100644
--- a/tests/exporter_test.py
+++ b/tests/exporter_test.py
@@ -75,7 +75,8 @@ def test_mcstas_reduction_export_to_bytestream(
     with pytest.warns(
         DeprecationWarning, match='Please use ``export_as_nxlauetof`` instead.'
     ):
-        export_as_nexus(reduced_data, bio)
+        with pytest.warns(UserWarning, match='bitshuffle.h5'):
+            export_as_nexus(reduced_data, bio)
     with h5py.File(bio, 'r') as f:
         assert 'NMX_data' in f
         nmx_data: h5py.Group = f.require_group('NMX_data')

From 10dc2f000c0f2676e28f54d683c7ce9b186087ec Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Mar 2025 14:32:42 +0100
Subject: [PATCH 2/2] Update pytest to filter platform.

---
 tests/exporter_test.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tests/exporter_test.py b/tests/exporter_test.py
index c5481985..379dbe62 100644
--- a/tests/exporter_test.py
+++ b/tests/exporter_test.py
@@ -54,6 +54,17 @@ def reduced_data() -> NMXReducedDataGroup:
     )
 
 
+def _is_bitshuffle_available() -> bool:
+    """Return ``False`` on platforms where bitshuffle is expected to be missing."""
+    import platform
+
+    # ``platform.platform()`` is e.g. ``Windows-10-...`` and never starts with
+    # lowercase ``win``, so the OS is checked with ``platform.system()`` instead.
+    is_arm = platform.machine().startswith("arm")
+    is_windows = platform.system() == 'Windows'
+    return not (is_arm or is_windows)
+
+
 def test_mcstas_reduction_export_to_bytestream(
     reduced_data: NMXReducedDataGroup,
 ) -> None:
@@ -75,8 +86,15 @@ def test_mcstas_reduction_export_to_bytestream(
     with pytest.warns(
         DeprecationWarning, match='Please use ``export_as_nxlauetof`` instead.'
     ):
-        with pytest.warns(UserWarning, match='bitshuffle.h5'):
+        if not _is_bitshuffle_available():
+            # bitshuffle does not build correctly on Windows and ARM machines
+            # We are keeping this test here to catch when it builds correctly
+            # in the future.
+            with pytest.warns(UserWarning, match='bitshuffle.h5'):
+                export_as_nexus(reduced_data, bio)
+        else:
             export_as_nexus(reduced_data, bio)
+
     with h5py.File(bio, 'r') as f:
         assert 'NMX_data' in f
         nmx_data: h5py.Group = f.require_group('NMX_data')