Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ dependencies = [
"pandas",
"gemmi",
"defusedxml",
"bitshuffle",
]

dynamic = ["version"]
Expand Down
54 changes: 45 additions & 9 deletions src/ess/nmx/nexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from functools import partial
from typing import Any, TypeVar

import bitshuffle.h5
import h5py
import numpy as np
import sciline as sl
Expand Down Expand Up @@ -34,6 +35,7 @@ def _create_dataset_from_var(
long_name: str | None = None,
compression: str | None = None,
compression_opts: int | None = None,
chunks: tuple[int, ...] | int | bool | None = None,
dtype: Any = None,
) -> h5py.Dataset:
compression_options = {}
Expand All @@ -45,6 +47,7 @@ def _create_dataset_from_var(
dataset = root_entry.create_dataset(
name,
data=var.values if dtype is None else var.values.astype(dtype, copy=False),
chunks=chunks,
**compression_options,
)
if var.unit is not None:
Expand All @@ -56,9 +59,20 @@ def _create_dataset_from_var(

_create_compressed_dataset = partial(
_create_dataset_from_var,
compression="gzip",
compression_opts=4,
compression=bitshuffle.h5.H5FILTER,
compression_opts=(0, bitshuffle.h5.H5_COMPRESS_LZ4),
)
"""Create dataset with compression options.

[``Bitshuffle/LZ4``](https://github.com/kiyo-masui/bitshuffle) is used for convenience.
Since ``Dectris`` uses it for their NeXus file compression, it is compatible with DIALS.
``Bitshuffle/LZ4`` tends to give results similar to
GZIP and other compression algorithms, with better performance.
A naive implementation of bitshuffle/LZ4 compression,
shown in [issue #124](https://github.com/scipp/essnmx/issues/124),
led to an 80% file-size reduction (365 MB vs 1.8 GB).

"""


def _create_root_data_entry(file_obj: h5py.File) -> h5py.Group:
Expand Down Expand Up @@ -393,7 +407,9 @@ def _export_detector_metadata_as_nxlauetof(
def _export_reduced_data_as_nxlauetof(
dg: NMXReducedDataGroup,
output_file: str | pathlib.Path | io.BytesIO,
*,
append_mode: bool = True,
compress_counts: bool = True,
) -> None:
"""Export the reduced data to a NeXus file with the LAUE_TOF application definition.

Expand All @@ -413,6 +429,9 @@ def _export_reduced_data_as_nxlauetof(
If ``False``, the file is opened in None-append mode.
> None-append mode is not supported for now.
> Only append mode is supported for now.
compress_counts:
    If ``True``, the detector counts are compressed using bitshuffle.
    Only the counts are compressed because they are the only dataset
    expected to be large.

"""
if not append_mode:
Expand All @@ -424,12 +443,25 @@ def _export_reduced_data_as_nxlauetof(
# The actual application definition defines it as integer,
# but we keep the original data type for now
num_x, num_y = dg["detector_shape"].value # Probably better way to do this
data_dset = _create_dataset_from_var(
name="data",
root_entry=nx_detector,
var=sc.fold(dg['counts'].data, dim='id', sizes={'x': num_x, 'y': num_y}),
dtype=np.uint,
)
if compress_counts:
data_dset = _create_compressed_dataset(
name="data",
root_entry=nx_detector,
var=sc.fold(
dg['counts'].data, dim='id', sizes={'x': num_x, 'y': num_y}
),
chunks=(num_x, num_y, 1),
dtype=np.uint,
)
else:
data_dset = _create_dataset_from_var(
name="data",
root_entry=nx_detector,
var=sc.fold(
dg['counts'].data, dim='id', sizes={'x': num_x, 'y': num_y}
),
dtype=np.uint,
)
data_dset.attrs["signal"] = 1
_create_dataset_from_var(
name='time_of_flight',
Expand Down Expand Up @@ -462,12 +494,14 @@ def __init__(
chunk_generator: Callable[[FilePath, DetectorName], Generator[T, None, None]],
chunk_insert_key: type[T],
extra_meta: dict[str, sc.Variable] | None = None,
compress_counts: bool = True,
overwrite: bool = False,
) -> None:
from ess.reduce.streaming import EternalAccumulator, StreamProcessor

from .types import FilePath, NMXReducedCounts

self.compress_counts = compress_counts
self._chunk_generator = chunk_generator
self._chunk_insert_key = chunk_insert_key
self._workflow = workflow
Expand Down Expand Up @@ -516,6 +550,8 @@ def add_panel(self, *, detector_id: DetectorIndex | DetectorName) -> None:
results = processor.add_chunk({self._chunk_insert_key: da})

_export_reduced_data_as_nxlauetof(
results[NMXReducedDataGroup], self._output_filename
results[NMXReducedDataGroup],
self._output_filename,
compress_counts=self.compress_counts,
)
return results[NMXReducedDataGroup]
Loading