From 80557ebf18e36711911772b933d5357ca1c5620c Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Fri, 26 Sep 2025 17:11:33 +0200 Subject: [PATCH 1/3] Implement data registry override --- src/ess/reduce/{data.py => data/__init__.py} | 96 ++---- src/ess/reduce/data/_registry.py | 291 +++++++++++++++++++ 2 files changed, 314 insertions(+), 73 deletions(-) rename src/ess/reduce/{data.py => data/__init__.py} (58%) create mode 100644 src/ess/reduce/data/_registry.py diff --git a/src/ess/reduce/data.py b/src/ess/reduce/data/__init__.py similarity index 58% rename from src/ess/reduce/data.py rename to src/ess/reduce/data/__init__.py index f065c2a2..5475299e 100644 --- a/src/ess/reduce/data.py +++ b/src/ess/reduce/data/__init__.py @@ -2,78 +2,12 @@ # Copyright (c) 2025 Scipp contributors (https://github.com/scipp) """Data files bundled with ESSreduce.""" -from functools import cache from pathlib import Path +from ._registry import Entry, LocalRegistry, PoochRegistry, Registry, make_registry -class Registry: - """A registry for data files. - - Note - ---- - This class requires [Pooch](https://www.fatiando.org/pooch/latest/) which - is not a hard dependency of ESSreduce and needs to be installed separately. - """ - - def __init__( - self, - instrument: str, - files: dict[str, str], - version: str, - retry_if_failed: int = 3, - ) -> None: - import pooch - - self._registry = pooch.create( - path=pooch.os_cache(f'ess/{instrument}'), - env=f'ESS_{instrument.upper()}_DATA_DIR', - base_url=f'https://public.esss.dk/groups/scipp/ess/{instrument}/' - + '{version}/', - version=version, - registry=files, - retry_if_failed=retry_if_failed, - ) - self._unzip_processor = pooch.Unzip() - - def __contains__(self, key: str) -> bool: - """Return True if the key is in the registry.""" - return key in self._registry.registry - - @cache # noqa: B019 - def get_path(self, name: str, unzip: bool = False) -> Path: - """Get the path to a file in the registry. - - Downloads the file if necessary. 
- - Note that return values of this method are cached to avoid recomputing - potentially expensive checksums. - This usually means that the ``Registry`` object itself gets stored until the - Python interpreter shuts down. - However, registries are small and do not own resources. - It is anyway expected that the registry objects are stored at - module scope and live until program exit. - - Parameters - ---------- - name: - Name of the file to get the path for. - unzip: - If `True`, unzip the file before returning the path. - - Returns - ------- - : - The Path to the file. - """ - return Path( - self._registry.fetch( - name, processor=self._unzip_processor if unzip else None - ) - ) - - -_bifrost_registry = Registry( - instrument='bifrost', +_bifrost_registry = make_registry( + 'ess/bifrost', files={ "BIFROST_20240914T053723.h5": "md5:0f2fa5c9a851f8e3a4fa61defaa3752e", }, @@ -81,8 +15,8 @@ def get_path(self, name: str, unzip: bool = False) -> Path: ) -_dream_registry = Registry( - instrument='dream', +_dream_registry = make_registry( + 'ess/dream', files={ "TEST_977695_00068064.hdf": "md5:9e6ee9ec70d7c5e8c0c93b9e07e8949f", }, @@ -90,8 +24,8 @@ def get_path(self, name: str, unzip: bool = False) -> Path: ) -_loki_registry = Registry( - instrument='loki', +_loki_registry = make_registry( + 'ess/loki', files={ # Files from LoKI@Larmor detector test experiment # @@ -152,3 +86,19 @@ def dream_coda_test_file() -> Path: See ``tools/shrink_nexus.py``.
""" return _dream_registry.get_path('TEST_977695_00068064.hdf') + + +__all__ = [ + 'Entry', + 'LocalRegistry', + 'PoochRegistry', + 'Registry', + 'bifrost_simulated_elastic', + 'dream_coda_test_file', + 'loki_tutorial_background_run_60248', + 'loki_tutorial_background_run_60393', + 'loki_tutorial_sample_run_60250', + 'loki_tutorial_sample_run_60339', + 'loki_tutorial_sample_transmission_run', + 'make_registry', +] diff --git a/src/ess/reduce/data/_registry.py b/src/ess/reduce/data/_registry.py new file mode 100644 index 00000000..9db9626c --- /dev/null +++ b/src/ess/reduce/data/_registry.py @@ -0,0 +1,291 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2025 Scipp contributors (https://github.com/scipp) + +from __future__ import annotations + +import hashlib +import os +from abc import ABC, abstractmethod +from collections.abc import Mapping +from dataclasses import dataclass +from functools import cache +from pathlib import Path +from typing import Any + +_LOCAL_CACHE_ENV_VAR = "SCIPP_DATA_DIR" +_LOCAL_REGISTRY_ENV_VAR = "SCIPP_OVERRIDE_DATA_DIR" + + +def make_registry( + prefix: str, + files: Mapping[str, str | Entry], + *, + version: str, + base_url: str = "https://public.esss.dk/groups/scipp", + retry_if_failed: int = 3, +) -> Registry: + """Create a file registry object. + + By default, this function creates a :class:`PoochRegistry` to download files + via HTTP from an online file store. + This can be overridden by setting the environment variable + `SCIPP_OVERRIDE_DATA_DIR` to a path on the local file system. + In this case, a :class:`LocalRegistry` is returned. + + Files are specified as a dict using either the Pooch string format or explicitly + constructed :class:`Entry` objects: + + >>> from ess.reduce.data import Entry + >>> files = { + ... "file1.dat": "md5:1234567890abcdef", + ... "file2.csv": Entry(alg="md5", chk="abcdef123456789"), + ... "folder/nested.dat": "blake2b:1234567890abcdef", + ...
"zipped.zip": Entry(alg="blake2b", chk="abcdef123456789", unzip=True), + ... } + + In the example above, the specification for ``file1.dat`` and ``file2.csv`` are + essentially equivalent. + ``folder/nested.dat`` is a file in a subfolder. + Paths like this must always use forward slashes (/) even on Windows. + + As shown above, it is possible to automatically unzip + files by specifying ``unzip=True``. + When calling ``registry.get_path("zipped.zip")`` the file will be unzipped and + a path to the content is returned. + This expects that there is only a single file in the zip archive. + + The complete path to the source file is constructed as follows: + + - Pooch: ``{base_url}/{prefix}/{version}/{name}`` + - Local: ``{SCIPP_OVERRIDE_DATA_DIR}/{prefix}/{version}/{name}`` + + When using Pooch, files are downloaded to the user's cache directory. + This can be controlled with the environment variable ``SCIPP_DATA_DIR``. + + Parameters + ---------- + prefix: + Prefix to add to all file names. + files: + Mapping of file names to checksums or :class:`Entry` objects. + version: + A version string for the files. + base_url: + URL for the online file store. + Ignored if the override environment variable is set. + retry_if_failed: + Number of retries when downloading a file. + Ignored if the override environment variable is set. + + Returns + ------- + : + Either a :class:`PoochRegistry` or :class:`LocalRegistry`. + """ + if (override := os.environ.get(_LOCAL_REGISTRY_ENV_VAR)) is not None: + return LocalRegistry( + _check_local_override_path(override), prefix, files, version=version + ) + return PoochRegistry( + prefix, + files, + version=version, + base_url=base_url, + retry_if_failed=retry_if_failed, + ) + + +def _check_local_override_path(override: str) -> Path: + path = Path(override) + if not path.is_dir(): + raise ValueError( + f"The data override path '{override}' is not a directory.
If you want to " + "download files instead, unset the environment variable " + f"{_LOCAL_REGISTRY_ENV_VAR}." + ) + return path + + +@dataclass(frozen=True, slots=True) +class Entry: + """An entry in a registry.""" + + chk: str + """Checksum.""" + alg: str + """Checksum algorithm.""" + unzip: bool = False + """Whether to unzip the file.""" + + @classmethod + def from_pooch_string(cls, pooch_string: str) -> Entry: + alg, chk = pooch_string.split(":") + return cls(chk=chk, alg=alg) + + +class Registry(ABC): + def __init__(self, files: Mapping[str, str | Entry]) -> None: + self._files = _to_file_entries(files) + + @abstractmethod + def get_path(self, name: str) -> Path: + """Get the path to a file in the registry. + + Depending on the implementation, the file is downloaded if necessary. + + Note that implementations are allowed to cache return values of this method + to avoid recomputing potentially expensive checksums. + This usually means that the ``Registry`` object itself gets stored until the + Python interpreter shuts down. + However, registries are small and do not own resources. + + Parameters + ---------- + name: + Name of the file to get the path for. + + Returns + ------- + : + The Path to the file. + """ + + def _needs_unzip(self, name: str) -> bool: + return self._files[name].unzip + + +class PoochRegistry(Registry): + def __init__( + self, + prefix: str, + files: Mapping[str, str | Entry], + *, + version: str, + base_url: str, + retry_if_failed: int = 3, + ) -> None: + try: + import pooch + except ImportError: + raise ImportError( + "You need to install Pooch to use the PoochRegistry." 
+ ) from None + + self._registry = pooch.create( + path=pooch.os_cache(prefix), + env=_LOCAL_CACHE_ENV_VAR, + base_url=f'{base_url}/{prefix}/{version}/', + registry=_to_pooch_registry(files), + retry_if_failed=retry_if_failed, + ) + self._unzip_processor = pooch.Unzip() + + super().__init__(files) + + @cache # noqa: B019 + def get_path(self, name: str) -> Path: + """Get the path to a file in the registry. + + Downloads the file if necessary. + """ + if self._needs_unzip(name): + paths = self._registry.fetch(name, processor=self._unzip_processor) + if len(paths) != 1: + raise ValueError( + f"Expected exactly one file to unzip, got {len(paths)} in '{name}'." + ) + return Path(paths[0]) + return Path(self._registry.fetch(name)) + + +class LocalRegistry(Registry): + def __init__( + self, path: Path, prefix: str, files: Mapping[str, str | Entry], *, version: str + ) -> None: + super().__init__(files) + self._path = path.resolve().joinpath(*prefix.split("/"), version) + + @cache # noqa: B019 + def get_path(self, name: str) -> Path: + """Get the path to a file in the registry.""" + try: + entry = self._files[name] + except KeyError: + raise ValueError(f"File '{name}' is not in the registry.") from None + + path = self._local_path(name) + if not path.exists(): + raise FileNotFoundError( + f"File '{name}' is registered but does not exist on the file system. " + f"Expected it at '{path}'." + ) + + _check_hash(name, path, entry) + + # TODO unzip + + return path + + def _local_path(self, name: str) -> Path: + # Split on "/" because `name` is always a POSIX-style path, but the return + # value is a system path, i.e., it can be a Windows-style path. 
+ return self._path.joinpath(*name.split("/")) + + +def _check_hash(name: str, path: Path, entry: Entry) -> None: + new_chk = _checksum_of_file(path, algorithm=entry.alg) + if new_chk.lower() != entry.chk.lower(): + raise ValueError( + f"{entry.alg} hash of file '{name}' does not match the known hash: " + f"expected {entry.chk}, got {new_chk}." + ) + + +def _to_file_entries(files: Mapping[str, str | Entry]) -> dict[str, Entry]: + return { + name: entry if isinstance(entry, Entry) else Entry.from_pooch_string(entry) + for name, entry in files.items() + } + + +def _to_pooch_registry(files: Mapping[str, str | Entry]) -> dict[str, str]: + return { + name: f"{entry.alg}:{entry.chk}" if isinstance(entry, Entry) else entry + for name, entry in files.items() + } + + +# Code taken from Scitacean and Pooch. +def _checksum_of_file(path: Path, *, algorithm: str) -> str: + """Compute the checksum of a local file. + + Parameters + ---------- + path: + Path of the file. + algorithm: + Hash algorithm to use. + Can be any algorithm supported by :func:`hashlib.new`. + + Returns + ------- + : + The hex digest of the hash. 
+ """ + chk = _new_hash(algorithm) + # size based on http://git.savannah.gnu.org/gitweb/?p=coreutils.git;a=blob;f=src/ioblksize.h;h=ed2f4a9c4d77462f357353eb73ee4306c28b37f1;hb=HEAD#l23 # noqa: E501 + buffer = memoryview(bytearray(128 * 1024)) + with open(path, "rb", buffering=0) as file: + for n in iter(lambda: file.readinto(buffer), 0): + chk.update(buffer[:n]) + return chk.hexdigest() # type: ignore[no-any-return] + + +def _new_hash(algorithm: str) -> Any: + # Try to use a named constructor instead of hashlib.new where possible + # because that is supposed to be faster, according to + # https://docs.python.org/3/library/hashlib.html#hashlib.new + try: + return getattr(hashlib, algorithm)() + except AttributeError: + return hashlib.new(algorithm, usedforsecurity=False) From 59fa8775abd5f6f9c1266d8e4a74e8f5609bf146 Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Mon, 29 Sep 2025 10:41:12 +0200 Subject: [PATCH 2/3] Unzip files from local source --- src/ess/reduce/data/__init__.py | 6 +- src/ess/reduce/data/_registry.py | 118 ++++++++++++++++++++++++------- 2 files changed, 96 insertions(+), 28 deletions(-) diff --git a/src/ess/reduce/data/__init__.py b/src/ess/reduce/data/__init__.py index 5475299e..43d6945b 100644 --- a/src/ess/reduce/data/__init__.py +++ b/src/ess/reduce/data/__init__.py @@ -1,6 +1,10 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2025 Scipp contributors (https://github.com/scipp) -"""Data files bundled with ESSreduce.""" +"""Data files bundled with ESSreduce. + +This module requires the Pooch package which is not a hard dependency of ESSreduce. +It has to be installed separately with either pip or conda. +""" from pathlib import Path diff --git a/src/ess/reduce/data/_registry.py b/src/ess/reduce/data/_registry.py index 9db9626c..c649813c 100644 --- a/src/ess/reduce/data/_registry.py +++ b/src/ess/reduce/data/_registry.py @@ -43,7 +43,7 @@ def make_registry( ... 
"zipped.zip": Entry(alg="blake2b", chk="abcdef123456789", unzip=True), ... } - In the example above, the specification for ``file1.dat`` and ``file2.csv`` are + In the example above, the specifications for ``file1.dat`` and ``file2.csv`` are essentially equivalent. ``folder/nested.dat`` is a file in a subfolder. Paths like this must always use forward slashes (/) even on Windows. @@ -84,7 +84,12 @@ def make_registry( """ if (override := os.environ.get(_LOCAL_REGISTRY_ENV_VAR)) is not None: return LocalRegistry( - _check_local_override_path(override), prefix, files, version=version + _check_local_override_path(override), + prefix, + files, + version=version, + base_url=base_url, + retry_if_failed=retry_if_failed, ) return PoochRegistry( prefix, @@ -164,22 +169,14 @@ def __init__( base_url: str, retry_if_failed: int = 3, ) -> None: - try: - import pooch - except ImportError: - raise ImportError( - "You need to install Pooch to use the PoochRegistry." - ) from None - - self._registry = pooch.create( - path=pooch.os_cache(prefix), - env=_LOCAL_CACHE_ENV_VAR, - base_url=f'{base_url}/{prefix}/{version}/', - registry=_to_pooch_registry(files), + self._registry = _create_pooch( + prefix, + files, + version=version, + base_url=base_url, retry_if_failed=retry_if_failed, ) - self._unzip_processor = pooch.Unzip() - + self._unzip_processor = _import_pooch().Unzip() super().__init__(files) @cache # noqa: B019 @@ -189,21 +186,36 @@ def get_path(self, name: str) -> Path: Downloads the file if necessary. """ if self._needs_unzip(name): - paths = self._registry.fetch(name, processor=self._unzip_processor) - if len(paths) != 1: - raise ValueError( - f"Expected exactly one file to unzip, got {len(paths)} in '{name}'." 
- ) - return Path(paths[0]) + paths: list[str] = self._registry.fetch( # type: ignore[assignment] + name, processor=self._unzip_processor + ) + return Path(_expect_single_unzipped(paths, name)) return Path(self._registry.fetch(name)) class LocalRegistry(Registry): def __init__( - self, path: Path, prefix: str, files: Mapping[str, str | Entry], *, version: str + self, + source_path: Path, + prefix: str, + files: Mapping[str, str | Entry], + *, + version: str, + base_url: str, + retry_if_failed: int = 3, ) -> None: + # Piggyback off of Pooch to determine the cache directory. + pooch_registry = _create_pooch( + prefix, + files, + version=version, + base_url=base_url, + retry_if_failed=retry_if_failed, + ) + pooch = _import_pooch() + self._unzip_processor = pooch.processors.Unzip(extract_dir=pooch_registry.path) + self._source_path = source_path.resolve().joinpath(*prefix.split("/"), version) super().__init__(files) - self._path = path.resolve().joinpath(*prefix.split("/"), version) @cache # noqa: B019 def get_path(self, name: str) -> Path: @@ -222,14 +234,66 @@ def get_path(self, name: str) -> Path: _check_hash(name, path, entry) - # TODO unzip - + if self._needs_unzip(name): + return Path( + _expect_single_unzipped( + self._unzip_processor(os.fspath(path), "download", None), path + ) + ) return path def _local_path(self, name: str) -> Path: # Split on "/" because `name` is always a POSIX-style path, but the return # value is a system path, i.e., it can be a Windows-style path. - return self._path.joinpath(*name.split("/")) + return self._source_path.joinpath(*name.split("/")) + + +def _import_pooch() -> Any: + try: + import pooch + except ImportError: + raise ImportError( + "You need to install Pooch to access test and tutorial files. 
" + "See https://www.fatiando.org/pooch/latest/index.html" + ) from None + + return pooch + + +def _create_pooch( + prefix: str, + files: Mapping[str, str | Entry], + *, + version: str, + base_url: str, + retry_if_failed: int = 3, +) -> Any: + pooch = _import_pooch() + return pooch.create( + path=pooch.os_cache(prefix), + env=_LOCAL_CACHE_ENV_VAR, + base_url=f'{base_url}/{prefix}/{version}/', + registry=_to_pooch_registry(files), + retry_if_failed=retry_if_failed, + ) + + +def _pooch_unzip_processor(extract_dir: Path) -> Any: + try: + import pooch + except ImportError: + raise ImportError("You need to install Pooch to unzip files.") from None + + return pooch.processors.Unzip(extract_dir=os.fspath(extract_dir)) + + +def _expect_single_unzipped(paths: list[str], archive: str | os.PathLike) -> str: + if len(paths) != 1: + raise ValueError( + f"Expected exactly one file to unzip, got {len(paths)} in " + f"'{os.fspath(archive)}'." + ) + return paths[0] def _check_hash(name: str, path: Path, entry: Entry) -> None: From a1874430449218124bb6a889ed1e82f9cbc6542e Mon Sep 17 00:00:00 2001 From: Jan-Lukas Wynen Date: Thu, 9 Oct 2025 10:56:02 +0200 Subject: [PATCH 3/3] Move data registries into tests --- src/ess/reduce/data/__init__.py | 91 ------------------------- tests/conftest.py | 103 +++++++++++++++++++++++++++++ tests/nexus/json_generator_test.py | 21 ++++-- tests/nexus/workflow_test.py | 46 +++++++------ 4 files changed, 143 insertions(+), 118 deletions(-) create mode 100644 tests/conftest.py diff --git a/src/ess/reduce/data/__init__.py b/src/ess/reduce/data/__init__.py index 43d6945b..3d7d859f 100644 --- a/src/ess/reduce/data/__init__.py +++ b/src/ess/reduce/data/__init__.py @@ -6,103 +6,12 @@ It has to be installed separately with either pip or conda. 
""" -from pathlib import Path - from ._registry import Entry, LocalRegistry, PoochRegistry, Registry, make_registry -_bifrost_registry = make_registry( - 'ess/bifrost', - files={ - "BIFROST_20240914T053723.h5": "md5:0f2fa5c9a851f8e3a4fa61defaa3752e", - }, - version='1', -) - - -_dream_registry = make_registry( - 'ess/dream', - files={ - "TEST_977695_00068064.hdf": "md5:9e6ee9ec70d7c5e8c0c93b9e07e8949f", - }, - version='2', -) - - -_loki_registry = make_registry( - 'ess/loki', - files={ - # Files from LoKI@Larmor detector test experiment - # - # Background run 1 (no sample, sample holder/can only, no transmission monitor) - '60248-2022-02-28_2215.nxs': 'md5:d9f17b95274a0fc6468df7e39df5bf03', - # Sample run 1 (sample + sample holder/can, no transmission monitor in beam) - '60250-2022-02-28_2215.nxs': 'md5:6a519ceaacbae702a6d08241e86799b1', - # Sample run 2 (sample + sample holder/can, no transmission monitor in beam) - '60339-2022-02-28_2215.nxs': 'md5:03c86f6389566326bb0cbbd80b8f8c4f', - # Background transmission run (sample holder/can + transmission monitor) - '60392-2022-02-28_2215.nxs': 'md5:9ecc1a9a2c05a880144afb299fc11042', - # Background run 2 (no sample, sample holder/can only, no transmission monitor) - '60393-2022-02-28_2215.nxs': 'md5:bf550d0ba29931f11b7450144f658652', - # Sample transmission run (sample + sample holder/can + transmission monitor) - '60394-2022-02-28_2215.nxs': 'md5:c40f38a62337d86957af925296c4c615', - # Analytical model for the I(Q) of the Poly-Gauss sample - 'PolyGauss_I0-50_Rg-60.h5': 'md5:f5d60d9c2286cb197b8cd4dc82db3d7e', - # XML file for the pixel mask - 'mask_new_July2022.xml': 'md5:421b6dc9db74126ffbc5d88164d017b0', - }, - version='2', -) - - -def bifrost_simulated_elastic() -> Path: - """McStas simulation with elastic incoherent scattering + phonon.""" - return _bifrost_registry.get_path('BIFROST_20240914T053723.h5') - - -def loki_tutorial_sample_run_60250() -> Path: - """Sample run with sample and sample holder/can, no 
transmission monitor in beam.""" - return _loki_registry.get_path('60250-2022-02-28_2215.nxs') - - -def loki_tutorial_sample_run_60339() -> Path: - """Sample run with sample and sample holder/can, no transmission monitor in beam.""" - return _loki_registry.get_path('60339-2022-02-28_2215.nxs') - - -def loki_tutorial_background_run_60248() -> Path: - """Background run with sample holder/can only, no transmission monitor.""" - return _loki_registry.get_path('60248-2022-02-28_2215.nxs') - - -def loki_tutorial_background_run_60393() -> Path: - """Background run with sample holder/can only, no transmission monitor.""" - return _loki_registry.get_path('60393-2022-02-28_2215.nxs') - - -def loki_tutorial_sample_transmission_run() -> Path: - """Sample transmission run (sample + sample holder/can + transmission monitor).""" - return _loki_registry.get_path('60394-2022-02-28_2215.nxs') - - -def dream_coda_test_file() -> Path: - """CODA file for DREAM where most pulses have been removed. - - See ``tools/shrink_nexus.py``. 
- """ - return _dream_registry.get_path('TEST_977695_00068064.hdf') - - __all__ = [ 'Entry', 'LocalRegistry', 'PoochRegistry', 'Registry', - 'bifrost_simulated_elastic', - 'dream_coda_test_file', - 'loki_tutorial_background_run_60248', - 'loki_tutorial_background_run_60393', - 'loki_tutorial_sample_run_60250', - 'loki_tutorial_sample_run_60339', - 'loki_tutorial_sample_transmission_run', 'make_registry', ] diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..6c6ea524 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,103 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2025 Scipp contributors (https://github.com/scipp) + +from pathlib import Path + +import pytest + +from ess.reduce.data import Registry, make_registry + + +@pytest.fixture(scope='session') +def bifrost_registry() -> Registry: + return make_registry( + 'ess/bifrost', + files={ + "BIFROST_20240914T053723.h5": "md5:0f2fa5c9a851f8e3a4fa61defaa3752e", + }, + version='1', + ) + + +@pytest.fixture(scope='session') +def dream_registry() -> Registry: + return make_registry( + 'ess/dream', + files={ + "TEST_977695_00068064.hdf": "md5:9e6ee9ec70d7c5e8c0c93b9e07e8949f", + }, + version='2', + ) + + +@pytest.fixture(scope='session') +def loki_registry() -> Registry: + return make_registry( + 'ess/loki', + files={ + # Files from LoKI@Larmor detector test experiment + # + # Background run 1 (no sample, sample holder/can only, no transmission monitor) # noqa: E501 + '60248-2022-02-28_2215.nxs': 'md5:d9f17b95274a0fc6468df7e39df5bf03', + # Sample run 1 (sample + sample holder/can, no transmission monitor in beam) + '60250-2022-02-28_2215.nxs': 'md5:6a519ceaacbae702a6d08241e86799b1', + # Sample run 2 (sample + sample holder/can, no transmission monitor in beam) + '60339-2022-02-28_2215.nxs': 'md5:03c86f6389566326bb0cbbd80b8f8c4f', + # Background transmission run (sample holder/can + transmission monitor) + '60392-2022-02-28_2215.nxs': 'md5:9ecc1a9a2c05a880144afb299fc11042', + 
# Background run 2 (no sample, sample holder/can only, no transmission monitor) # noqa: E501 + '60393-2022-02-28_2215.nxs': 'md5:bf550d0ba29931f11b7450144f658652', + # Sample transmission run (sample + sample holder/can + transmission monitor) # noqa: E501 + '60394-2022-02-28_2215.nxs': 'md5:c40f38a62337d86957af925296c4c615', + # Analytical model for the I(Q) of the Poly-Gauss sample + 'PolyGauss_I0-50_Rg-60.h5': 'md5:f5d60d9c2286cb197b8cd4dc82db3d7e', + # XML file for the pixel mask + 'mask_new_July2022.xml': 'md5:421b6dc9db74126ffbc5d88164d017b0', + }, + version='2', + ) + + +@pytest.fixture(scope='session') +def bifrost_simulated_elastic(bifrost_registry: Registry) -> Path: + """McStas simulation with elastic incoherent scattering + phonon.""" + return bifrost_registry.get_path('BIFROST_20240914T053723.h5') + + +@pytest.fixture(scope='session') +def loki_tutorial_sample_run_60250(loki_registry: Registry) -> Path: + """Sample run with sample and sample holder/can, no transmission monitor in beam.""" + return loki_registry.get_path('60250-2022-02-28_2215.nxs') + + +@pytest.fixture(scope='session') +def loki_tutorial_sample_run_60339(loki_registry: Registry) -> Path: + """Sample run with sample and sample holder/can, no transmission monitor in beam.""" + return loki_registry.get_path('60339-2022-02-28_2215.nxs') + + +@pytest.fixture(scope='session') +def loki_tutorial_background_run_60248(loki_registry: Registry) -> Path: + """Background run with sample holder/can only, no transmission monitor.""" + return loki_registry.get_path('60248-2022-02-28_2215.nxs') + + +@pytest.fixture(scope='session') +def loki_tutorial_background_run_60393(loki_registry: Registry) -> Path: + """Background run with sample holder/can only, no transmission monitor.""" + return loki_registry.get_path('60393-2022-02-28_2215.nxs') + + +@pytest.fixture(scope='session') +def loki_tutorial_sample_transmission_run(loki_registry: Registry) -> Path: + """Sample transmission run (sample + sample 
holder/can + transmission monitor).""" + return loki_registry.get_path('60394-2022-02-28_2215.nxs') + + +@pytest.fixture(scope='session') +def dream_coda_test_file(dream_registry: Registry) -> Path: + """CODA file for DREAM where most pulses have been removed. + + See ``tools/shrink_nexus.py``. + """ + return dream_registry.get_path('TEST_977695_00068064.hdf') diff --git a/tests/nexus/json_generator_test.py b/tests/nexus/json_generator_test.py index d6b53ce1..83266d38 100644 --- a/tests/nexus/json_generator_test.py +++ b/tests/nexus/json_generator_test.py @@ -1,16 +1,19 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2024 Scipp contributors (https://github.com/scipp) +from pathlib import Path + import pytest import scippnexus as snx from scipp.testing import assert_identical -from ess.reduce import data from ess.reduce.nexus.json_generator import event_data_generator from ess.reduce.nexus.json_nexus import json_nexus_group -def test_event_data_generator_monitor_events_round_trip() -> None: - filename = data.loki_tutorial_sample_run_60250() +def test_event_data_generator_monitor_events_round_trip( + loki_tutorial_sample_run_60250: Path, +) -> None: + filename = loki_tutorial_sample_run_60250 monitor = snx.load(filename, root='entry/instrument/monitor_1/monitor_1_events') generator = event_data_generator(monitor) for i in range(len(monitor)): @@ -20,8 +23,10 @@ def test_event_data_generator_monitor_events_round_trip() -> None: next(generator) -def test_event_data_generator_detector_events_round_trip() -> None: - filename = data.loki_tutorial_sample_run_60250() +def test_event_data_generator_detector_events_round_trip( + loki_tutorial_sample_run_60250: Path, +) -> None: + filename = loki_tutorial_sample_run_60250 detector = snx.load( filename, root='entry/instrument/larmor_detector/larmor_detector_events' ) @@ -31,8 +36,10 @@ def test_event_data_generator_detector_events_round_trip() -> None: assert_identical(group[()], detector[i : i + 1]) -def 
test_event_data_generator_without_event_id_yields_ones() -> None: - filename = data.loki_tutorial_sample_run_60250() +def test_event_data_generator_without_event_id_yields_ones( + loki_tutorial_sample_run_60250: Path, +) -> None: + filename = loki_tutorial_sample_run_60250 base = snx.load(filename, root='entry/instrument/monitor_1/monitor_1_events') monitor = base.bins.drop_coords('event_id') generator = event_data_generator(monitor) diff --git a/tests/nexus/workflow_test.py b/tests/nexus/workflow_test.py index 941d217d..d62a1e43 100644 --- a/tests/nexus/workflow_test.py +++ b/tests/nexus/workflow_test.py @@ -1,13 +1,13 @@ # SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2024 Scipp contributors (https://github.com/scipp) from datetime import UTC, datetime +from pathlib import Path import pytest import scipp as sc import scippnexus as snx from scipp.testing import assert_identical -from ess.reduce import data from ess.reduce.nexus import compute_component_position, workflow from ess.reduce.nexus.types import ( BackgroundRun, @@ -176,11 +176,11 @@ def test_to_transform_raises_if_interval_does_not_yield_unique_value( ) -def test_given_no_sample_load_nexus_sample_returns_group_with_origin_depends_on() -> ( - None -): +def test_given_no_sample_load_nexus_sample_returns_group_with_origin_depends_on( + loki_tutorial_sample_run_60250: Path, +) -> None: filespec = workflow.file_path_to_file_spec( - data.loki_tutorial_sample_run_60250(), preopen=True + loki_tutorial_sample_run_60250, preopen=True ) spec = workflow.unique_component_spec(filespec) assert spec.filename['/entry'][snx.NXsample] == {} @@ -549,9 +549,9 @@ def test_assemble_monitor_preserves_masks(calibrated_monitor, monitor_event_data assert 'mymask' in monitor_data.masks -def test_load_event_monitor_workflow() -> None: +def test_load_event_monitor_workflow(loki_tutorial_sample_run_60250: Path) -> None: wf = LoadMonitorWorkflow(run_types=[SampleRun], monitor_types=[FrameMonitor1]) - wf[Filename[SampleRun]] = 
data.loki_tutorial_sample_run_60250() + wf[Filename[SampleRun]] = loki_tutorial_sample_run_60250 wf[NeXusName[FrameMonitor1]] = 'monitor_1' da = wf.compute(MonitorData[SampleRun, FrameMonitor1]) assert 'position' in da.coords @@ -561,9 +561,9 @@ def test_load_event_monitor_workflow() -> None: assert da.bins.constituents['data'].variances is not None -def test_load_histogram_monitor_workflow() -> None: +def test_load_histogram_monitor_workflow(dream_coda_test_file: Path) -> None: wf = LoadMonitorWorkflow(run_types=[SampleRun], monitor_types=[FrameMonitor1]) - wf[Filename[SampleRun]] = data.dream_coda_test_file() + wf[Filename[SampleRun]] = dream_coda_test_file wf[NeXusName[FrameMonitor1]] = 'monitor_bunker' da = wf.compute(MonitorData[SampleRun, FrameMonitor1]) assert 'position' in da.coords @@ -575,9 +575,9 @@ def test_load_histogram_monitor_workflow() -> None: assert da.variances is not None -def test_load_detector_workflow() -> None: +def test_load_detector_workflow(loki_tutorial_sample_run_60250: Path) -> None: wf = LoadDetectorWorkflow(run_types=[SampleRun], monitor_types=[]) - wf[Filename[SampleRun]] = data.loki_tutorial_sample_run_60250() + wf[Filename[SampleRun]] = loki_tutorial_sample_run_60250 wf[NeXusName[snx.NXdetector]] = 'larmor_detector' da = wf.compute(DetectorData[SampleRun]) assert 'position' in da.coords @@ -588,9 +588,11 @@ def test_load_detector_workflow() -> None: @pytest.mark.parametrize('preopen', [True, False]) -def test_generic_nexus_workflow(preopen: bool) -> None: +def test_generic_nexus_workflow( + preopen: bool, loki_tutorial_sample_run_60250: Path +) -> None: wf = GenericNeXusWorkflow(run_types=[SampleRun], monitor_types=[FrameMonitor1]) - wf[Filename[SampleRun]] = data.loki_tutorial_sample_run_60250() + wf[Filename[SampleRun]] = loki_tutorial_sample_run_60250 wf[NeXusName[FrameMonitor1]] = 'monitor_1' wf[NeXusName[snx.NXdetector]] = 'larmor_detector' wf[PreopenNeXusFile] = preopen @@ -607,9 +609,9 @@ def 
test_generic_nexus_workflow(preopen: bool) -> None: assert da.dims == ('event_time_zero',) -def test_generic_nexus_workflow_load_choppers() -> None: +def test_generic_nexus_workflow_load_choppers(bifrost_simulated_elastic: Path) -> None: wf = GenericNeXusWorkflow(run_types=[SampleRun], monitor_types=[]) - wf[Filename[SampleRun]] = data.bifrost_simulated_elastic() + wf[Filename[SampleRun]] = bifrost_simulated_elastic choppers = wf.compute(RawChoppers[SampleRun]) assert choppers.keys() == { @@ -626,9 +628,11 @@ def test_generic_nexus_workflow_load_choppers() -> None: assert chopper['slit_edges'].shape == (2,) -def test_generic_nexus_workflow_load_beamline_metadata() -> None: +def test_generic_nexus_workflow_load_beamline_metadata( + bifrost_simulated_elastic: Path, +) -> None: wf = GenericNeXusWorkflow(run_types=[SampleRun], monitor_types=[]) - wf[Filename[SampleRun]] = data.bifrost_simulated_elastic() + wf[Filename[SampleRun]] = bifrost_simulated_elastic beamline = wf.compute(Beamline) assert beamline.name == 'BIFROST' @@ -636,10 +640,12 @@ def test_generic_nexus_workflow_load_beamline_metadata() -> None: assert beamline.site == 'ESS' -def test_generic_nexus_workflow_load_measurement_metadata() -> None: +def test_generic_nexus_workflow_load_measurement_metadata( + loki_tutorial_sample_run_60250: Path, loki_tutorial_background_run_60248: Path +) -> None: wf = GenericNeXusWorkflow(run_types=[SampleRun], monitor_types=[]) - wf[Filename[SampleRun]] = data.loki_tutorial_sample_run_60250() - wf[Filename[BackgroundRun]] = data.loki_tutorial_background_run_60248() + wf[Filename[SampleRun]] = loki_tutorial_sample_run_60250 + wf[Filename[BackgroundRun]] = loki_tutorial_background_run_60248 measurement = wf.compute(Measurement) assert measurement.title == 'My experiment'