diff --git a/docs/user-guide/workflow_chunk.ipynb b/docs/user-guide/workflow_chunk.ipynb
index 148fe17..d6b3d8f 100644
--- a/docs/user-guide/workflow_chunk.ipynb
+++ b/docs/user-guide/workflow_chunk.ipynb
@@ -8,6 +8,45 @@
     "In this example, we will process McStas events chunk by chunk, panel by panel."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# TL;DR\n",
+    "\n",
+    "There is a reduction script that processes the dataset chunk by chunk.\n",
+    "\n",
+    "> It may have some hard-coded parameters due to unfinished work on the reduction workflow."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "nbsphinx": "hidden"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from ess.nmx.data import small_mcstas_3_sample\n",
+    "\n",
+    "os.environ['INPUT_FILE_PATH'] = small_mcstas_3_sample()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!essnmx_reduce_mcstas \\\n",
+    "    --input_file ${INPUT_FILE_PATH} \\\n",
+    "    --output_file script-test.h5 \\\n",
+    "    --verbose \\\n",
+    "    --chunk_size 10_000_000 \\\n",
+    "    --detector_ids 0 1 2\n"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -37,17 +76,17 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Compute Maximum Probabiliity\n",
+    "## Compute Raw Data Metadata\n",
     "\n",
-    "`McStasWeight2CountScaleFactor` should not be different from chunk to chunk.\n",
+    "The `time-of-flight` coordinate and `McStasWeight2CountScaleFactor` should not differ from chunk to chunk.\n",
     "\n",
-    "Therefore we need to compute `McStasWeight2CoutScaleFactor` before we compute `NMXReducedDataGroup`.\n",
+    "Therefore we need to compute `TimeBinSteps` and `McStasWeight2CountScaleFactor` before we compute `NMXReducedDataGroup`.\n",
     "\n",
     "It can be done by `ess.reduce.streaming.StreamProcessor`.\n",
     "\n",
-    "In this example, `MaximumProbability` will be renewed every time a chunk is added to the streaming processor.\n",
+    "In this example, `MinimumTimeOfArrival`, `MaximumTimeOfArrival` and `MaximumProbability` will be renewed every time a chunk is added to the streaming processor.\n",
     "\n",
-    "`MaxAccumulator` remembers the previous maximum value and compute new maximum value with the new chunk.\n",
+    "`(Min/Max)Accumulator` remembers the previous minimum/maximum value and computes the new minimum/maximum value from each new chunk.\n",
     "\n",
     "``raw_event_data_chunk_generator`` yields a chunk of raw event probability from mcstas h5 file.\n",
     "\n",
@@ -62,16 +101,20 @@
    "source": [
     "from functools import partial\n",
     "from ess.reduce.streaming import StreamProcessor\n",
-    "from ess.nmx.streaming import MaxAccumulator\n",
+    "from ess.nmx.streaming import MaxAccumulator, MinAccumulator\n",
     "\n",
     "# Stream processor building helper\n",
     "scalefactor_stream_processor = partial(\n",
     "    StreamProcessor,\n",
     "    dynamic_keys=(RawEventProbability,),\n",
-    "    target_keys=(McStasWeight2CountScaleFactor,),\n",
-    "    accumulators={MaximumProbability: MaxAccumulator},\n",
+    "    target_keys=(NMXRawDataMetadata,),\n",
+    "    accumulators={\n",
+    "        MaximumProbability: MaxAccumulator,\n",
+    "        MaximumTimeOfArrival: MaxAccumulator,\n",
+    "        MinimumTimeOfArrival: MinAccumulator,\n",
+    "    },\n",
     ")\n",
-    "scalefactor_wf = wf.copy()"
+    "metadata_wf = wf.copy()"
   ]
  },
 {
@@ -80,7 +123,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "scalefactor_wf.visualize(McStasWeight2CountScaleFactor, graph_attr={\"rankdir\": \"TD\"}, compact=True)"
+    "metadata_wf.visualize(NMXRawDataMetadata, graph_attr={\"rankdir\": \"TD\"}, compact=True)"
graph_attr={\"rankdir\": \"TD\"}, compact=True)" ] }, { @@ -90,7 +133,10 @@ "outputs": [], "source": [ "from ess.nmx.types import DetectorName\n", - "from ess.nmx.mcstas.load import raw_event_data_chunk_generator\n", + "from ess.nmx.mcstas.load import (\n", + " raw_event_data_chunk_generator,\n", + " mcstas_weight_to_probability_scalefactor,\n", + ")\n", "from ess.nmx.streaming import calculate_number_of_chunks\n", "from ipywidgets import IntProgress\n", "\n", @@ -99,10 +145,11 @@ "NUM_DETECTORS = 3\n", "\n", "# Loop over the detectors\n", - "file_path = scalefactor_wf.compute(FilePath)\n", - "scale_factors = {}\n", + "file_path = metadata_wf.compute(FilePath)\n", + "raw_data_metadatas = {}\n", + "\n", "for detector_i in range(0, NUM_DETECTORS):\n", - " temp_wf = scalefactor_wf.copy()\n", + " temp_wf = metadata_wf.copy()\n", " temp_wf[DetectorIndex] = detector_i\n", " detector_name = temp_wf.compute(DetectorName)\n", " max_chunk_id = calculate_number_of_chunks(\n", @@ -123,11 +170,54 @@ " else:\n", " results = processor.add_chunk({RawEventProbability: da})\n", " cur_detector_progress_bar.value += 1\n", - " scale_factors[detector_i] = results[McStasWeight2CountScaleFactor]\n", + " display(results[NMXRawDataMetadata])\n", + " raw_data_metadatas[detector_i] = results[NMXRawDataMetadata]\n", + "\n", + "# We take the min/maximum values of the scale factor\n", + "min_toa = min(meta.min_toa for meta in raw_data_metadatas.values())\n", + "max_toa = max(meta.max_toa for meta in raw_data_metadatas.values())\n", + "max_probability = max(meta.max_probability for meta in raw_data_metadatas.values())\n", + "\n", + "toa_bin_edges = sc.linspace(dim='t', start=min_toa, stop=max_toa, num=51)\n", + "scale_factor = mcstas_weight_to_probability_scalefactor(\n", + " max_counts=wf.compute(MaximumCounts), max_probability=max_probability\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compute Metadata\n", + "\n", + "Other metadata does not require any chunk-based computation.\n", "\n", - "# We take the minimum scale factor for the entire dataset\n", - "scale_factor = min(scale_factors.values())\n", - "scale_factor\n" + "Therefore we export the metadata first and append detector data later." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ess.nmx.nexus import export_metadata_as_nxlauetof\n",
+    "\n",
+    "output_file = \"test.h5\"\n",
+    "experiment_metadata = wf.compute(NMXExperimentMetadata)\n",
+    "detector_metas = []\n",
+    "for detector_i in range(NUM_DETECTORS):\n",
+    "    temp_wf = wf.copy()\n",
+    "    temp_wf[DetectorIndex] = detector_i\n",
+    "    detector_metas.append(temp_wf.compute(NMXDetectorMetadata))\n",
+    "\n",
+    "export_metadata_as_nxlauetof(\n",
+    "    *detector_metas,\n",
+    "    experiment_metadata=experiment_metadata,\n",
+    "    output_file=output_file,\n",
+    "    # Arbitrary metadata falls into the ``entry`` group as a variable.\n",
+    "    mcstas_weight2count_scale_factor=scale_factor,\n",
+    ")"
+   ]
  },
 {
@@ -136,9 +226,11 @@
   "source": [
    "## Compute Final Output\n",
    "\n",
-    "Now with the `scale_factor: McStasWeight2CountScaleFactor`, we can compute the final output chunk by chunk.\n",
+    "Now with all the metadata, we can compute the final output chunk by chunk.\n",
+    "\n",
+    "We will also compute static parameters in advance so that the stream processor does not recompute them every time another chunk is added.\n",
    "\n",
-    "We will also compute static parameters in advance so that stream processor does not compute them every time another chunk is added."
+    "We will export the reduced data detector by detector as well."
  ]
 },
 {
@@ -150,15 +242,14 @@
    "from ess.nmx.mcstas.xml import McStasInstrument\n",
    "\n",
    "final_wf = wf.copy()\n",
-    "# Add the scale factor to the workflow\n",
+    "# Set the scale factor and time bin edges\n",
    "final_wf[McStasWeight2CountScaleFactor] = scale_factor\n",
+    "final_wf[TimeBinSteps] = toa_bin_edges\n",
    "\n",
-    "# Compute the static information in advance\n",
-    "# static_info = wf.compute([CrystalRotation, McStasInstrument])\n",
-    "static_info = wf.compute([McStasInstrument])\n",
-    "# final_wf[CrystalRotation] = static_info[CrystalRotation]\n",
-    "final_wf[CrystalRotation] = sc.vector([0, 0, 0.,], unit='deg')\n",
-    "final_wf[McStasInstrument] = static_info[McStasInstrument]\n",
+    "# Set the crystal rotation manually for now ...\n",
+    "final_wf[CrystalRotation] = sc.vector([0, 0, 0.0], unit='deg')\n",
+    "# Set static info\n",
+    "final_wf[McStasInstrument] = wf.compute(McStasInstrument)\n",
    "final_wf.visualize(NMXReducedDataGroup, compact=True)"
  ]
 },
@@ -190,6 +281,7 @@
    "from ess.nmx.mcstas.load import raw_event_data_chunk_generator\n",
    "from ess.nmx.streaming import calculate_number_of_chunks\n",
    "from ipywidgets import IntProgress\n",
+    "from ess.nmx.nexus import export_reduced_data_as_nxlauetof\n",
    "\n",
    "CHUNK_SIZE = 10 # Number of event rows to process at once\n",
    "# Increase this number to speed up the processing\n",
@@ -229,7 +321,8 @@
    "        cur_detector_progress_bar.value += 1\n",
    "\n",
    "    result = results[NMXReducedDataGroup]\n",
-    "    display(result)\n"
+    "    display(result)\n",
+    "    export_reduced_data_as_nxlauetof(result, output_file=output_file)\n"
   ]
  }
 ],
diff --git a/pyproject.toml b/pyproject.toml
index 9559362..a6f1240 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,9 @@
 test = [
     "pytest",
 ]
 
+[project.scripts]
+essnmx_reduce_mcstas = "ess.nmx.mcstas.executables:main"
+
 [project.urls]
 "Bug Tracker" = "https://github.com/scipp/essnmx/issues"
 "Documentation" = "https://scipp.github.io/essnmx"
diff --git a/requirements/base.txt b/requirements/base.txt
index 4c060f5..5ca3865 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -29,7 +29,7 @@
 dnspython==2.7.0
     # via email-validator
 email-validator==2.2.0
     # via scippneutron
-essreduce==25.2.4
+essreduce==25.2.5
     # via -r base.in
 fonttools==4.56.0
     # via matplotlib
@@ -57,7 +57,7 @@
 lazy-loader==0.4
     # scippneutron
 locket==1.0.0
     # via partd
-matplotlib==3.10.0
+matplotlib==3.10.1
     # via
     #   mpltoolbox
     #   plopp
diff --git a/requirements/docs.txt b/requirements/docs.txt
index 1100212..48ccceb 100644
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -54,7 +54,7 @@
 ipydatawidgets==4.3.5
     # via pythreejs
 ipykernel==6.29.5
     # via -r docs.in
-ipython==8.32.0
+ipython==8.33.0
     # via
     #   -r docs.in
     #   ipykernel
diff --git a/requirements/nightly.txt b/requirements/nightly.txt
index 376e00e..d950bf6 100644
--- a/requirements/nightly.txt
+++ b/requirements/nightly.txt
@@ -32,7 +32,7 @@
 dnspython==2.7.0
     # via email-validator
 email-validator==2.2.0
     # via scippneutron
-essreduce==25.2.4
+essreduce==25.2.5
     # via -r nightly.in
 exceptiongroup==1.2.2
     # via pytest
@@ -64,7 +64,7 @@
 lazy-loader==0.4
     # scippneutron
 locket==1.0.0
     # via partd
-matplotlib==3.10.0
+matplotlib==3.10.1
     # via
     #   mpltoolbox
     #   plopp
diff --git a/src/ess/nmx/__init__.py b/src/ess/nmx/__init__.py
index 57586b2..3787f2d 100644
--- a/src/ess/nmx/__init__.py
+++ b/src/ess/nmx/__init__.py
@@ -15,13 +15,10 @@
 from .reduction import NMXReducedDataGroup
 from .types import MaximumCounts, NMXRawEventCountsDataGroup
 
-default_parameters = {MaximumCounts: 10000}
-
 del MaximumCounts
 
 __all__ = [
     "NMXRawEventCountsDataGroup",
     "NMXReducedDataGroup",
-    "default_parameters",
     "small_mcstas_3_sample",
 ]
diff --git a/src/ess/nmx/mcstas/__init__.py b/src/ess/nmx/mcstas/__init__.py
index c40c805..3026dc2 100644
--- a/src/ess/nmx/mcstas/__init__.py
+++ b/src/ess/nmx/mcstas/__init__.py
@@ -1,11 +1,16 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright (c) 2024 Scipp contributors (https://github.com/scipp)
+from ..types import MaximumCounts
+
+default_parameters = {MaximumCounts: 10000}
 
 
 def McStasWorkflow():
     import sciline as sl
 
     from ess.nmx.reduction import (
+        calculate_maximum_toa,
+        calculate_minimum_toa,
         format_nmx_reduced_data,
         proton_charge_from_event_counts,
         raw_event_probability_to_counts,
@@ -18,10 +23,13 @@
     return sl.Pipeline(
         (
             *loader_providers,
+            calculate_maximum_toa,
+            calculate_minimum_toa,
             read_mcstas_geometry_xml,
             proton_charge_from_event_counts,
             reduce_raw_event_probability,
             raw_event_probability_to_counts,
             format_nmx_reduced_data,
-        )
+        ),
+        params=default_parameters,
     )
diff --git a/src/ess/nmx/mcstas/executables.py b/src/ess/nmx/mcstas/executables.py
new file mode 100644
index 0000000..b66a1ce
--- /dev/null
+++ b/src/ess/nmx/mcstas/executables.py
@@ -0,0 +1,268 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
+import argparse
+import logging
+import pathlib
+import sys
+from collections.abc import Callable
+from functools import partial
+
+import sciline as sl
+import scipp as sc
+
+from ess.reduce.streaming import EternalAccumulator, StreamProcessor
+
+from ..nexus import export_metadata_as_nxlauetof, export_reduced_data_as_nxlauetof
+from ..streaming import MaxAccumulator, MinAccumulator, calculate_number_of_chunks
+from ..types import (
+    CrystalRotation,
+    DetectorIndex,
+    DetectorName,
+    FilePath,
+    MaximumCounts,
+    MaximumProbability,
+    MaximumTimeOfArrival,
+    McStasWeight2CountScaleFactor,
+    MinimumTimeOfArrival,
+    NMXDetectorMetadata,
+    NMXExperimentMetadata,
+    NMXRawDataMetadata,
+    NMXReducedCounts,
+    NMXReducedDataGroup,
+    PixelIds,
+    RawEventProbability,
+    TimeBinSteps,
+)
+from . import McStasWorkflow
+from .load import (
+    mcstas_weight_to_probability_scalefactor,
+    raw_event_data_chunk_generator,
+)
+from .xml import McStasInstrument
+
+
+def _build_metadata_streaming_processor_helper() -> (
+    Callable[[sl.Pipeline], StreamProcessor]
+):
+    return partial(
+        StreamProcessor,
+        dynamic_keys=(RawEventProbability,),
+        target_keys=(NMXRawDataMetadata,),
+        accumulators={
+            MaximumProbability: MaxAccumulator,
+            MaximumTimeOfArrival: MaxAccumulator,
+            MinimumTimeOfArrival: MinAccumulator,
+        },
+    )
+
+
+def _build_final_streaming_processor_helper() -> (
+    Callable[[sl.Pipeline], StreamProcessor]
+):
+    return partial(
+        StreamProcessor,
+        dynamic_keys=(RawEventProbability,),
+        target_keys=(NMXReducedDataGroup,),
+        accumulators={NMXReducedCounts: EternalAccumulator},
+    )
+
+
+def calculate_raw_data_metadata(
+    *detector_ids: DetectorIndex | DetectorName,
+    wf: sl.Pipeline,
+    chunk_size: int = 10_000_000,
+    logger: logging.Logger | None = None,
+) -> NMXRawDataMetadata:
+    # Stream processor building helper
+    scalefactor_stream_processor = _build_metadata_streaming_processor_helper()
+    metadata_wf = wf.copy()
+    # Loop over the detectors
+    file_path = metadata_wf.compute(FilePath)
+    raw_data_metadatas = {}
+
+    for detector_i in detector_ids:
+        temp_wf = metadata_wf.copy()
+        if isinstance(detector_i, str):
+            temp_wf[DetectorName] = detector_i
+        else:
+            temp_wf[DetectorIndex] = detector_i
+
+        detector_name = temp_wf.compute(DetectorName)
+        max_chunk_id = calculate_number_of_chunks(
+            temp_wf.compute(FilePath),
+            detector_name=detector_name,
+            chunk_size=chunk_size,
+        )
+        # Build the stream processor
+        processor = scalefactor_stream_processor(temp_wf)
+        for i_da, da in enumerate(
+            raw_event_data_chunk_generator(
+                file_path=file_path, detector_name=detector_name, chunk_size=chunk_size
+            )
+        ):
+            # Skip empty chunks
+            if any(size == 0 for size in da.sizes.values()):
+                continue
+            results = processor.add_chunk({RawEventProbability: da})
+            if logger is not None:
+                logger.info(
+                    "[%s/%s] Processed chunk for %s",
+                    i_da + 1,
+                    max_chunk_id,
+                    detector_name,
+                )
+
+        raw_data_metadatas[detector_i] = results[NMXRawDataMetadata]
+
+    # We take the global min/max values across all detectors.
+    # We are doing it manually because it is not possible to update parameters
+    # in the workflow that the stream processor uses.
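+    # (Each stream processor instance accumulates over a single detector's
+    # chunks only, so the cross-detector reduction of the extrema happens here.)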
+    min_toa = min(dg.min_toa for dg in raw_data_metadatas.values())
+    max_toa = max(dg.max_toa for dg in raw_data_metadatas.values())
+    max_probability = max(dg.max_probability for dg in raw_data_metadatas.values())
+
+    return NMXRawDataMetadata(
+        min_toa=min_toa, max_toa=max_toa, max_probability=max_probability
+    )
+
+
+def reduction(
+    *,
+    input_file: pathlib.Path,
+    output_file: pathlib.Path,
+    chunk_size: int = 10_000_000,
+    detector_ids: list[int | str],
+    wf: sl.Pipeline | None = None,
+    logger: logging.Logger | None = None,
+) -> None:
+    wf = wf.copy() if wf is not None else McStasWorkflow()
+    wf[FilePath] = input_file
+    # Set static info
+    wf[McStasInstrument] = wf.compute(McStasInstrument)
+
+    # Calculate parameters for data reduction
+    data_metadata = calculate_raw_data_metadata(
+        *detector_ids, wf=wf, logger=logger, chunk_size=chunk_size
+    )
+    if logger is not None:
+        logger.info("Metadata retrieved: %s", data_metadata)
+
+    toa_bin_edges = sc.linspace(
+        dim='t', start=data_metadata.min_toa, stop=data_metadata.max_toa, num=51
+    )
+    scale_factor = mcstas_weight_to_probability_scalefactor(
+        max_counts=wf.compute(MaximumCounts),
+        max_probability=data_metadata.max_probability,
+    )
+    # Compute metadata and make the skeleton output file
+    experiment_metadata = wf.compute(NMXExperimentMetadata)
+    detector_metas = []
+    for detector_i in detector_ids:
+        temp_wf = wf.copy()
+        if isinstance(detector_i, str):
+            temp_wf[DetectorName] = detector_i
+        else:
+            temp_wf[DetectorIndex] = detector_i
+        detector_metas.append(temp_wf.compute(NMXDetectorMetadata))
+
+    if logger is not None:
+        logger.info("Exporting metadata into the output file %s", output_file)
+
+    export_metadata_as_nxlauetof(
+        *detector_metas,
+        experiment_metadata=experiment_metadata,
+        output_file=output_file,
+        # Arbitrary metadata falls into the ``entry`` group as a variable.
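+        # (It is written as a dataset under ``entry/metadata/<name>``;
+        # values that are not a ``scipp.Variable`` are skipped with a warning.)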
+        mcstas_weight2count_scale_factor=scale_factor,
+    )
+    # Compute histogram
+    final_wf = wf.copy()
+    # Set the scale factor and time bin edges
+    final_wf[McStasWeight2CountScaleFactor] = scale_factor
+    final_wf[TimeBinSteps] = toa_bin_edges
+
+    file_path = final_wf.compute(FilePath)
+    final_stream_processor = _build_final_streaming_processor_helper()
+    # Loop over the detectors
+    for detector_i in detector_ids:
+        temp_wf = final_wf.copy()
+        if isinstance(detector_i, str):
+            temp_wf[DetectorName] = detector_i
+        else:
+            temp_wf[DetectorIndex] = detector_i
+        # Set static information as parameters
+        detector_name = temp_wf.compute(DetectorName)
+        temp_wf[PixelIds] = temp_wf.compute(PixelIds)
+        max_chunk_id = calculate_number_of_chunks(
+            file_path, detector_name=detector_name, chunk_size=chunk_size
+        )
+
+        # Build the stream processor
+        processor = final_stream_processor(temp_wf)
+        for i_da, da in enumerate(
+            raw_event_data_chunk_generator(
+                file_path=file_path, detector_name=detector_name, chunk_size=chunk_size
+            )
+        ):
+            # Skip empty chunks
+            if any(size == 0 for size in da.sizes.values()):
+                continue
+            results = processor.add_chunk({RawEventProbability: da})
+            if logger is not None:
+                logger.info(
+                    "[%s/%s] Processed chunk for %s",
+                    i_da + 1,
+                    max_chunk_id,
+                    detector_name,
+                )
+
+        result = results[NMXReducedDataGroup]
+        if logger is not None:
+            logger.info("Appending reduced data into the output file %s", output_file)
+        export_reduced_data_as_nxlauetof(result, output_file=output_file)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="McStas Data Reduction.")
+    parser.add_argument("--input_file", type=str, help="Path to the input file")
+    parser.add_argument(
+        "--output_file",
+        type=str,
+        default="scipp_output.h5",
+        help="Path to the output file",
+    )
+    parser.add_argument(
+        "--verbose", action="store_true", help="Increase output verbosity"
+    )
+    parser.add_argument(
+        "--chunk_size",
+        type=int,
+        default=10_000_000,
+        help="Chunk size for processing",
+    )
+    parser.add_argument(
+        "--detector_ids",
+        type=int,
+        nargs="+",
+        default=[0, 1, 2],
+        help="Detector indices to process",
+    )
+
+    args = parser.parse_args()
+
+    input_file = pathlib.Path(args.input_file).resolve()
+    output_file = pathlib.Path(args.output_file).resolve()
+
+    logger = logging.getLogger(__name__)
+    if args.verbose:
+        logger.setLevel(logging.INFO)
+        logger.addHandler(logging.StreamHandler(sys.stdout))
+
+    wf = McStasWorkflow()
+    # Set the crystal rotation manually for now ...
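+    # (The rotation is not yet read from the input file by this script,
+    # so a zero rotation is assumed below.)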
+    wf[CrystalRotation] = sc.vector([0, 0, 0.0], unit='deg')
+    reduction(
+        input_file=input_file,
+        output_file=output_file,
+        chunk_size=args.chunk_size,
+        detector_ids=args.detector_ids,
+        logger=logger,
+        wf=wf,
+    )
diff --git a/src/ess/nmx/mcstas/load.py b/src/ess/nmx/mcstas/load.py
index 66b7ef7..b989d09 100644
--- a/src/ess/nmx/mcstas/load.py
+++ b/src/ess/nmx/mcstas/load.py
@@ -14,7 +14,12 @@
     FilePath,
     MaximumCounts,
     MaximumProbability,
+    MaximumTimeOfArrival,
     McStasWeight2CountScaleFactor,
+    MinimumTimeOfArrival,
+    NMXDetectorMetadata,
+    NMXExperimentMetadata,
+    NMXRawDataMetadata,
     NMXRawEventCountsDataGroup,
     PixelIds,
     RawEventProbability,
@@ -245,22 +250,34 @@
     return bank_names_to_detector_names
 
 
+def load_experiment_metadata(
+    instrument: McStasInstrument, crystal_rotation: CrystalRotation
+) -> NMXExperimentMetadata:
+    """Load the experiment metadata from the McStas file."""
+    return NMXExperimentMetadata(
+        sc.DataGroup(
+            crystal_rotation=crystal_rotation, **instrument.experiment_metadata()
+        )
+    )
+
+
+def load_detector_metadata(
+    instrument: McStasInstrument, detector_name: DetectorName
+) -> NMXDetectorMetadata:
+    """Load the detector metadata from the McStas file."""
+    return NMXDetectorMetadata(
+        sc.DataGroup(**instrument.detector_metadata(detector_name))
+    )
+
+
 def load_mcstas(
     *,
     da: RawEventProbability,
-    crystal_rotation: CrystalRotation,
-    detector_name: DetectorName,
-    instrument: McStasInstrument,
+    experiment_metadata: NMXExperimentMetadata,
+    detector_metadata: NMXDetectorMetadata,
 ) -> NMXRawEventCountsDataGroup:
-    coords = instrument.to_coords(detector_name)
     return NMXRawEventCountsDataGroup(
-        sc.DataGroup(
-            weights=da,
-            crystal_rotation=crystal_rotation,
-            name=sc.scalar(detector_name),
-            pixel_id=instrument.pixel_ids(detector_name),
-            **coords,
-        )
+        sc.DataGroup(weights=da, **experiment_metadata, **detector_metadata)
     )
 
 
@@ -271,7 +288,19 @@
     return PixelIds(instrument.pixel_ids(detector_name))
 
 
+def retrieve_raw_data_metadata(
+    min_toa: MinimumTimeOfArrival,
+    max_toa: MaximumTimeOfArrival,
+    max_probability: MaximumProbability,
+) -> NMXRawDataMetadata:
+    """Retrieve the metadata of the raw data."""
+    return NMXRawDataMetadata(
+        min_toa=min_toa, max_toa=max_toa, max_probability=max_probability
+    )
+
+
 providers = (
+    retrieve_raw_data_metadata,
     read_mcstas_geometry_xml,
     detector_name_from_index,
     load_event_data_bank_name,
@@ -281,4 +310,6 @@
     retrieve_pixel_ids,
     load_crystal_rotation,
     load_mcstas,
+    load_experiment_metadata,
+    load_detector_metadata,
 )
diff --git a/src/ess/nmx/mcstas/xml.py b/src/ess/nmx/mcstas/xml.py
index 3014220..5478f6a 100644
--- a/src/ess/nmx/mcstas/xml.py
+++ b/src/ess/nmx/mcstas/xml.py
@@ -212,6 +212,11 @@
     def num_fast_pixels_per_row(self) -> int:
         """Number of pixels in each row of the detector along the fast axis."""
         return self.num_x if self.fast_axis_name == 'x' else self.num_y
 
+    @property
+    def detector_shape(self) -> tuple:
+        """Shape of the detector panel, (num_x, num_y)."""
(num_x, num_y)""" + return (self.num_x, self.num_y) + def _collect_detector_descriptions(tree: _XML) -> tuple[DetectorDesc, ...]: """Retrieve detector geometry descriptions from mcstas file.""" @@ -310,12 +315,18 @@ def from_xml( ) +def _construct_pixel_id(detector_desc: DetectorDesc) -> sc.Variable: + """Pixel IDs for single detector.""" + start, stop = ( + detector_desc.id_start, + detector_desc.id_start + detector_desc.total_pixels, + ) + return sc.arange('id', start, stop, unit=None) + + def _construct_pixel_ids(detector_descs: tuple[DetectorDesc, ...]) -> sc.Variable: """Pixel IDs for all detectors.""" - intervals = [ - (desc.id_start, desc.id_start + desc.total_pixels) for desc in detector_descs - ] - ids = [sc.arange('id', start, stop, unit=None) for start, stop in intervals] + ids = [_construct_pixel_id(det) for det in detector_descs] return sc.concat(ids, 'id') @@ -345,17 +356,6 @@ def _pixel_positions( ) + position_offset -def _detector_pixel_positions( - detector_descs: tuple[DetectorDesc, ...], sample: SampleDesc -) -> sc.Variable: - """Position of pixels of all detectors.""" - positions = [ - _pixel_positions(detector, sample.position_from_sample(detector.position)) - for detector in detector_descs - ] - return sc.concat(positions, 'panel') - - @dataclass class McStasInstrument: simulation_settings: SimulationSettings @@ -380,30 +380,72 @@ def from_xml(cls, tree: _XML) -> 'McStasInstrument': ) def pixel_ids(self, *det_names: str) -> sc.Variable: - detectors = tuple(det for det in self.detectors if det.name in det_names) - return _construct_pixel_ids(detectors) + """Pixel IDs for the detectors. - def to_coords(self, *det_names: str) -> dict[str, sc.Variable]: - """Extract coordinates from the McStas instrument description. + If multiple detectors are requested, all pixel IDs will be concatenated along + the 'id' dimension. Parameters ---------- det_names: - Names of the detectors to extract coordinates for. + Names of the detectors to extract pixel IDs for. 
""" detectors = tuple(det for det in self.detectors if det.name in det_names) - slow_axes = [det.slow_axis for det in detectors] - fast_axes = [det.fast_axis for det in detectors] - origins = [self.sample.position_from_sample(det.position) for det in detectors] + return _construct_pixel_ids(detectors) + + def experiment_metadata(self) -> dict[str, sc.Variable]: + """Extract experiment metadata from the McStas instrument description.""" return { - 'fast_axis': sc.concat(fast_axes, 'panel'), - 'slow_axis': sc.concat(slow_axes, 'panel'), - 'origin_position': sc.concat(origins, 'panel'), 'sample_position': self.sample.position_from_sample(self.sample.position), 'source_position': self.sample.position_from_sample(self.source.position), 'sample_name': sc.scalar(self.sample.name), - 'position': _detector_pixel_positions(detectors, self.sample), + } + + def _detector_metadata(self, det_name: str) -> dict[str, sc.Variable]: + try: + detector = next(det for det in self.detectors if det.name == det_name) + except StopIteration as e: + raise KeyError(f"Detector {det_name} not found.") from e + return { + 'fast_axis': detector.fast_axis, + 'slow_axis': detector.slow_axis, + 'origin_position': self.sample.position_from_sample(detector.position), + 'position': _pixel_positions( + detector, self.sample.position_from_sample(detector.position) + ), + 'detector_shape': sc.scalar(detector.detector_shape), + 'x_pixel_size': detector.step_x, + 'y_pixel_size': detector.step_y, + 'detector_name': sc.scalar(detector.name), + } + + def detector_metadata(self, *det_names: str) -> dict[str, sc.Variable]: + """Extract detector metadata from the McStas instrument description. + + If multiple detector is requested, all metadata will be concatenated along the + 'panel' dimension. + + Parameters + ---------- + det_names: + Names of the detectors to extract metadata for. + + """ + if len(det_names) == 1: + return self._detector_metadata(det_names[0]) + detector_metadatas = { + det_name: self._detector_metadata(det_name) for det_name in det_names + } + # Concat all metadata into panel dimension + metadata_keys: set[str] = set().union( + set(detector_metadatas[det_name].keys()) for det_name in det_names + ) + return { + key: sc.concat( + [metadata[key] for metadata in detector_metadatas.values()], 'panel' + ) + for key in metadata_keys } diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py index bf6b509..c2cd6df 100644 --- a/src/ess/nmx/nexus.py +++ b/src/ess/nmx/nexus.py @@ -2,10 +2,16 @@ # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) import io import pathlib +import warnings from functools import partial +from typing import Any import h5py +import numpy as np import scipp as sc +import scippnexus as snx + +from .types import NMXDetectorMetadata, NMXExperimentMetadata, NMXReducedDataGroup def _create_dataset_from_var( @@ -16,6 +22,7 @@ def _create_dataset_from_var( long_name: str | None = None, compression: str | None = None, compression_opts: int | None = None, + dtype: Any = None, ) -> h5py.Dataset: compression_options = {} if compression is not None: @@ -25,7 +32,7 @@ def _create_dataset_from_var( dataset = root_entry.create_dataset( name, - data=var.values, + data=var.values if dtype is None else var.values.astype(dtype, copy=False), **compression_options, ) dataset.attrs["units"] = str(var.unit) @@ -129,8 +136,6 @@ def export_as_nexus( Currently exporting step is not expected to be part of sciline pipelines. 
""" - import warnings - warnings.warn( DeprecationWarning( "Exporting to custom NeXus format will be deprecated in the near future." @@ -145,3 +150,272 @@ def export_as_nexus( _create_instrument_group(data, nx_entry) _create_detector_group(data, nx_entry) _create_source_group(data, nx_entry) + + +def _create_lauetof_data_entry(file_obj: h5py.File) -> h5py.Group: + nx_entry = file_obj.create_group("entry") + nx_entry.attrs["NX_class"] = "NXentry" + return nx_entry + + +def _add_lauetof_definition(nx_entry: h5py.Group) -> None: + nx_entry["definition"] = "NXlauetof" + + +def _add_lauetof_instrument(nx_entry: h5py.Group) -> h5py.Group: + nx_instrument = nx_entry.create_group("instrument") + nx_instrument.attrs["NX_class"] = "NXinstrument" + nx_instrument["name"] = "NMX" + return nx_instrument + + +def _add_lauetof_detector_group(dg: sc.DataGroup, nx_instrument: h5py.Group) -> None: + nx_detector = nx_instrument.create_group(dg["detector_name"].value) # Detector name + nx_detector.attrs["NX_class"] = "NXdetector" + _create_dataset_from_var( + name="polar_angle", + root_entry=nx_detector, + var=sc.scalar(0, unit='deg'), # TODO: Add real data + ) + _create_dataset_from_var( + name="azimuthal_angle", + root_entry=nx_detector, + var=sc.scalar(0, unit='deg'), # TODO: Add real data + ) + _create_dataset_from_var( + name="x_pixel_size", root_entry=nx_detector, var=dg["x_pixel_size"] + ) + _create_dataset_from_var( + name="y_pixel_size", root_entry=nx_detector, var=dg["y_pixel_size"] + ) + _create_dataset_from_var( + name="distance", + root_entry=nx_detector, + var=sc.scalar(0, unit='m'), # TODO: Add real data + ) + # Legacy geometry information until we have a better way to store it + _create_dataset_from_var( + name="origin", root_entry=nx_detector, var=dg['origin_position'] + ) + # Fast axis, along where the pixel ID increases by 1 + _create_dataset_from_var( + root_entry=nx_detector, var=dg['fast_axis'], name="fast_axis" + ) + # Slow axis, along where the pixel ID increases + # by the number of pixels in the fast axis + _create_dataset_from_var( + root_entry=nx_detector, var=dg['slow_axis'], name="slow_axis" + ) + + +def _add_lauetof_sample_group(dg: NMXExperimentMetadata, nx_entry: h5py.Group) -> None: + nx_sample = nx_entry.create_group("sample") + nx_sample.attrs["NX_class"] = "NXsample" + nx_sample["name"] = ( + dg['sample_name'].value + if isinstance(dg['sample_name'], sc.Variable) + else dg['sample_name'] + ) + _create_dataset_from_var( + name='orientation_matrix', + root_entry=nx_sample, + var=sc.array( + dims=['i', 'j'], + values=[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + unit="dimensionless", + ), # TODO: Add real data, the sample orientation matrix + ) + _create_dataset_from_var( + name='unit_cell', + root_entry=nx_sample, + var=sc.array( + dims=['i'], + values=[1.0, 1.0, 1.0, 90.0, 90.0, 90.0], + unit="dimensionless", # TODO: Add real data, + # a, b, c, alpha, beta, gamma + ), + ) + + +def _add_lauetof_monitor_group(data: sc.DataGroup, nx_entry: h5py.Group) -> None: + nx_monitor = nx_entry.create_group("control") + nx_monitor.attrs["NX_class"] = "NXmonitor" + nx_monitor["mode"] = "monitor" + nx_monitor["preset"] = 0.0 # Check if this is the correct value + data_dset = _create_dataset_from_var( + name='data', + root_entry=nx_monitor, + var=sc.array( + dims=['tof'], values=[1, 1, 1], unit="counts" + ), # TODO: Add real data, bin values + ) + data_dset.attrs["signal"] = 1 + data_dset.attrs["primary"] = 1 + _create_dataset_from_var( + name='time_of_flight', + 
+        root_entry=nx_monitor,
+        var=sc.array(
+            dims=['tof'], values=[1, 1, 1], unit="s"
+        ),  # TODO: Add real data, bin edges
+    )
+
+
+def _add_arbitrary_metadata(
+    nx_entry: h5py.Group, **arbitrary_metadata: sc.Variable
+) -> None:
+    if not arbitrary_metadata:
+        return
+
+    metadata_group = nx_entry.create_group("metadata")
+    for key, value in arbitrary_metadata.items():
+        if not isinstance(value, sc.Variable):
+            msg = f"Skipping metadata key '{key}' as it is not a scipp.Variable."
+            warnings.warn(UserWarning(msg), stacklevel=2)
+            continue
+        _create_dataset_from_var(
+            name=key,
+            root_entry=metadata_group,
+            var=value,
+        )
+
+
+def export_metadata_as_nxlauetof(
+    *detector_metadatas: NMXDetectorMetadata,
+    experiment_metadata: NMXExperimentMetadata,
+    output_file: str | pathlib.Path | io.BytesIO,
+    **arbitrary_metadata: sc.Variable,
+) -> None:
+    """Export the metadata to a NeXus file with the LAUE_TOF application definition.
+
+    ``Metadata`` in this context refers to the information
+    that is not part of the reduced detector counts itself,
+    but is necessary for the interpretation of the reduced data.
+    Since NMX can have an arbitrary number of detectors,
+    this function can take multiple detector metadata objects.
+
+    Parameters
+    ----------
+    detector_metadatas:
+        Detector metadata objects.
+    experiment_metadata:
+        Experiment metadata object.
+    output_file:
+        Output file path.
+    arbitrary_metadata:
+        Arbitrary metadata that does not fit into the existing metadata objects.
+
+    """
+    with h5py.File(output_file, "w") as f:
+        f.attrs["NX_class"] = "NXlauetof"
+        nx_entry = _create_lauetof_data_entry(f)
+        _add_lauetof_definition(nx_entry)
+        nx_instrument = _add_lauetof_instrument(nx_entry)
+        _add_lauetof_sample_group(experiment_metadata, nx_entry)
+        # Placeholder for ``monitor`` group
+        _add_lauetof_monitor_group(experiment_metadata, nx_entry)
+        # Skipping ``name`` field
+        # Add detector group metadata
+        for detector_metadata in detector_metadatas:
+            _add_lauetof_detector_group(detector_metadata, nx_instrument)
+        # Add arbitrary metadata
+        _add_arbitrary_metadata(nx_entry, **arbitrary_metadata)
+
+
+def _validate_existing_metadata(
+    dg: NMXReducedDataGroup,
+    detector_group: snx.Group,
+    sample_group: snx.Group,
+    safety_checks: bool = True,
+) -> None:
+    flag = True
+    # check pixel size
+    flag = flag and sc.identical(dg["x_pixel_size"], detector_group["x_pixel_size"])
+    flag = flag and sc.identical(dg["y_pixel_size"], detector_group["y_pixel_size"])
+    # check sample name
+    flag = flag and dg["sample_name"].value == sample_group["name"]
+
+    if not flag and safety_checks:
+        raise ValueError(
+            f"Metadata for detector '{dg['detector_name'].value}' in the file "
+            "does not match the provided data."
+        )
+    elif not flag and not safety_checks:
+        warnings.warn(
+            UserWarning(
+                "Metadata for detector in the file does not match the provided data. "
+                "This may lead to unexpected results. "
+                "However, the operation will proceed as requested "
+                "since safety checks are disabled."
+            ),
+            stacklevel=2,
+        )
+
+
+def export_reduced_data_as_nxlauetof(
+    dg: NMXReducedDataGroup,
+    output_file: str | pathlib.Path | io.BytesIO,
+    append_mode: bool = True,
+    safety_checks: bool = True,
+) -> None:
+    """Export the reduced data to a NeXus file with the LAUE_TOF application definition.
+
+    Even though this function only exports
+    reduced data (detector counts and their coordinates),
+    the input should contain all the necessary metadata
+    for a minimal sanity check.
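+    The output file must already contain the metadata skeleton written by
+    ``export_metadata_as_nxlauetof``; this function appends the reduced counts to it.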
+
+    Parameters
+    ----------
+    dg:
+        Reduced data and metadata.
+    output_file:
+        Output file path.
+    append_mode:
+        If ``True``, the file is opened in append mode.
+        If ``False``, the file is opened in non-append mode.
+        > Only append mode is supported for now.
+
+    """
+    if not append_mode:
+        raise NotImplementedError("Only append mode is supported for now.")
+    detector_group_path = f"entry/instrument/{dg['detector_name'].value}"
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=UserWarning)
+        # UserWarning is expected here as histogram data is not yet saved.
+        with snx.File(output_file, "r") as f:
+            _validate_existing_metadata(
+                dg=dg,
+                detector_group=f[detector_group_path][()],
+                sample_group=f["entry/sample"][()],
+                safety_checks=safety_checks,
+            )
+
+    with h5py.File(output_file, "r+") as f:
+        nx_detector: h5py.Group = f[detector_group_path]
+        # Data - shape: [n_x_pixels, n_y_pixels, n_tof_bins]
+        # The actual application definition defines it as integer,
+        # but we keep the original data type for now
+        num_x, num_y = dg["detector_shape"].value  # Probably better way to do this
+        data_dset = _create_dataset_from_var(
+            name="data",
+            root_entry=nx_detector,
+            var=sc.fold(dg['counts'].data, dim='id', sizes={'x': num_x, 'y': num_y}),
+            dtype=np.uint,
+        )
+        data_dset.attrs["signal"] = 1
+        _create_dataset_from_var(
+            name='time_of_flight',
+            root_entry=nx_detector,
+            var=sc.midpoints(dg['counts'].coords['t'], dim='t'),
+        )
diff --git a/src/ess/nmx/reduction.py b/src/ess/nmx/reduction.py
index 5d54323..19e7053 100644
--- a/src/ess/nmx/reduction.py
+++ b/src/ess/nmx/reduction.py
@@ -2,11 +2,12 @@
 # Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
 import scipp as sc
 
-from .mcstas.xml import McStasInstrument
 from .types import (
-    CrystalRotation,
-    DetectorName,
+    MaximumTimeOfArrival,
     McStasWeight2CountScaleFactor,
+    MinimumTimeOfArrival,
+    NMXDetectorMetadata,
+    NMXExperimentMetadata,
     NMXReducedCounts,
     NMXReducedDataGroup,
     NMXReducedProbability,
@@ -17,6 +18,16 @@
 )
 
 
+def calculate_minimum_toa(da: RawEventProbability) -> MinimumTimeOfArrival:
+    """Calculate the minimum time of arrival from the data."""
+    return MinimumTimeOfArrival(da.coords['t'].min())
+
+
+def calculate_maximum_toa(da: RawEventProbability) -> MaximumTimeOfArrival:
+    """Calculate the maximum time of arrival from the data."""
+    return MaximumTimeOfArrival(da.coords['t'].max())
+
+
 def proton_charge_from_event_counts(da: NMXReducedCounts) -> ProtonCharge:
     """Make up the proton charge from the event counts.
@@ -53,20 +64,18 @@
 def format_nmx_reduced_data(
     da: NMXReducedCounts,
-    detector_name: DetectorName,
-    instrument: McStasInstrument,
     proton_charge: ProtonCharge,
-    crystal_rotation: CrystalRotation,
+    experiment_metadata: NMXExperimentMetadata,
+    detector_metadata: NMXDetectorMetadata,
 ) -> NMXReducedDataGroup:
     """Bin time of arrival data into ``time_bin_step`` bins."""
-    new_coords = instrument.to_coords(detector_name)
     return NMXReducedDataGroup(
         sc.DataGroup(
             counts=da,
             proton_charge=proton_charge,
-            crystal_rotation=crystal_rotation,
-            **new_coords,
+            **experiment_metadata,
+            **detector_metadata,
         )
     )
diff --git a/src/ess/nmx/streaming.py b/src/ess/nmx/streaming.py
index ea08b67..9460b64 100644
--- a/src/ess/nmx/streaming.py
+++ b/src/ess/nmx/streaming.py
@@ -18,10 +18,6 @@
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         self._cur_min: sc.Variable | None = None
 
-    @property
-    def value(self) -> sc.Variable | None:
-        return self._cur_min
-
     def _do_push(self, value: sc.Variable) -> None:
         new_min = value.min()
         if self._cur_min is None:
@@ -29,6 +25,11 @@
         else:
             self._cur_min = min(self._cur_min, new_min)
 
+    def _get_value(self) -> sc.Variable | None:
+        return self._cur_min
+
+    def clear(self) -> None: ...
+
 
 class MaxAccumulator(Accumulator):
     """Accumulator that keeps track of the maximum value seen so far."""
@@ -37,10 +38,6 @@
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         self._cur_max: sc.Variable | None = None
 
-    @property
-    def value(self) -> sc.Variable | None:
-        return self._cur_max
-
     def _do_push(self, value: sc.Variable) -> None:
         new_max = value.max()
         if self._cur_max is None:
@@ -48,6 +45,11 @@
         else:
             self._cur_max = max(self._cur_max, new_max)
 
+    def _get_value(self) -> sc.Variable | None:
+        return self._cur_max
+
+    def clear(self) -> None: ...
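+    # ``clear`` is a no-op for these accumulators: the running extremum is
+    # never reset during streaming.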
+
 
 def calculate_number_of_chunks(
     file_path: FilePath,
diff --git a/src/ess/nmx/types.py b/src/ess/nmx/types.py
index a79921f..0d62902 100644
--- a/src/ess/nmx/types.py
+++ b/src/ess/nmx/types.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 from typing import Any, NewType
 
 import scipp as sc
@@ -24,13 +25,18 @@
 McStasWeight2CountScaleFactor = NewType("McStasWeight2CountScaleFactor", sc.Variable)
 """Scale factor to convert McStas weights to counts"""
 
+NMXExperimentMetadata = NewType("NMXExperimentMetadata", sc.DataGroup)
+"""Metadata of the experiment"""
+
+NMXDetectorMetadata = NewType("NMXDetectorMetadata", sc.DataGroup)
+"""Metadata of the detector"""
 
 RawEventProbability = NewType("RawEventProbability", sc.DataArray)
 """DataArray containing the event probabilities read from the McStas file,
 has coordinates 'id' and 't' """
 
 NMXRawEventCountsDataGroup = NewType("NMXRawEventCountsDataGroup", sc.DataGroup)
-"""DataGroup containing the RawEventData and other metadata"""
+"""DataGroup containing the RawEventData, experiment metadata and detector metadata"""
 
 ProtonCharge = NewType("ProtonCharge", sc.Variable)
 """The proton charge signal"""
@@ -54,4 +60,19 @@
 """Histogram of time-of-arrival and pixel-id."""
 
 NMXReducedDataGroup = NewType("NMXReducedDataGroup", sc.DataGroup)
-"""Histogram of time-of-arrival and pixel-id, with additional metadata."""
+"""DataGroup containing the histogram over (id, t), experiment metadata and detector metadata"""
+
+MinimumTimeOfArrival = NewType("MinimumTimeOfArrival", sc.Variable)
+"""Minimum time of arrival of the raw data"""
+
+MaximumTimeOfArrival = NewType("MaximumTimeOfArrival", sc.Variable)
+"""Maximum time of arrival of the raw data"""
+
+
+@dataclass
+class NMXRawDataMetadata:
+    """Metadata of the raw data, i.e. maximum probability and min/max time of arrival"""
+
+    max_probability: MaximumProbability
+    min_toa: MinimumTimeOfArrival
+    max_toa: MaximumTimeOfArrival
diff --git a/tests/loader_test.py b/tests/loader_test.py
index ccd306e..bebddb2 100644
--- a/tests/loader_test.py
+++ b/tests/loader_test.py
@@ -10,8 +10,8 @@
 import scippnexus as snx
 from scipp.testing import assert_allclose, assert_identical
 
-from ess.nmx import default_parameters
 from ess.nmx.data import small_mcstas_2_sample, small_mcstas_3_sample
+from ess.nmx.mcstas import default_parameters
 from ess.nmx.mcstas.load import bank_names_to_detector_names, load_crystal_rotation
 from ess.nmx.mcstas.load import providers as loader_providers
 from ess.nmx.types import (
@@ -47,10 +47,8 @@
 def check_nmxdata_properties(
     dg: NMXRawEventCountsDataGroup, fast_axis, slow_axis
 ) -> None:
     assert isinstance(dg, sc.DataGroup)
-    assert_allclose(
-        sc.squeeze(dg['fast_axis'], 'panel'), fast_axis, atol=sc.scalar(0.005)
-    )
-    assert_identical(sc.squeeze(dg['slow_axis'], 'panel'), slow_axis)
+    assert_allclose(dg['fast_axis'], fast_axis, atol=sc.scalar(0.005))
+    assert_identical(dg['slow_axis'], slow_axis)
 
 
 @pytest.mark.parametrize(
diff --git a/tests/workflow_test.py b/tests/workflow_test.py
index f0c5c8a..f785fa5 100644
--- a/tests/workflow_test.py
+++ b/tests/workflow_test.py
@@ -5,8 +5,8 @@
 import sciline as sl
 import scipp as sc
 
-from ess.nmx import default_parameters
 from ess.nmx.data import small_mcstas_2_sample, small_mcstas_3_sample
+from ess.nmx.mcstas import default_parameters
 from ess.nmx.mcstas.load import providers as load_providers
 from ess.nmx.reduction import (
     NMXReducedDataGroup,