From c6952f5c7a11bdb92fce1e8aa6d2f64729bdd4c7 Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 09:45:30 +0100
Subject: [PATCH 01/28] Separate metadata from event data for easy export.

---
 src/ess/nmx/mcstas/load.py | 38 ++++++++++-----
 src/ess/nmx/mcstas/xml.py  | 96 +++++++++++++++++++++++++++-----------
 src/ess/nmx/reduction.py   | 15 +++---
 src/ess/nmx/types.py       |  9 +++-
 tests/loader_test.py       |  6 +--
 5 files changed, 111 insertions(+), 53 deletions(-)

diff --git a/src/ess/nmx/mcstas/load.py b/src/ess/nmx/mcstas/load.py
index 66b7ef71..100404a4 100644
--- a/src/ess/nmx/mcstas/load.py
+++ b/src/ess/nmx/mcstas/load.py
@@ -15,6 +15,8 @@
     MaximumCounts,
     MaximumProbability,
     McStasWeight2CountScaleFactor,
+    NMXDetectorMetadata,
+    NMXExperimentMetadata,
     NMXRawEventCountsDataGroup,
     PixelIds,
     RawEventProbability,
@@ -245,22 +247,34 @@ def bank_names_to_detector_names(description: str) -> dict[str, list[str]]:
     return bank_names_to_detector_names
 
 
+def load_experiment_metadata(
+    instrument: McStasInstrument, crystal_rotation: CrystalRotation
+) -> NMXExperimentMetadata:
+    """Load the experiment metadata from the McStas file."""
+    return NMXExperimentMetadata(
+        sc.DataGroup(
+            crystal_rotation=crystal_rotation, **instrument.experiment_metadata()
+        )
+    )
+
+
+def load_detector_metadata(
+    instrument: McStasInstrument, detector_name: DetectorName
+) -> NMXDetectorMetadata:
+    """Load the detector metadata from the McStas file."""
+    return NMXDetectorMetadata(
+        sc.DataGroup(**instrument.detector_metadata(detector_name))
+    )
+
+
 def load_mcstas(
     *,
     da: RawEventProbability,
-    crystal_rotation: CrystalRotation,
-    detector_name: DetectorName,
-    instrument: McStasInstrument,
+    experiment_metadata: NMXExperimentMetadata,
+    detector_metadata: NMXDetectorMetadata,
 ) -> NMXRawEventCountsDataGroup:
-    coords = instrument.to_coords(detector_name)
     return NMXRawEventCountsDataGroup(
-        sc.DataGroup(
-            weights=da,
-            crystal_rotation=crystal_rotation,
-            name=sc.scalar(detector_name),
-            pixel_id=instrument.pixel_ids(detector_name),
-            **coords,
-        )
+        sc.DataGroup(weights=da, **experiment_metadata, **detector_metadata)
     )
 
 
@@ -281,4 +295,6 @@
     retrieve_pixel_ids,
     load_crystal_rotation,
     load_mcstas,
+    load_experiment_metadata,
+    load_detector_metadata,
 )
diff --git a/src/ess/nmx/mcstas/xml.py b/src/ess/nmx/mcstas/xml.py
index 3014220d..5478f6a0 100644
--- a/src/ess/nmx/mcstas/xml.py
+++ b/src/ess/nmx/mcstas/xml.py
@@ -212,6 +212,11 @@ def num_fast_pixels_per_row(self) -> int:
         """Number of pixels in each row of the detector along the fast axis."""
         return self.num_x if self.fast_axis_name == 'x' else self.num_y
 
+    @property
+    def detector_shape(self) -> tuple:
+        """Shape of the detector panel, (num_x, num_y)."""
+        return (self.num_x, self.num_y)
+
 
 def _collect_detector_descriptions(tree: _XML) -> tuple[DetectorDesc, ...]:
     """Retrieve detector geometry descriptions from mcstas file."""
@@ -310,12 +315,18 @@ def from_xml(
     )
 
 
+def _construct_pixel_id(detector_desc: DetectorDesc) -> sc.Variable:
+    """Pixel IDs for a single detector."""
+    start, stop = (
+        detector_desc.id_start,
+        detector_desc.id_start + detector_desc.total_pixels,
+    )
+    return sc.arange('id', start, stop, unit=None)
+
+
 def _construct_pixel_ids(detector_descs: tuple[DetectorDesc, ...]) -> sc.Variable:
     """Pixel IDs for all detectors."""
-    intervals = [
-        (desc.id_start, desc.id_start + desc.total_pixels) for desc in detector_descs
-    ]
-    ids = [sc.arange('id', start, stop, unit=None) for start, stop in intervals]
+    ids = [_construct_pixel_id(det) for det in detector_descs]
     return sc.concat(ids, 'id')
 
 
@@ -345,17 +356,6 @@ def _pixel_positions(
     ) + position_offset
 
 
-def _detector_pixel_positions(
-    detector_descs: tuple[DetectorDesc, ...], sample: SampleDesc
-) -> sc.Variable:
-    """Position of pixels of all detectors."""
-    positions = [
-        _pixel_positions(detector, sample.position_from_sample(detector.position))
-        for detector in detector_descs
-    ]
-    return sc.concat(positions, 'panel')
-
-
 @dataclass
 class McStasInstrument:
     simulation_settings: SimulationSettings
@@ -380,30 +380,72 @@ def from_xml(cls, tree: _XML) -> 'McStasInstrument':
         )
 
     def pixel_ids(self, *det_names: str) -> sc.Variable:
-        detectors = tuple(det for det in self.detectors if det.name in det_names)
-        return _construct_pixel_ids(detectors)
+        """Pixel IDs for the detectors.
 
-    def to_coords(self, *det_names: str) -> dict[str, sc.Variable]:
-        """Extract coordinates from the McStas instrument description.
+        If multiple detectors are requested, all pixel IDs will be concatenated along
+        the 'id' dimension.
 
         Parameters
         ----------
         det_names:
-            Names of the detectors to extract coordinates for.
+            Names of the detectors to extract pixel IDs for.
 
         """
         detectors = tuple(det for det in self.detectors if det.name in det_names)
-        slow_axes = [det.slow_axis for det in detectors]
-        fast_axes = [det.fast_axis for det in detectors]
-        origins = [self.sample.position_from_sample(det.position) for det in detectors]
+        return _construct_pixel_ids(detectors)
+
+    def experiment_metadata(self) -> dict[str, sc.Variable]:
+        """Extract experiment metadata from the McStas instrument description."""
         return {
-            'fast_axis': sc.concat(fast_axes, 'panel'),
-            'slow_axis': sc.concat(slow_axes, 'panel'),
-            'origin_position': sc.concat(origins, 'panel'),
             'sample_position': self.sample.position_from_sample(self.sample.position),
             'source_position': self.sample.position_from_sample(self.source.position),
             'sample_name': sc.scalar(self.sample.name),
-            'position': _detector_pixel_positions(detectors, self.sample),
+        }
+
+    def _detector_metadata(self, det_name: str) -> dict[str, sc.Variable]:
+        try:
+            detector = next(det for det in self.detectors if det.name == det_name)
+        except StopIteration as e:
+            raise KeyError(f"Detector {det_name} not found.") from e
+        return {
+            'fast_axis': detector.fast_axis,
+            'slow_axis': detector.slow_axis,
+            'origin_position': self.sample.position_from_sample(detector.position),
+            'position': _pixel_positions(
+                detector, self.sample.position_from_sample(detector.position)
+            ),
+            'detector_shape': sc.scalar(detector.detector_shape),
+            'x_pixel_size': detector.step_x,
+            'y_pixel_size': detector.step_y,
+            'detector_name': sc.scalar(detector.name),
+        }
+
+    def detector_metadata(self, *det_names: str) -> dict[str, sc.Variable]:
+        """Extract detector metadata from the McStas instrument description.
+
+        If multiple detectors are requested, all metadata will be concatenated along
+        the 'panel' dimension.
+
+        Parameters
+        ----------
+        det_names:
+            Names of the detectors to extract metadata for.
+
+        """
+        if len(det_names) == 1:
+            return self._detector_metadata(det_names[0])
+        detector_metadatas = {
+            det_name: self._detector_metadata(det_name) for det_name in det_names
+        }
+        # Concatenate all metadata along the panel dimension
+        metadata_keys: set[str] = set().union(
+            *(set(detector_metadatas[det_name].keys()) for det_name in det_names)
+        )
+        return {
+            key: sc.concat(
+                [metadata[key] for metadata in detector_metadatas.values()], 'panel'
+            )
+            for key in metadata_keys
         }
diff --git a/src/ess/nmx/reduction.py b/src/ess/nmx/reduction.py
index 5d54323d..035dfb18 100644
--- a/src/ess/nmx/reduction.py
+++ b/src/ess/nmx/reduction.py
@@ -2,11 +2,10 @@
 # Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
 import scipp as sc
 
-from .mcstas.xml import McStasInstrument
 from .types import (
-    CrystalRotation,
-    DetectorName,
     McStasWeight2CountScaleFactor,
+    NMXDetectorMetadata,
+    NMXExperimentMetadata,
     NMXReducedCounts,
     NMXReducedDataGroup,
     NMXReducedProbability,
@@ -53,20 +52,18 @@
 
 def format_nmx_reduced_data(
     da: NMXReducedCounts,
-    detector_name: DetectorName,
-    instrument: McStasInstrument,
     proton_charge: ProtonCharge,
-    crystal_rotation: CrystalRotation,
+    experiment_metadata: NMXExperimentMetadata,
+    detector_metadata: NMXDetectorMetadata,
 ) -> NMXReducedDataGroup:
     """Bin time of arrival data into ``time_bin_step`` bins."""
-    new_coords = instrument.to_coords(detector_name)
 
     return NMXReducedDataGroup(
         sc.DataGroup(
             counts=da,
             proton_charge=proton_charge,
-            crystal_rotation=crystal_rotation,
-            **new_coords,
+            **experiment_metadata,
+            **detector_metadata,
         )
     )
diff --git a/src/ess/nmx/types.py b/src/ess/nmx/types.py
index a79921f6..aae56797 100644
--- a/src/ess/nmx/types.py
+++ b/src/ess/nmx/types.py
@@ -24,13 +24,18 @@
 McStasWeight2CountScaleFactor = NewType("McStasWeight2CountScaleFactor", sc.Variable)
 """Scale factor to convert McStas weights to counts"""
 
+NMXExperimentMetadata = NewType("NMXExperimentMetadata", sc.DataGroup)
+"""Metadata of the experiment"""
+
+NMXDetectorMetadata = NewType("NMXDetectorMetadata", sc.DataGroup)
+"""Metadata of the detector"""
 
 RawEventProbability = NewType("RawEventProbability", sc.DataArray)
 """DataArray containing the event probabilities read from the McStas file,
 has coordinates 'id' and 't'
 """
 
 NMXRawEventCountsDataGroup = NewType("NMXRawEventCountsDataGroup", sc.DataGroup)
-"""DataGroup containing the RawEventData and other metadata"""
+"""DataGroup containing the RawEventData, experiment metadata and detector metadata"""
 
 ProtonCharge = NewType("ProtonCharge", sc.Variable)
 """The proton charge signal"""
@@ -54,4 +59,4 @@
 """Histogram of time-of-arrival and pixel-id."""
 
 NMXReducedDataGroup = NewType("NMXReducedDataGroup", sc.DataGroup)
-"""Histogram of time-of-arrival and pixel-id, with additional metadata."""
+"""Datagroup containing Histogram(id, t), experiment metadata and detector metadata"""
diff --git a/tests/loader_test.py b/tests/loader_test.py
index ccd306eb..622652d0 100644
--- a/tests/loader_test.py
+++ b/tests/loader_test.py
@@ -47,10 +47,8 @@ def check_nmxdata_properties(
     dg: NMXRawEventCountsDataGroup, fast_axis, slow_axis
 ) -> None:
     assert isinstance(dg, sc.DataGroup)
-    assert_allclose(
-        sc.squeeze(dg['fast_axis'], 'panel'), fast_axis, atol=sc.scalar(0.005)
-    )
-    assert_identical(sc.squeeze(dg['slow_axis'], 'panel'), slow_axis)
+    assert_allclose(dg['fast_axis'], fast_axis, atol=sc.scalar(0.005))
+    assert_identical(dg['slow_axis'], slow_axis)
 
 
 @pytest.mark.parametrize(
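For orientation, a minimal sketch of how the metadata split from this patch is meant to be used. This is not part of the patch: the file path and detector name are illustrative placeholders, and `read_mcstas_geometry_xml` (re-exported via the same module's providers) is assumed to take the geometry file path; everything else is called as defined above.

import scipp as sc

from ess.nmx.mcstas.load import (
    load_detector_metadata,
    load_experiment_metadata,
    read_mcstas_geometry_xml,
)

# Placeholders: a McStas HDF5 file and one of its detector names.
instrument = read_mcstas_geometry_xml("mcstas_output.h5")
experiment = load_experiment_metadata(
    instrument, crystal_rotation=sc.vector([0.0, 0.0, 0.0], unit='deg')
)
detector = load_detector_metadata(instrument, "nD_Mantid_0")

# Both results are plain sc.DataGroup objects, so load_mcstas simply merges
# them with the event weights into one NMXRawEventCountsDataGroup.
merged = sc.DataGroup(**experiment, **detector)
sorted(merged.keys())
# -> ['crystal_rotation', 'detector_name', 'detector_shape', 'fast_axis',
#     'origin_position', 'position', 'sample_name', 'sample_position',
#     'slow_axis', 'source_position', 'x_pixel_size', 'y_pixel_size']
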
From cd89c0977292d795b306aa2db9aee38c087d6df5 Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 10:24:11 +0100
Subject: [PATCH 02/28] Add raw data metadata retrieval part.

---
 docs/user-guide/workflow_chunk.ipynb | 64 +++++++++++++++++-----------
 src/ess/nmx/mcstas/load.py           | 31 ++++++++++++++
 src/ess/nmx/types.py                 |  9 ++++
 3 files changed, 79 insertions(+), 25 deletions(-)

diff --git a/docs/user-guide/workflow_chunk.ipynb b/docs/user-guide/workflow_chunk.ipynb
index 148fe175..a99e72bc 100644
--- a/docs/user-guide/workflow_chunk.ipynb
+++ b/docs/user-guide/workflow_chunk.ipynb
@@ -37,17 +37,17 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Compute Maximum Probabiliity\n",
+    "## Compute Raw Data Metadata\n",
     "\n",
-    "`McStasWeight2CountScaleFactor` should not be different from chunk to chunk.\n",
+    "The `time-of-flight` coordinate and `McStasWeight2CountScaleFactor` should not be different from chunk to chunk.\n",
     "\n",
-    "Therefore we need to compute `McStasWeight2CoutScaleFactor` before we compute `NMXReducedDataGroup`.\n",
+    "Therefore we need to compute `TimeBinSteps` and `McStasWeight2CountScaleFactor` before we compute `NMXReducedDataGroup`.\n",
     "\n",
     "It can be done by `ess.reduce.streaming.StreamProcessor`.\n",
     "\n",
-    "In this example, `MaximumProbability` will be renewed every time a chunk is added to the streaming processor.\n",
+    "In this example, `MinimumTimeOfArrival`, `MaximumTimeOfArrival` and `MaximumProbability` will be renewed every time a chunk is added to the streaming processor.\n",
     "\n",
-    "`MaxAccumulator` remembers the previous maximum value and compute new maximum value with the new chunk.\n",
+    "`(Min/Max)Accumulator` remembers the previous minimum/maximum value and computes a new minimum/maximum value with each new chunk.\n",
     "\n",
     "``raw_event_data_chunk_generator`` yields a chunk of raw event probability from mcstas h5 file.\n",
     "\n",
@@ -62,16 +62,20 @@
    "source": [
     "from functools import partial\n",
     "from ess.reduce.streaming import StreamProcessor\n",
-    "from ess.nmx.streaming import MaxAccumulator\n",
+    "from ess.nmx.streaming import MaxAccumulator, MinAccumulator\n",
     "\n",
     "# Stream processor building helper\n",
     "scalefactor_stream_processor = partial(\n",
     "    StreamProcessor,\n",
     "    dynamic_keys=(RawEventProbability,),\n",
-    "    target_keys=(McStasWeight2CountScaleFactor,),\n",
-    "    accumulators={MaximumProbability: MaxAccumulator},\n",
+    "    target_keys=(NMXRawDataMetadata,),\n",
+    "    accumulators={\n",
+    "        MaximumProbability: MaxAccumulator,\n",
+    "        MaximumTimeOfArrival: MaxAccumulator,\n",
+    "        MinimumTimeOfArrival: MinAccumulator,\n",
+    "    },\n",
     ")\n",
-    "scalefactor_wf = wf.copy()"
+    "metadata_wf = wf.copy()"
    ]
   },
@@ -80,7 +84,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "scalefactor_wf.visualize(McStasWeight2CountScaleFactor, graph_attr={\"rankdir\": \"TD\"}, compact=True)"
+    "metadata_wf.visualize(NMXRawDataMetadata, graph_attr={\"rankdir\": \"TD\"}, compact=True)"
    ]
   },
@@ -90,7 +94,10 @@
    "outputs": [],
    "source": [
     "from ess.nmx.types import DetectorName\n",
-    "from ess.nmx.mcstas.load import raw_event_data_chunk_generator\n",
+    "from ess.nmx.mcstas.load import (\n",
+    "    raw_event_data_chunk_generator,\n",
+    "    mcstas_weight_to_probability_scalefactor,\n",
+    ")\n",
     "from ess.nmx.streaming import calculate_number_of_chunks\n",
     "from ipywidgets import IntProgress\n",
     "\n",
@@ -99,10 +106,11 @@
     "NUM_DETECTORS = 3\n",
     "\n",
     "# Loop over the detectors\n",
-    "file_path = scalefactor_wf.compute(FilePath)\n",
-    "scale_factors = {}\n",
+    "file_path = 
metadata_wf.compute(FilePath)\n", + "raw_data_metadatas = {}\n", + "\n", "for detector_i in range(0, NUM_DETECTORS):\n", - " temp_wf = scalefactor_wf.copy()\n", + " temp_wf = metadata_wf.copy()\n", " temp_wf[DetectorIndex] = detector_i\n", " detector_name = temp_wf.compute(DetectorName)\n", " max_chunk_id = calculate_number_of_chunks(\n", @@ -123,11 +131,18 @@ " else:\n", " results = processor.add_chunk({RawEventProbability: da})\n", " cur_detector_progress_bar.value += 1\n", - " scale_factors[detector_i] = results[McStasWeight2CountScaleFactor]\n", + " display(results[NMXRawDataMetadata])\n", + " raw_data_metadatas[detector_i] = results[NMXRawDataMetadata]\n", "\n", - "# We take the minimum scale factor for the entire dataset\n", - "scale_factor = min(scale_factors.values())\n", - "scale_factor\n" + "# We take the min/maximum values of the scale factor\n", + "min_toa = min(dg['min_toa'] for dg in raw_data_metadatas.values())\n", + "max_toa = max(dg['max_toa'] for dg in raw_data_metadatas.values())\n", + "max_probability = max(dg['max_probability'] for dg in raw_data_metadatas.values())\n", + "\n", + "toa_bin_edges = sc.linspace(dim='t', start=min_toa, stop=max_toa, num=51)\n", + "scale_factor = mcstas_weight_to_probability_scalefactor(\n", + " max_counts=wf.compute(MaximumCounts), max_probability=max_probability\n", + ")" ] }, { @@ -150,15 +165,14 @@ "from ess.nmx.mcstas.xml import McStasInstrument\n", "\n", "final_wf = wf.copy()\n", - "# Add the scale factor to the workflow\n", + "# Set the scale factor and time bin edges\n", "final_wf[McStasWeight2CountScaleFactor] = scale_factor\n", + "final_wf[TimeBinSteps] = toa_bin_edges\n", "\n", - "# Compute the static information in advance\n", - "# static_info = wf.compute([CrystalRotation, McStasInstrument])\n", - "static_info = wf.compute([McStasInstrument])\n", - "# final_wf[CrystalRotation] = static_info[CrystalRotation]\n", - "final_wf[CrystalRotation] = sc.vector([0, 0, 0.,], unit='deg')\n", - "final_wf[McStasInstrument] = static_info[McStasInstrument]\n", + "# Set the crystal rotation manually for now ...\n", + "final_wf[CrystalRotation] = sc.vector([0, 0, 0.0], unit='deg')\n", + "# Set static info\n", + "final_wf[McStasInstrument] = wf.compute(McStasInstrument)\n", "final_wf.visualize(NMXReducedDataGroup, compact=True)" ] }, diff --git a/src/ess/nmx/mcstas/load.py b/src/ess/nmx/mcstas/load.py index 100404a4..3efa3d80 100644 --- a/src/ess/nmx/mcstas/load.py +++ b/src/ess/nmx/mcstas/load.py @@ -14,9 +14,12 @@ FilePath, MaximumCounts, MaximumProbability, + MaximumTimeOfArrival, McStasWeight2CountScaleFactor, + MinimumTimeOfArrival, NMXDetectorMetadata, NMXExperimentMetadata, + NMXRawDataMetadata, NMXRawEventCountsDataGroup, PixelIds, RawEventProbability, @@ -285,7 +288,35 @@ def retrieve_pixel_ids( return PixelIds(instrument.pixel_ids(detector_name)) +def calculate_minimum_toa(da: RawEventProbability) -> MinimumTimeOfArrival: + """Calculate the minimum time of arrival from the data.""" + return MinimumTimeOfArrival(da.coords['t'].min()) + + +def calculate_maximum_toa(da: RawEventProbability) -> MaximumTimeOfArrival: + """Calculate the maximum time of arrival from the data.""" + return MaximumTimeOfArrival(da.coords['t'].max()) + + +def retrieve_raw_data_metadata( + min_toa: MinimumTimeOfArrival, + max_toa: MaximumTimeOfArrival, + max_probability: MaximumProbability, +) -> NMXRawDataMetadata: + """Retrieve the metadata of the raw data.""" + return NMXRawDataMetadata( + sc.DataGroup( + min_toa=min_toa, + max_toa=max_toa, + 
max_probability=max_probability,
+        )
+    )
+
+
 providers = (
+    calculate_minimum_toa,
+    calculate_maximum_toa,
+    retrieve_raw_data_metadata,
     read_mcstas_geometry_xml,
     detector_name_from_index,
     load_event_data_bank_name,
diff --git a/src/ess/nmx/types.py b/src/ess/nmx/types.py
index aae56797..100261b2 100644
--- a/src/ess/nmx/types.py
+++ b/src/ess/nmx/types.py
@@ -60,3 +60,12 @@
 
 NMXReducedDataGroup = NewType("NMXReducedDataGroup", sc.DataGroup)
 """Datagroup containing Histogram(id, t), experiment metadata and detector metadata"""
+
+MinimumTimeOfArrival = NewType("MinimumTimeOfArrival", sc.Variable)
+"""Minimum time of arrival of the raw data"""
+
+MaximumTimeOfArrival = NewType("MaximumTimeOfArrival", sc.Variable)
+"""Maximum time of arrival of the raw data"""
+
+NMXRawDataMetadata = NewType("NMXRawDataMetadata", sc.DataGroup)
+"""Metadata of the raw data, i.e. maximum weight and min/max time of arrival"""

From 03a5f00b9cd7d8073efd00bc9d88f8466175606a Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 11:02:04 +0100
Subject: [PATCH 03/28] Lauetof export interface.

---
 docs/user-guide/workflow_chunk.ipynb |  42 ++++++-
 src/ess/nmx/nexus.py                 | 159 ++++++++++++++++++++++++++-
 2 files changed, 197 insertions(+), 4 deletions(-)

diff --git a/docs/user-guide/workflow_chunk.ipynb b/docs/user-guide/workflow_chunk.ipynb
index a99e72bc..adc95d2f 100644
--- a/docs/user-guide/workflow_chunk.ipynb
+++ b/docs/user-guide/workflow_chunk.ipynb
@@ -145,15 +145,49 @@
     ")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compute Metadata\n",
+    "\n",
+    "Other metadata does not require any chunk-based computation.\n",
+    "\n",
+    "Therefore we export the metadata first and append detector data later."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ess.nmx.nexus import export_metadata_as_nxlauetof\n",
+    "\n",
+    "output_file = \"test.h5\"\n",
+    "experiment_metadata = wf.compute(NMXExperimentMetadata)\n",
+    "detector_metas = []\n",
+    "for detector_i in range(3):\n",
+    "    temp_wf = wf.copy()\n",
+    "    temp_wf[DetectorIndex] = detector_i\n",
+    "    detector_metas.append(temp_wf.compute(NMXDetectorMetadata))\n",
+    "\n",
+    "export_metadata_as_nxlauetof(\n",
+    "    experiment_metadata, *detector_metas, output_file=output_file\n",
+    ")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Compute Final Output\n",
     "\n",
-    "Now with the `scale_factor: McStasWeight2CountScaleFactor`, we can compute the final output chunk by chunk.\n",
+    "Now with all the metadata, we can compute the final output chunk by chunk.\n",
+    "\n",
+    "We will also compute static parameters in advance so that the stream processor does not compute them every time another chunk is added.\n",
     "\n",
-    "We will also compute static parameters in advance so that stream processor does not compute them every time another chunk is added."
+    "We will then export the reduced data detector by detector."
] }, { @@ -204,6 +238,7 @@ "from ess.nmx.mcstas.load import raw_event_data_chunk_generator\n", "from ess.nmx.streaming import calculate_number_of_chunks\n", "from ipywidgets import IntProgress\n", + "from ess.nmx.nexus import export_reduced_data_as_nxlauetof\n", "\n", "CHUNK_SIZE = 10 # Number of event rows to process at once\n", "# Increase this number to speed up the processing\n", @@ -243,7 +278,8 @@ " cur_detector_progress_bar.value += 1\n", "\n", " result = results[NMXReducedDataGroup]\n", - " display(result)\n" + " display(result)\n", + " export_reduced_data_as_nxlauetof(result, output_file=output_file)\n" ] } ], diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py index bf6b509d..59265938 100644 --- a/src/ess/nmx/nexus.py +++ b/src/ess/nmx/nexus.py @@ -3,10 +3,14 @@ import io import pathlib from functools import partial +from typing import Any import h5py +import numpy as np import scipp as sc +from .types import NMXDetectorMetadata, NMXExperimentMetadata, NMXReducedDataGroup + def _create_dataset_from_var( *, @@ -16,6 +20,7 @@ def _create_dataset_from_var( long_name: str | None = None, compression: str | None = None, compression_opts: int | None = None, + dtype: Any = None, ) -> h5py.Dataset: compression_options = {} if compression is not None: @@ -25,7 +30,7 @@ def _create_dataset_from_var( dataset = root_entry.create_dataset( name, - data=var.values, + data=var.values if dtype is None else var.values.astype(dtype, copy=False), **compression_options, ) dataset.attrs["units"] = str(var.unit) @@ -145,3 +150,155 @@ def export_as_nexus( _create_instrument_group(data, nx_entry) _create_detector_group(data, nx_entry) _create_source_group(data, nx_entry) + + +def _create_lauetof_data_entry(file_obj: h5py.File) -> h5py.Group: + nx_entry = file_obj.create_group("entry") + nx_entry.attrs["NX_class"] = "NXentry" + return nx_entry + + +def _add_lauetof_definition(nx_entry: h5py.Group) -> None: + nx_entry["definition"] = "NXlauetof" + + +def _add_lauetof_instrument(nx_entry: h5py.Group) -> h5py.Group: + nx_instrument = nx_entry.create_group("instrument") + nx_instrument.attrs["NX_class"] = "NXinstrument" + nx_instrument["name"] = "NMX" + return nx_instrument + + +def _add_lauetof_detector_group(dg: sc.DataGroup, nx_instrument: h5py.Group) -> None: + nx_detector = nx_instrument.create_group(dg["detector_name"].value) # Detector name + nx_detector.attrs["NX_class"] = "NXdetector" + # Polar angle + _create_dataset_from_var( + name="polar_angle", + root_entry=nx_detector, + var=sc.scalar(0, unit='deg'), # TODO: Add real data + ) + # Azimuthal angle + _create_dataset_from_var( + name="azimuthal_angle", + root_entry=nx_detector, + var=sc.scalar(0, unit=''), # TODO: Add real data + ) + # x_pixel_size + _create_dataset_from_var( + name="x_pixel_size", root_entry=nx_detector, var=dg["x_pixel_size"] + ) + # y_pixel_size + _create_dataset_from_var( + name="y_pixel_size", root_entry=nx_detector, var=dg["y_pixel_size"] + ) + # distance + _create_dataset_from_var( + name="distance", + root_entry=nx_detector, + var=sc.scalar(0, unit='m'), # TODO: Add real data + ) + # Legacy geometry information until we have a better way to store it + _create_dataset_from_var( + name="origin", root_entry=nx_detector, var=dg['origin_position'] + ) + # Fast axis, along where the pixel ID increases by 1 + _create_dataset_from_var( + root_entry=nx_detector, var=dg['fast_axis'], name="fast_axis" + ) + # Slow axis, along where the pixel ID increases + # by the number of pixels in the fast axis + _create_dataset_from_var( + 
root_entry=nx_detector, var=dg['slow_axis'], name="slow_axis" + ) + + +def _add_lauetof_sample_group(dg: NMXExperimentMetadata, nx_entry: h5py.Group) -> None: + nx_sample = nx_entry.create_group("sample") + nx_sample.attrs["NX_class"] = "NXsample" + nx_sample["name"] = ( + dg['sample_name'].value + if isinstance(dg['sample_name'], sc.Variable) + else dg['sample_name'] + ) + _create_dataset_from_var( + name='orientation_matrix', + root_entry=nx_sample, + var=sc.array( + dims=['i', 'j'], + values=[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + unit="dimensionless", + ), # TODO: Add real data, the sample orientation matrix + ) + _create_dataset_from_var( + name='unit_cell', + root_entry=nx_sample, + var=sc.array( + dims=['i'], + values=[1.0, 1.0, 1.0, 90.0, 90.0, 90.0], + unit="dimensionless", # TODO: Add real data, + # a, b, c, alpha, beta, gamma + ), + ) + + +def _add_lauetof_monitor_group(data: sc.DataGroup, nx_entry: h5py.Group) -> None: + nx_monitor = nx_entry.create_group("control") + nx_monitor.attrs["NX_class"] = "NXmonitor" + nx_monitor["mode"] = "monitor" + nx_monitor["preset"] = 0.0 # Check if this is the correct value + _create_dataset_from_var( + name='data', + root_entry=nx_monitor, + var=sc.array( + dims=['tof'], values=[1, 1, 1], unit="counts" + ), # TODO: Add real data, bin values + ) + _create_dataset_from_var( + name='time_of_flight', + root_entry=nx_monitor, + var=sc.array( + dims=['tof'], values=[1, 1, 1], unit="s" + ), # TODO: Add real data, bin edges + ) + + +def export_metadata_as_nxlauetof( + experiment_metadata: NMXExperimentMetadata, + *detector_metadatas: NMXDetectorMetadata, + output_file: str | pathlib.Path | io.BytesIO, +): + with h5py.File(output_file, "w") as f: + f.attrs["NX_class"] = "NXlauetof" + nx_entry = _create_lauetof_data_entry(f) + _add_lauetof_definition(nx_entry) + nx_instrument = _add_lauetof_instrument(nx_entry) + _add_lauetof_sample_group(experiment_metadata, nx_entry) + # Placeholder for ``monitor`` group + _add_lauetof_monitor_group(experiment_metadata, nx_entry) + # Skipping ``name`` field + # Add detector group metadata + for detector_metadata in detector_metadatas: + _add_lauetof_detector_group(detector_metadata, nx_instrument) + + +def export_reduced_data_as_nxlauetof( + dg: NMXReducedDataGroup, + output_file: str | pathlib.Path | io.BytesIO, + append_mode: bool = True, +) -> None: + if not append_mode: + raise NotImplementedError("Only append mode is supported for now.") + + with h5py.File(output_file, "r+") as f: + nx_detector: h5py.Group = f[f"entry/instrument/{dg['detector_name'].value}"] + # Data - shape: [n_x_pixels, n_y_pixels, n_tof_bins] + # The actual application definition defines it as integer, + # but we keep the original data type for now + num_x, num_y = dg["detector_shape"].value # Probably better way to do this + _create_dataset_from_var( + name="data", + root_entry=nx_detector, + var=sc.fold(dg['counts'].data, dim='id', sizes={'x': num_x, 'y': num_y}), + dtype=np.uint, + ) From c4fdb2c75c8c8949d3f6ef01f0658d86f753721a Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Thu, 27 Feb 2025 11:22:41 +0100 Subject: [PATCH 04/28] Raw data metadata as dataclass --- docs/user-guide/workflow_chunk.ipynb | 6 +++--- src/ess/nmx/mcstas/load.py | 6 +----- src/ess/nmx/types.py | 11 +++++++++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/user-guide/workflow_chunk.ipynb b/docs/user-guide/workflow_chunk.ipynb index adc95d2f..673cd13b 100644 --- a/docs/user-guide/workflow_chunk.ipynb +++ 
b/docs/user-guide/workflow_chunk.ipynb @@ -135,9 +135,9 @@ " raw_data_metadatas[detector_i] = results[NMXRawDataMetadata]\n", "\n", "# We take the min/maximum values of the scale factor\n", - "min_toa = min(dg['min_toa'] for dg in raw_data_metadatas.values())\n", - "max_toa = max(dg['max_toa'] for dg in raw_data_metadatas.values())\n", - "max_probability = max(dg['max_probability'] for dg in raw_data_metadatas.values())\n", + "min_toa = min(meta.min_toa for meta in raw_data_metadatas.values())\n", + "max_toa = max(meta.max_toa for meta in raw_data_metadatas.values())\n", + "max_probability = max(meta.max_probability for meta in raw_data_metadatas.values())\n", "\n", "toa_bin_edges = sc.linspace(dim='t', start=min_toa, stop=max_toa, num=51)\n", "scale_factor = mcstas_weight_to_probability_scalefactor(\n", diff --git a/src/ess/nmx/mcstas/load.py b/src/ess/nmx/mcstas/load.py index 3efa3d80..571ebd08 100644 --- a/src/ess/nmx/mcstas/load.py +++ b/src/ess/nmx/mcstas/load.py @@ -305,11 +305,7 @@ def retrieve_raw_data_metadata( ) -> NMXRawDataMetadata: """Retrieve the metadata of the raw data.""" return NMXRawDataMetadata( - sc.DataGroup( - min_toa=min_toa, - max_toa=max_toa, - max_probability=max_probability, - ) + min_toa=min_toa, max_toa=max_toa, max_probability=max_probability ) diff --git a/src/ess/nmx/types.py b/src/ess/nmx/types.py index 100261b2..0d629021 100644 --- a/src/ess/nmx/types.py +++ b/src/ess/nmx/types.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from typing import Any, NewType import scipp as sc @@ -67,5 +68,11 @@ MaximumTimeOfArrival = NewType("MaximumTimeOfArrival", sc.Variable) """Maximum time of arrival of the raw data""" -NMXRawDataMetadata = NewType("NMXRawDataMetadata", sc.DataGroup) -"""Metadata of the raw data, i.e. maximum weight and min/max time of arrival""" + +@dataclass +class NMXRawDataMetadata: + """Metadata of the raw data, i.e. maximum weight and min/max time of arrival""" + + max_probability: MaximumProbability + min_toa: MinimumTimeOfArrival + max_toa: MaximumTimeOfArrival From 005f890f216fc1b149bfe0b4a1b3f787cd0e124d Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Thu, 27 Feb 2025 13:25:12 +0100 Subject: [PATCH 05/28] Allow arbitrary metadata and export time of flight from the coordinate. 
---
 docs/user-guide/workflow_chunk.ipynb |  6 +++++-
 src/ess/nmx/nexus.py                 | 32 +++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/docs/user-guide/workflow_chunk.ipynb b/docs/user-guide/workflow_chunk.ipynb
index 673cd13b..729a8778 100644
--- a/docs/user-guide/workflow_chunk.ipynb
+++ b/docs/user-guide/workflow_chunk.ipynb
@@ -173,7 +173,11 @@
     "    detector_metas.append(temp_wf.compute(NMXDetectorMetadata))\n",
     "\n",
     "export_metadata_as_nxlauetof(\n",
-    "    experiment_metadata, *detector_metas, output_file=output_file\n",
+    "    *detector_metas,\n",
+    "    experiment_metadata=experiment_metadata,\n",
+    "    output_file=output_file,\n",
+    "    # Arbitrary metadata is stored under the ``entry/metadata`` group.\n",
+    "    mcstas_weight2count_scale_factor=scale_factor,\n",
     ")"
diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py
index 59265938..0badb62e 100644
--- a/src/ess/nmx/nexus.py
+++ b/src/ess/nmx/nexus.py
@@ -263,10 +263,33 @@ def _add_lauetof_monitor_group(data: sc.DataGroup, nx_entry: h5py.Group) -> None
     )
 
 
+def _add_arbitrary_metadata(
+    nx_entry: h5py.Group, **arbitrary_metadata: sc.Variable
+) -> None:
+    if not arbitrary_metadata:
+        return
+
+    metadata_group = nx_entry.create_group("metadata")
+    for key, value in arbitrary_metadata.items():
+        if not isinstance(value, sc.Variable):
+            import warnings
+
+            msg = f"Skipping metadata key '{key}' as it is not a scipp.Variable."
+            warnings.warn(UserWarning(msg), stacklevel=2)
+            continue
+        else:
+            _create_dataset_from_var(
+                name=key,
+                root_entry=metadata_group,
+                var=value,
+            )
+
+
 def export_metadata_as_nxlauetof(
-    experiment_metadata: NMXExperimentMetadata,
     *detector_metadatas: NMXDetectorMetadata,
+    experiment_metadata: NMXExperimentMetadata,
     output_file: str | pathlib.Path | io.BytesIO,
+    **arbitrary_metadata: sc.Variable,
 ):
     with h5py.File(output_file, "w") as f:
         f.attrs["NX_class"] = "NXlauetof"
@@ -280,6 +303,8 @@
     # Add detector group metadata
     for detector_metadata in detector_metadatas:
         _add_lauetof_detector_group(detector_metadata, nx_instrument)
+    # Add arbitrary metadata
+    _add_arbitrary_metadata(nx_entry, **arbitrary_metadata)
 
 
 def export_reduced_data_as_nxlauetof(
@@ -302,3 +327,8 @@
         var=sc.fold(dg['counts'].data, dim='id', sizes={'x': num_x, 'y': num_y}),
         dtype=np.uint,
     )
+    _create_dataset_from_var(
+        name='time_of_flight',
+        root_entry=nx_detector,
+        var=sc.midpoints(dg['counts'].coords['t'], dim='t'),
+    )
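Taken together with the export patches above, the intended write-once-then-append flow looks roughly as follows. This is a sketch, not part of the patches: `wf`, `DetectorIndex`, `scale_factor`, and the per-panel `reduced_data_groups` are assumed to exist as in the workflow notebook, and the output path is arbitrary.

from ess.nmx.nexus import (
    export_metadata_as_nxlauetof,
    export_reduced_data_as_nxlauetof,
)
from ess.nmx.types import NMXDetectorMetadata, NMXExperimentMetadata

output_file = "nmx_reduced.h5"

# One metadata group per detector panel.
detector_metas = []
for detector_i in range(3):  # three NMX panels, as assumed in the notebook
    temp_wf = wf.copy()
    temp_wf[DetectorIndex] = detector_i
    detector_metas.append(temp_wf.compute(NMXDetectorMetadata))

# Write the static NXlauetof skeleton once; extra keyword arguments are
# stored as variables under ``entry/metadata`` by _add_arbitrary_metadata.
export_metadata_as_nxlauetof(
    *detector_metas,
    experiment_metadata=wf.compute(NMXExperimentMetadata),
    output_file=output_file,
    mcstas_weight2count_scale_factor=scale_factor,
)

# Afterwards, append the reduced counts panel by panel (append mode only).
for reduced_dg in reduced_data_groups:  # one NMXReducedDataGroup per panel
    export_reduced_data_as_nxlauetof(reduced_dg, output_file=output_file)
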
From 3c6c896c2f4f1b74210088d88d56b0d301f499dc Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 13:55:40 +0100
Subject: [PATCH 08/28] Safety check in the export function.

---
 src/ess/nmx/nexus.py | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py
index 0badb62e..f3a44266 100644
--- a/src/ess/nmx/nexus.py
+++ b/src/ess/nmx/nexus.py
@@ -8,6 +8,7 @@
 import h5py
 import numpy as np
 import scipp as sc
+import scippnexus as snx
 
 from .types import NMXDetectorMetadata, NMXExperimentMetadata, NMXReducedDataGroup
 
@@ -307,16 +308,57 @@
     _add_arbitrary_metadata(nx_entry, **arbitrary_metadata)
 
 
+def _validate_existing_metadata(
+    dg: NMXReducedDataGroup,
+    detector_group: snx.Group,
+    sample_group: snx.Group,
+    safety_checks: bool = True,
+) -> None:
+    flag = True
+    # check pixel size
+    flag = flag and sc.identical(dg["x_pixel_size"], detector_group["x_pixel_size"])
+    flag = flag and sc.identical(dg["y_pixel_size"], detector_group["y_pixel_size"])
+    # check sample name
+    flag = flag and dg["sample_name"].value == sample_group["name"]
+
+    if not flag and safety_checks:
+        raise ValueError(
+            f"Metadata for detector '{dg['detector_name'].value}' in the file "
+            "does not match the provided data."
+        )
+    elif not flag and not safety_checks:
+        import warnings
+
+        warnings.warn(
+            UserWarning(
+                "Metadata for detector in the file does not match the provided data. "
+                "This may lead to unexpected results. "
+                "However, the operation will proceed as requested "
+                "since safety checks are disabled."
+ ), + stacklevel=2, + ) + + def export_reduced_data_as_nxlauetof( dg: NMXReducedDataGroup, output_file: str | pathlib.Path | io.BytesIO, append_mode: bool = True, + safety_checks: bool = True, ) -> None: if not append_mode: raise NotImplementedError("Only append mode is supported for now.") + detector_group_path = f"entry/instrument/{dg['detector_name'].value}" + with snx.File(output_file, "r") as f: + _validate_existing_metadata( + dg=dg, + detector_group=f[detector_group_path][()], + sample_group=f["entry/sample"][()], + safety_checks=safety_checks, + ) with h5py.File(output_file, "r+") as f: - nx_detector: h5py.Group = f[f"entry/instrument/{dg['detector_name'].value}"] + nx_detector: h5py.Group = f[detector_group_path] # Data - shape: [n_x_pixels, n_y_pixels, n_tof_bins] # The actual application definition defines it as integer, # but we keep the original data type for now From 462d1ef5852ac96db49a9a87c28140eb50a08214 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Thu, 27 Feb 2025 14:45:54 +0100 Subject: [PATCH 09/28] Add warning filter. --- src/ess/nmx/nexus.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py index f3a44266..6b69d52c 100644 --- a/src/ess/nmx/nexus.py +++ b/src/ess/nmx/nexus.py @@ -2,6 +2,7 @@ # Copyright (c) 2023 Scipp contributors (https://github.com/scipp) import io import pathlib +import warnings from functools import partial from typing import Any @@ -135,8 +136,6 @@ def export_as_nexus( Currently exporting step is not expected to be part of sciline pipelines. """ - import warnings - warnings.warn( DeprecationWarning( "Exporting to custom NeXus format will be deprecated in the near future." @@ -327,8 +326,6 @@ def _validate_existing_metadata( "does not match the provided data." ) elif not flag and not safety_checks: - import warnings - warnings.warn( UserWarning( "Metadata for detector in the file does not match the provided data." @@ -349,13 +346,17 @@ def export_reduced_data_as_nxlauetof( if not append_mode: raise NotImplementedError("Only append mode is supported for now.") detector_group_path = f"entry/instrument/{dg['detector_name'].value}" - with snx.File(output_file, "r") as f: - _validate_existing_metadata( - dg=dg, - detector_group=f[detector_group_path][()], - sample_group=f["entry/sample"][()], - safety_checks=safety_checks, - ) + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + # Userwarning is expected here as histogram data is not yet saved. + with snx.File(output_file, "r") as f: + _validate_existing_metadata( + dg=dg, + detector_group=f[detector_group_path][()], + sample_group=f["entry/sample"][()], + safety_checks=safety_checks, + ) with h5py.File(output_file, "r+") as f: nx_detector: h5py.Group = f[detector_group_path] From 9ce7fdc5f3ba70b5e63a9a56180aed3eaa898100 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Thu, 27 Feb 2025 13:48:35 +0000 Subject: [PATCH 10/28] Apply automatic formatting --- src/ess/nmx/types.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ess/nmx/types.py b/src/ess/nmx/types.py index 2366bd2e..206de867 100644 --- a/src/ess/nmx/types.py +++ b/src/ess/nmx/types.py @@ -68,6 +68,7 @@ MaximumTimeOfArrival = NewType("MaximumTimeOfArrival", sc.Variable) """Maximum time of arrival of the raw data""" + @dataclass class NMXRawDataMetadata: """Metadata of the raw data, i.e. 
maximum weight and min/max time of arrival"""

From aab78a8d32e127a4168ef88a350bd760702d8885 Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 14:49:50 +0100
Subject: [PATCH 12/28] Fix typo

---
 src/ess/nmx/types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ess/nmx/types.py b/src/ess/nmx/types.py
index 206de867..0d629021 100644
--- a/src/ess/nmx/types.py
+++ b/src/ess/nmx/types.py
@@ -75,4 +75,4 @@
 
     max_probability: MaximumProbability
     min_toa: MinimumTimeOfArrival
-    max_toa: Maxim
+    max_toa: MaximumTimeOfArrival

From 75b9bb41843ed4fccd5856b3e1edde9dc4864c04 Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Mon, 3 Mar 2025 20:04:14 +0100
Subject: [PATCH 13/28] Move functions to a more appropriate module.

---
 src/ess/nmx/mcstas/__init__.py |  4 ++++
 src/ess/nmx/mcstas/load.py     | 12 ------------
 src/ess/nmx/reduction.py       | 12 ++++++++++++
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/ess/nmx/mcstas/__init__.py b/src/ess/nmx/mcstas/__init__.py
index c40c8057..9680faf6 100644
--- a/src/ess/nmx/mcstas/__init__.py
+++ b/src/ess/nmx/mcstas/__init__.py
@@ -6,6 +6,8 @@ def McStasWorkflow():
     import sciline as sl
 
     from ess.nmx.reduction import (
+        calculate_maximum_toa,
+        calculate_minimum_toa,
         format_nmx_reduced_data,
         proton_charge_from_event_counts,
         raw_event_probability_to_counts,
@@ -18,6 +20,8 @@
     return sl.Pipeline(
         (
             *loader_providers,
+            calculate_maximum_toa,
+            calculate_minimum_toa,
             read_mcstas_geometry_xml,
             proton_charge_from_event_counts,
             reduce_raw_event_probability,
diff --git a/src/ess/nmx/mcstas/load.py b/src/ess/nmx/mcstas/load.py
index 3efa3d80..1e5f432f 100644
--- a/src/ess/nmx/mcstas/load.py
+++ b/src/ess/nmx/mcstas/load.py
@@ -288,16 +288,6 @@
     return PixelIds(instrument.pixel_ids(detector_name))
 
 
-def calculate_minimum_toa(da: RawEventProbability) -> MinimumTimeOfArrival:
-    """Calculate the minimum time of arrival from the data."""
-    return MinimumTimeOfArrival(da.coords['t'].min())
-
-
-def calculate_maximum_toa(da: RawEventProbability) -> MaximumTimeOfArrival:
-    """Calculate the maximum time of arrival from the data."""
-    return MaximumTimeOfArrival(da.coords['t'].max())
-
-
 def retrieve_raw_data_metadata(
     min_toa: MinimumTimeOfArrival,
     max_toa: MaximumTimeOfArrival,
@@ -314,8 +304,6 @@
 
 
 providers = (
-    calculate_minimum_toa,
-    calculate_maximum_toa,
     retrieve_raw_data_metadata,
     read_mcstas_geometry_xml,
     detector_name_from_index,
diff --git a/src/ess/nmx/reduction.py b/src/ess/nmx/reduction.py
index 035dfb18..19e70535 100644
--- a/src/ess/nmx/reduction.py
+++ b/src/ess/nmx/reduction.py
@@ -3,7 +3,9 @@
 import scipp as sc
 
 from .types import (
+    MaximumTimeOfArrival,
     McStasWeight2CountScaleFactor,
+    MinimumTimeOfArrival,
    NMXDetectorMetadata,
     NMXExperimentMetadata,
     NMXReducedCounts,
@@ -16,6 +18,16 @@
 )
 
 
+def calculate_minimum_toa(da: RawEventProbability) -> MinimumTimeOfArrival:
+    """Calculate the minimum time of arrival from the data."""
+    return MinimumTimeOfArrival(da.coords['t'].min())
+
+
+def calculate_maximum_toa(da: RawEventProbability) -> MaximumTimeOfArrival:
+    """Calculate the maximum time of arrival from the data."""
+    return MaximumTimeOfArrival(da.coords['t'].max())
+
+
 def proton_charge_from_event_counts(da: NMXReducedCounts) -> ProtonCharge:
     """Make up the proton charge from the event counts.
 

From 8f74b8233707a2e75165189ed789d2be7c5ebbfd Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 11:02:04 +0100
Subject: [PATCH 14/28] Lauetof export interface.

---
 docs/user-guide/workflow_chunk.ipynb |  42 ++++++-
 src/ess/nmx/nexus.py                 | 159 ++++++++++++++++++++++++++-
 2 files changed, 197 insertions(+), 4 deletions(-)

diff --git a/docs/user-guide/workflow_chunk.ipynb b/docs/user-guide/workflow_chunk.ipynb
index a99e72bc..adc95d2f 100644
--- a/docs/user-guide/workflow_chunk.ipynb
+++ b/docs/user-guide/workflow_chunk.ipynb
@@ -145,15 +145,49 @@
     ")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compute Metadata\n",
+    "\n",
+    "The other metadata does not require any chunk-based computation.\n",
+    "\n",
+    "Therefore we export the metadata first and append the detector data later."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ess.nmx.nexus import export_metadata_as_nxlauetof\n",
+    "\n",
+    "output_file = \"test.h5\"\n",
+    "experiment_metadata = wf.compute(NMXExperimentMetadata)\n",
+    "detector_metas = []\n",
+    "for detector_i in range(3):\n",
+    "    temp_wf = wf.copy()\n",
+    "    temp_wf[DetectorIndex] = detector_i\n",
+    "    detector_metas.append(temp_wf.compute(NMXDetectorMetadata))\n",
+    "\n",
+    "export_metadata_as_nxlauetof(\n",
+    "    experiment_metadata, *detector_metas, output_file=output_file\n",
+    ")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Compute Final Output\n",
     "\n",
-    "Now with the `scale_factor: McStasWeight2CountScaleFactor`, we can compute the final output chunk by chunk.\n",
+    "Now with all the metadata, we can compute the final output chunk by chunk.\n",
+    "\n",
+    "We will also compute static parameters in advance so that the stream processor does not recompute them every time another chunk is added.\n",
     "\n",
-    "We will also compute static parameters in advance so that stream processor does not compute them every time another chunk is added."
+    "We will then export the reduced data detector by detector."
] }, { @@ -204,6 +238,7 @@ "from ess.nmx.mcstas.load import raw_event_data_chunk_generator\n", "from ess.nmx.streaming import calculate_number_of_chunks\n", "from ipywidgets import IntProgress\n", + "from ess.nmx.nexus import export_reduced_data_as_nxlauetof\n", "\n", "CHUNK_SIZE = 10 # Number of event rows to process at once\n", "# Increase this number to speed up the processing\n", @@ -243,7 +278,8 @@ " cur_detector_progress_bar.value += 1\n", "\n", " result = results[NMXReducedDataGroup]\n", - " display(result)\n" + " display(result)\n", + " export_reduced_data_as_nxlauetof(result, output_file=output_file)\n" ] } ], diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py index bf6b509d..59265938 100644 --- a/src/ess/nmx/nexus.py +++ b/src/ess/nmx/nexus.py @@ -3,10 +3,14 @@ import io import pathlib from functools import partial +from typing import Any import h5py +import numpy as np import scipp as sc +from .types import NMXDetectorMetadata, NMXExperimentMetadata, NMXReducedDataGroup + def _create_dataset_from_var( *, @@ -16,6 +20,7 @@ def _create_dataset_from_var( long_name: str | None = None, compression: str | None = None, compression_opts: int | None = None, + dtype: Any = None, ) -> h5py.Dataset: compression_options = {} if compression is not None: @@ -25,7 +30,7 @@ def _create_dataset_from_var( dataset = root_entry.create_dataset( name, - data=var.values, + data=var.values if dtype is None else var.values.astype(dtype, copy=False), **compression_options, ) dataset.attrs["units"] = str(var.unit) @@ -145,3 +150,155 @@ def export_as_nexus( _create_instrument_group(data, nx_entry) _create_detector_group(data, nx_entry) _create_source_group(data, nx_entry) + + +def _create_lauetof_data_entry(file_obj: h5py.File) -> h5py.Group: + nx_entry = file_obj.create_group("entry") + nx_entry.attrs["NX_class"] = "NXentry" + return nx_entry + + +def _add_lauetof_definition(nx_entry: h5py.Group) -> None: + nx_entry["definition"] = "NXlauetof" + + +def _add_lauetof_instrument(nx_entry: h5py.Group) -> h5py.Group: + nx_instrument = nx_entry.create_group("instrument") + nx_instrument.attrs["NX_class"] = "NXinstrument" + nx_instrument["name"] = "NMX" + return nx_instrument + + +def _add_lauetof_detector_group(dg: sc.DataGroup, nx_instrument: h5py.Group) -> None: + nx_detector = nx_instrument.create_group(dg["detector_name"].value) # Detector name + nx_detector.attrs["NX_class"] = "NXdetector" + # Polar angle + _create_dataset_from_var( + name="polar_angle", + root_entry=nx_detector, + var=sc.scalar(0, unit='deg'), # TODO: Add real data + ) + # Azimuthal angle + _create_dataset_from_var( + name="azimuthal_angle", + root_entry=nx_detector, + var=sc.scalar(0, unit=''), # TODO: Add real data + ) + # x_pixel_size + _create_dataset_from_var( + name="x_pixel_size", root_entry=nx_detector, var=dg["x_pixel_size"] + ) + # y_pixel_size + _create_dataset_from_var( + name="y_pixel_size", root_entry=nx_detector, var=dg["y_pixel_size"] + ) + # distance + _create_dataset_from_var( + name="distance", + root_entry=nx_detector, + var=sc.scalar(0, unit='m'), # TODO: Add real data + ) + # Legacy geometry information until we have a better way to store it + _create_dataset_from_var( + name="origin", root_entry=nx_detector, var=dg['origin_position'] + ) + # Fast axis, along where the pixel ID increases by 1 + _create_dataset_from_var( + root_entry=nx_detector, var=dg['fast_axis'], name="fast_axis" + ) + # Slow axis, along where the pixel ID increases + # by the number of pixels in the fast axis + _create_dataset_from_var( + 
root_entry=nx_detector, var=dg['slow_axis'], name="slow_axis" + ) + + +def _add_lauetof_sample_group(dg: NMXExperimentMetadata, nx_entry: h5py.Group) -> None: + nx_sample = nx_entry.create_group("sample") + nx_sample.attrs["NX_class"] = "NXsample" + nx_sample["name"] = ( + dg['sample_name'].value + if isinstance(dg['sample_name'], sc.Variable) + else dg['sample_name'] + ) + _create_dataset_from_var( + name='orientation_matrix', + root_entry=nx_sample, + var=sc.array( + dims=['i', 'j'], + values=[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + unit="dimensionless", + ), # TODO: Add real data, the sample orientation matrix + ) + _create_dataset_from_var( + name='unit_cell', + root_entry=nx_sample, + var=sc.array( + dims=['i'], + values=[1.0, 1.0, 1.0, 90.0, 90.0, 90.0], + unit="dimensionless", # TODO: Add real data, + # a, b, c, alpha, beta, gamma + ), + ) + + +def _add_lauetof_monitor_group(data: sc.DataGroup, nx_entry: h5py.Group) -> None: + nx_monitor = nx_entry.create_group("control") + nx_monitor.attrs["NX_class"] = "NXmonitor" + nx_monitor["mode"] = "monitor" + nx_monitor["preset"] = 0.0 # Check if this is the correct value + _create_dataset_from_var( + name='data', + root_entry=nx_monitor, + var=sc.array( + dims=['tof'], values=[1, 1, 1], unit="counts" + ), # TODO: Add real data, bin values + ) + _create_dataset_from_var( + name='time_of_flight', + root_entry=nx_monitor, + var=sc.array( + dims=['tof'], values=[1, 1, 1], unit="s" + ), # TODO: Add real data, bin edges + ) + + +def export_metadata_as_nxlauetof( + experiment_metadata: NMXExperimentMetadata, + *detector_metadatas: NMXDetectorMetadata, + output_file: str | pathlib.Path | io.BytesIO, +): + with h5py.File(output_file, "w") as f: + f.attrs["NX_class"] = "NXlauetof" + nx_entry = _create_lauetof_data_entry(f) + _add_lauetof_definition(nx_entry) + nx_instrument = _add_lauetof_instrument(nx_entry) + _add_lauetof_sample_group(experiment_metadata, nx_entry) + # Placeholder for ``monitor`` group + _add_lauetof_monitor_group(experiment_metadata, nx_entry) + # Skipping ``name`` field + # Add detector group metadata + for detector_metadata in detector_metadatas: + _add_lauetof_detector_group(detector_metadata, nx_instrument) + + +def export_reduced_data_as_nxlauetof( + dg: NMXReducedDataGroup, + output_file: str | pathlib.Path | io.BytesIO, + append_mode: bool = True, +) -> None: + if not append_mode: + raise NotImplementedError("Only append mode is supported for now.") + + with h5py.File(output_file, "r+") as f: + nx_detector: h5py.Group = f[f"entry/instrument/{dg['detector_name'].value}"] + # Data - shape: [n_x_pixels, n_y_pixels, n_tof_bins] + # The actual application definition defines it as integer, + # but we keep the original data type for now + num_x, num_y = dg["detector_shape"].value # Probably better way to do this + _create_dataset_from_var( + name="data", + root_entry=nx_detector, + var=sc.fold(dg['counts'].data, dim='id', sizes={'x': num_x, 'y': num_y}), + dtype=np.uint, + ) From 9e3edc1853b671254434b58aad661aadf4193f9a Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Thu, 27 Feb 2025 11:22:41 +0100 Subject: [PATCH 15/28] Raw data metadata as dataclass --- docs/user-guide/workflow_chunk.ipynb | 6 +++--- src/ess/nmx/mcstas/load.py | 6 +----- src/ess/nmx/types.py | 11 +++++++++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/docs/user-guide/workflow_chunk.ipynb b/docs/user-guide/workflow_chunk.ipynb index adc95d2f..673cd13b 100644 --- a/docs/user-guide/workflow_chunk.ipynb +++ 
b/docs/user-guide/workflow_chunk.ipynb @@ -135,9 +135,9 @@ " raw_data_metadatas[detector_i] = results[NMXRawDataMetadata]\n", "\n", "# We take the min/maximum values of the scale factor\n", - "min_toa = min(dg['min_toa'] for dg in raw_data_metadatas.values())\n", - "max_toa = max(dg['max_toa'] for dg in raw_data_metadatas.values())\n", - "max_probability = max(dg['max_probability'] for dg in raw_data_metadatas.values())\n", + "min_toa = min(meta.min_toa for meta in raw_data_metadatas.values())\n", + "max_toa = max(meta.max_toa for meta in raw_data_metadatas.values())\n", + "max_probability = max(meta.max_probability for meta in raw_data_metadatas.values())\n", "\n", "toa_bin_edges = sc.linspace(dim='t', start=min_toa, stop=max_toa, num=51)\n", "scale_factor = mcstas_weight_to_probability_scalefactor(\n", diff --git a/src/ess/nmx/mcstas/load.py b/src/ess/nmx/mcstas/load.py index 1e5f432f..b989d092 100644 --- a/src/ess/nmx/mcstas/load.py +++ b/src/ess/nmx/mcstas/load.py @@ -295,11 +295,7 @@ def retrieve_raw_data_metadata( ) -> NMXRawDataMetadata: """Retrieve the metadata of the raw data.""" return NMXRawDataMetadata( - sc.DataGroup( - min_toa=min_toa, - max_toa=max_toa, - max_probability=max_probability, - ) + min_toa=min_toa, max_toa=max_toa, max_probability=max_probability ) diff --git a/src/ess/nmx/types.py b/src/ess/nmx/types.py index 100261b2..0d629021 100644 --- a/src/ess/nmx/types.py +++ b/src/ess/nmx/types.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from typing import Any, NewType import scipp as sc @@ -67,5 +68,11 @@ MaximumTimeOfArrival = NewType("MaximumTimeOfArrival", sc.Variable) """Maximum time of arrival of the raw data""" -NMXRawDataMetadata = NewType("NMXRawDataMetadata", sc.DataGroup) -"""Metadata of the raw data, i.e. maximum weight and min/max time of arrival""" + +@dataclass +class NMXRawDataMetadata: + """Metadata of the raw data, i.e. maximum weight and min/max time of arrival""" + + max_probability: MaximumProbability + min_toa: MinimumTimeOfArrival + max_toa: MaximumTimeOfArrival From f2308fc8b5a89dc7da863ef8addbd62661bbf579 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Thu, 27 Feb 2025 13:25:12 +0100 Subject: [PATCH 16/28] Allow arbitrary metadata and export time of flight from the coordinate. 
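
Arbitrary scalar metadata can now be passed to
``export_metadata_as_nxlauetof`` as keyword arguments. Every value that
is a ``scipp.Variable`` is written into the ``entry/metadata`` group;
anything else is skipped with a ``UserWarning``. A minimal usage sketch
(the file name and scale-factor value are illustrative, and
``detector_metas``/``experiment_metadata`` are computed as in the
notebook):

    import scipp as sc
    from ess.nmx.nexus import export_metadata_as_nxlauetof

    export_metadata_as_nxlauetof(
        *detector_metas,
        experiment_metadata=experiment_metadata,
        output_file="test.h5",
        # Extra keyword arguments land in the ``entry/metadata`` group.
        mcstas_weight2count_scale_factor=sc.scalar(0.05, unit="dimensionless"),
    )
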
--- docs/user-guide/workflow_chunk.ipynb | 6 +++++- src/ess/nmx/nexus.py | 32 +++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/workflow_chunk.ipynb b/docs/user-guide/workflow_chunk.ipynb index 673cd13b..729a8778 100644 --- a/docs/user-guide/workflow_chunk.ipynb +++ b/docs/user-guide/workflow_chunk.ipynb @@ -173,7 +173,11 @@ " detector_metas.append(temp_wf.compute(NMXDetectorMetadata))\n", "\n", "export_metadata_as_nxlauetof(\n", - " experiment_metadata, *detector_metas, output_file=output_file\n", + " *detector_metas,\n", + " experiment_metadata=experiment_metadata,\n", + " output_file=output_file,\n", + " # Arbitrary metadata falls into ``entry`` group as a variable.\n", + " mcstas_weight2count_scale_factor=scale_factor,\n", ")" ] }, diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py index 59265938..0badb62e 100644 --- a/src/ess/nmx/nexus.py +++ b/src/ess/nmx/nexus.py @@ -263,10 +263,33 @@ def _add_lauetof_monitor_group(data: sc.DataGroup, nx_entry: h5py.Group) -> None ) +def _add_arbitrary_metadata( + nx_entry: h5py.Group, **arbitrary_metadata: sc.Variable +) -> None: + if not arbitrary_metadata: + return + + metadata_group = nx_entry.create_group("metadata") + for key, value in arbitrary_metadata.items(): + if not isinstance(value, sc.Variable): + import warnings + + msg = f"Skipping metadata key '{key}' as it is not a scipp.Variable." + warnings.warn(UserWarning(msg), stacklevel=2) + continue + else: + _create_dataset_from_var( + name=key, + root_entry=metadata_group, + var=value, + ) + + def export_metadata_as_nxlauetof( - experiment_metadata: NMXExperimentMetadata, *detector_metadatas: NMXDetectorMetadata, + experiment_metadata: NMXExperimentMetadata, output_file: str | pathlib.Path | io.BytesIO, + **arbitrary_metadata: sc.Variable, ): with h5py.File(output_file, "w") as f: f.attrs["NX_class"] = "NXlauetof" @@ -280,6 +303,8 @@ def export_metadata_as_nxlauetof( # Add detector group metadata for detector_metadata in detector_metadatas: _add_lauetof_detector_group(detector_metadata, nx_instrument) + # Add arbitrary metadata + _add_arbitrary_metadata(nx_entry, **arbitrary_metadata) def export_reduced_data_as_nxlauetof( @@ -302,3 +327,8 @@ def export_reduced_data_as_nxlauetof( var=sc.fold(dg['counts'].data, dim='id', sizes={'x': num_x, 'y': num_y}), dtype=np.uint, ) + _create_dataset_from_var( + name='time_of_flight', + root_entry=nx_detector, + var=sc.midpoints(dg['counts'].coords['t'], dim='t'), + ) From 77cb6c99a8dd8414eddc61c311f5aa9d4d89765f Mon Sep 17 00:00:00 2001 From: Sunyoung Yoo Date: Mon, 3 Mar 2025 20:11:40 +0100 Subject: [PATCH 17/28] Specify unit Co-authored-by: Simon Heybrock <12912489+SimonHeybrock@users.noreply.github.com> --- src/ess/nmx/nexus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py index 0badb62e..12ce1766 100644 --- a/src/ess/nmx/nexus.py +++ b/src/ess/nmx/nexus.py @@ -182,7 +182,7 @@ def _add_lauetof_detector_group(dg: sc.DataGroup, nx_instrument: h5py.Group) -> _create_dataset_from_var( name="azimuthal_angle", root_entry=nx_detector, - var=sc.scalar(0, unit=''), # TODO: Add real data + var=sc.scalar(0, unit='deg'), # TODO: Add real data ) # x_pixel_size _create_dataset_from_var( From 80eb19079156e21d155fc799cbdc320253e7e1ba Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Mon, 3 Mar 2025 20:23:20 +0100 Subject: [PATCH 18/28] Add docstring to export methods. 
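
The docstrings spell out the intended two-step export: write the
metadata skeleton once, then append the reduced counts per detector.
Roughly (the file name and the ``reduced_data_groups`` loop are
illustrative):

    output_file = "test.h5"
    export_metadata_as_nxlauetof(
        *detector_metas,
        experiment_metadata=experiment_metadata,
        output_file=output_file,
    )
    # One NMXReducedDataGroup per detector, e.g. from the stream processor.
    for reduced in reduced_data_groups:
        export_reduced_data_as_nxlauetof(reduced, output_file=output_file)
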
---
 src/ess/nmx/nexus.py | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py
index 12ce1766..5f65b5a1 100644
--- a/src/ess/nmx/nexus.py
+++ b/src/ess/nmx/nexus.py
@@ -290,7 +290,27 @@ def export_metadata_as_nxlauetof(
     experiment_metadata: NMXExperimentMetadata,
     output_file: str | pathlib.Path | io.BytesIO,
     **arbitrary_metadata: sc.Variable,
-):
+) -> None:
+    """Export the metadata to a NeXus file with the LAUE_TOF application definition.
+
+    ``Metadata`` in this context refers to the information
+    that is not part of the reduced detector counts themselves,
+    but is necessary for the interpretation of the reduced data.
+    Since NMX can have an arbitrary number of detectors,
+    this function can take multiple detector metadata objects.
+
+    Parameters
+    ----------
+    detector_metadatas:
+        Detector metadata objects.
+    experiment_metadata:
+        Experiment metadata object.
+    output_file:
+        Output file path.
+    arbitrary_metadata:
+        Arbitrary metadata that does not fit into the existing metadata objects.
+
+    """
     with h5py.File(output_file, "w") as f:
         f.attrs["NX_class"] = "NXlauetof"
         nx_entry = _create_lauetof_data_entry(f)
@@ -312,6 +332,27 @@ def export_reduced_data_as_nxlauetof(
     output_file: str | pathlib.Path | io.BytesIO,
     append_mode: bool = True,
 ) -> None:
+    """Export the reduced data to a NeXus file with the LAUE_TOF application definition.
+
+    Even though this function only exports
+    the reduced data (detector counts and their coordinates),
+    the input should contain all the metadata
+    necessary for a minimal sanity check.
+
+    Parameters
+    ----------
+    dg:
+        Reduced data and metadata.
+    output_file:
+        Output file path.
+    append_mode:
+        If ``True``, the file is opened in append mode.
+        If ``False``, the file is opened in non-append mode.
+        > Only append mode is supported for now.
+
+    """
+
     if not append_mode:
         raise NotImplementedError("Only append mode is supported for now.")
 

From 466ce198b3e898a6a07ebf870345cc6dd98bcd36 Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Mon, 3 Mar 2025 20:28:51 +0100
Subject: [PATCH 19/28] Add missing attributes.
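
The ``signal`` and ``primary`` attributes mark which dataset holds the
plottable data, as the NXlauetof application definition expects. In
plain h5py terms the result is equivalent to (path illustrative):

    with h5py.File("test.h5", "r+") as f:
        dset = f["entry/control/data"]
        dset.attrs["signal"] = 1
        dset.attrs["primary"] = 1
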
--- src/ess/nmx/nexus.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py index 5f65b5a1..57d7bac1 100644 --- a/src/ess/nmx/nexus.py +++ b/src/ess/nmx/nexus.py @@ -247,13 +247,15 @@ def _add_lauetof_monitor_group(data: sc.DataGroup, nx_entry: h5py.Group) -> None nx_monitor.attrs["NX_class"] = "NXmonitor" nx_monitor["mode"] = "monitor" nx_monitor["preset"] = 0.0 # Check if this is the correct value - _create_dataset_from_var( + data_dset = _create_dataset_from_var( name='data', root_entry=nx_monitor, var=sc.array( dims=['tof'], values=[1, 1, 1], unit="counts" ), # TODO: Add real data, bin values ) + data_dset.attrs["signal"] = 1 + data_dset.attrs["primary"] = 1 _create_dataset_from_var( name='time_of_flight', root_entry=nx_monitor, @@ -362,12 +364,13 @@ def export_reduced_data_as_nxlauetof( # The actual application definition defines it as integer, # but we keep the original data type for now num_x, num_y = dg["detector_shape"].value # Probably better way to do this - _create_dataset_from_var( + data_dset = _create_dataset_from_var( name="data", root_entry=nx_detector, var=sc.fold(dg['counts'].data, dim='id', sizes={'x': num_x, 'y': num_y}), dtype=np.uint, ) + data_dset.attrs["signal"] = 1 _create_dataset_from_var( name='time_of_flight', root_entry=nx_detector, From 01483e4b2520ae5e92a71f29342d71df96b8ffd5 Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Mon, 3 Mar 2025 20:29:30 +0100 Subject: [PATCH 20/28] Remove comments --- src/ess/nmx/nexus.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py index 57d7bac1..921f458f 100644 --- a/src/ess/nmx/nexus.py +++ b/src/ess/nmx/nexus.py @@ -172,27 +172,22 @@ def _add_lauetof_instrument(nx_entry: h5py.Group) -> h5py.Group: def _add_lauetof_detector_group(dg: sc.DataGroup, nx_instrument: h5py.Group) -> None: nx_detector = nx_instrument.create_group(dg["detector_name"].value) # Detector name nx_detector.attrs["NX_class"] = "NXdetector" - # Polar angle _create_dataset_from_var( name="polar_angle", root_entry=nx_detector, var=sc.scalar(0, unit='deg'), # TODO: Add real data ) - # Azimuthal angle _create_dataset_from_var( name="azimuthal_angle", root_entry=nx_detector, var=sc.scalar(0, unit='deg'), # TODO: Add real data ) - # x_pixel_size _create_dataset_from_var( name="x_pixel_size", root_entry=nx_detector, var=dg["x_pixel_size"] ) - # y_pixel_size _create_dataset_from_var( name="y_pixel_size", root_entry=nx_detector, var=dg["y_pixel_size"] ) - # distance _create_dataset_from_var( name="distance", root_entry=nx_detector, From 0984094ffa42b2ca2713c9fd2bb68faec491afeb Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Mon, 3 Mar 2025 20:38:37 +0100 Subject: [PATCH 21/28] Fix typo. 
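
Besides the duplicated ``from .types import ...`` line, the previous
rebase had left the body of ``export_reduced_data_as_nxlauetof`` above
its own docstring. The intended layout, sketched:

    def export_reduced_data_as_nxlauetof(dg, output_file, append_mode=True, safety_checks=True):
        """Export the reduced data ..."""
        if not append_mode:
            raise NotImplementedError("Only append mode is supported for now.")
        detector_group_path = f"entry/instrument/{dg['detector_name'].value}"
        # ... metadata validation, then append the counts with h5py ...
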
--- src/ess/nmx/nexus.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/ess/nmx/nexus.py b/src/ess/nmx/nexus.py index 8275584e..c2cd6dfe 100644 --- a/src/ess/nmx/nexus.py +++ b/src/ess/nmx/nexus.py @@ -13,8 +13,6 @@ from .types import NMXDetectorMetadata, NMXExperimentMetadata, NMXReducedDataGroup -from .types import NMXDetectorMetadata, NMXExperimentMetadata, NMXReducedDataGroup - def _create_dataset_from_var( *, @@ -361,24 +359,6 @@ def export_reduced_data_as_nxlauetof( output_file: str | pathlib.Path | io.BytesIO, append_mode: bool = True, safety_checks: bool = True, -) -> None: - if not append_mode: - raise NotImplementedError("Only append mode is supported for now.") - detector_group_path = f"entry/instrument/{dg['detector_name'].value}" - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning) - # Userwarning is expected here as histogram data is not yet saved. - with snx.File(output_file, "r") as f: - _validate_existing_metadata( - dg=dg, - detector_group=f[detector_group_path][()], - sample_group=f["entry/sample"][()], - safety_checks=safety_checks, - ) - - with h5py.File(output_file, "r+") as f: - nx_detector: h5py.Group = f[detector_group_path] ) -> None: """Export the reduced data to a NeXus file with the LAUE_TOF application definition. @@ -400,6 +380,23 @@ def export_reduced_data_as_nxlauetof( > Only append mode is supported for now. """ + if not append_mode: + raise NotImplementedError("Only append mode is supported for now.") + detector_group_path = f"entry/instrument/{dg['detector_name'].value}" + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + # Userwarning is expected here as histogram data is not yet saved. + with snx.File(output_file, "r") as f: + _validate_existing_metadata( + dg=dg, + detector_group=f[detector_group_path][()], + sample_group=f["entry/sample"][()], + safety_checks=safety_checks, + ) + + with h5py.File(output_file, "r+") as f: + nx_detector: h5py.Group = f[detector_group_path] if not append_mode: raise NotImplementedError("Only append mode is supported for now.") From 0ce7349e1a86b804db5bc6d92452d182497b44cd Mon Sep 17 00:00:00 2001 From: YooSunyoung Date: Thu, 27 Feb 2025 14:53:21 +0100 Subject: [PATCH 22/28] Move default parameter dictionary. 
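
With the defaults attached to the pipeline itself, constructing a
usable workflow no longer requires importing ``default_parameters``
separately; a sketch of the new usage:

    from ess.nmx.mcstas import McStasWorkflow
    from ess.nmx.types import MaximumCounts

    wf = McStasWorkflow()      # MaximumCounts preset to 10000
    wf[MaximumCounts] = 20000  # can still be overridden per run
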
---
 src/ess/nmx/__init__.py        | 3 ---
 src/ess/nmx/mcstas/__init__.py | 6 +++++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/ess/nmx/__init__.py b/src/ess/nmx/__init__.py
index 57586b2e..3787f2d7 100644
--- a/src/ess/nmx/__init__.py
+++ b/src/ess/nmx/__init__.py
@@ -15,13 +15,10 @@
 from .reduction import NMXReducedDataGroup
 from .types import MaximumCounts, NMXRawEventCountsDataGroup
 
-default_parameters = {MaximumCounts: 10000}
-
 del MaximumCounts
 
 __all__ = [
     "NMXRawEventCountsDataGroup",
     "NMXReducedDataGroup",
-    "default_parameters",
     "small_mcstas_3_sample",
 ]
diff --git a/src/ess/nmx/mcstas/__init__.py b/src/ess/nmx/mcstas/__init__.py
index 9680faf6..9e326f6f 100644
--- a/src/ess/nmx/mcstas/__init__.py
+++ b/src/ess/nmx/mcstas/__init__.py
@@ -14,9 +14,12 @@ def McStasWorkflow():
         reduce_raw_event_probability,
     )
 
+    from ..types import MaximumCounts
     from .load import providers as loader_providers
     from .xml import read_mcstas_geometry_xml
 
+    default_parameters = {MaximumCounts: 10000}
+
     return sl.Pipeline(
         (
             *loader_providers,
@@ -27,5 +30,6 @@ def McStasWorkflow():
             reduce_raw_event_probability,
             raw_event_probability_to_counts,
             format_nmx_reduced_data,
-        )
+        ),
+        params=default_parameters,
     )

From 96933236fe3d88dde1cb251dff832189fc1000ad Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 14:53:44 +0100
Subject: [PATCH 23/28] Expose executable reduction function as a script.

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 9559362b..a6f12400 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,9 @@ test = [
     "pytest",
 ]
 
+[project.scripts]
+essnmx_reduce_mcstas = "ess.nmx.mcstas.executables:main"
+
 [project.urls]
 "Bug Tracker" = "https://github.com/scipp/essnmx/issues"
 "Documentation" = "https://scipp.github.io/essnmx"

From 5b47c7ad4d4aaa6f1461c084818cb10db76af973 Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 15:22:12 +0100
Subject: [PATCH 24/28] Data reduction script interface.

---
 docs/user-guide/workflow_chunk.ipynb | 39 ++++++++++++++++++++++++++++
 src/ess/nmx/mcstas/__init__.py       |  6 ++---
 tests/loader_test.py                 |  2 +-
 tests/workflow_test.py               |  2 +-
 4 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/docs/user-guide/workflow_chunk.ipynb b/docs/user-guide/workflow_chunk.ipynb
index 729a8778..d6b3d8fa 100644
--- a/docs/user-guide/workflow_chunk.ipynb
+++ b/docs/user-guide/workflow_chunk.ipynb
@@ -8,6 +8,45 @@
     "In this example, we will process McStas events chunk by chunk, panel by panel."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# TL;DR\n",
+    "\n",
+    "There is a reduction script that processes the dataset chunk by chunk.\n",
+    "\n",
+    "> It may have some hard-coded parameters due to unfinished work on the reduction workflow."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "nbsphinx": "hidden"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from ess.nmx.data import small_mcstas_3_sample\n",
+    "\n",
+    "os.environ['INPUT_FILE_PATH'] = small_mcstas_3_sample()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!essnmx_reduce_mcstas \\\n",
+    "    --input_file ${INPUT_FILE_PATH} \\\n",
+    "    --output_file script-test.h5 \\\n",
+    "    --verbose \\\n",
+    "    --chunk_size 10_000_000 \\\n",
+    "    --detector_ids 0 1 2\n"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
diff --git a/src/ess/nmx/mcstas/__init__.py b/src/ess/nmx/mcstas/__init__.py
index 9e326f6f..3026dc20 100644
--- a/src/ess/nmx/mcstas/__init__.py
+++ b/src/ess/nmx/mcstas/__init__.py
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright (c) 2024 Scipp contributors (https://github.com/scipp)
+from ..types import MaximumCounts
+
+default_parameters = {MaximumCounts: 10000}
 
 
 def McStasWorkflow():
@@ -14,12 +17,9 @@ def McStasWorkflow():
         reduce_raw_event_probability,
     )
 
-    from ..types import MaximumCounts
     from .load import providers as loader_providers
     from .xml import read_mcstas_geometry_xml
 
-    default_parameters = {MaximumCounts: 10000}
-
     return sl.Pipeline(
         (
             *loader_providers,
diff --git a/tests/loader_test.py b/tests/loader_test.py
index 622652d0..bebddb2f 100644
--- a/tests/loader_test.py
+++ b/tests/loader_test.py
@@ -10,8 +10,8 @@
 import scippnexus as snx
 from scipp.testing import assert_allclose, assert_identical
 
-from ess.nmx import default_parameters
 from ess.nmx.data import small_mcstas_2_sample, small_mcstas_3_sample
+from ess.nmx.mcstas import default_parameters
 from ess.nmx.mcstas.load import bank_names_to_detector_names, load_crystal_rotation
 from ess.nmx.mcstas.load import providers as loader_providers
 from ess.nmx.types import (
diff --git a/tests/workflow_test.py b/tests/workflow_test.py
index f0c5c8a4..f785fa5e 100644
--- a/tests/workflow_test.py
+++ b/tests/workflow_test.py
@@ -5,8 +5,8 @@
 import sciline as sl
 import scipp as sc
 
-from ess.nmx import default_parameters
 from ess.nmx.data import small_mcstas_2_sample, small_mcstas_3_sample
+from ess.nmx.mcstas import default_parameters
 from ess.nmx.mcstas.load import providers as load_providers
 from ess.nmx.reduction import (
     NMXReducedDataGroup,

From c48a1650c969b1bb872f610c53f876d55d9dbd4e Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 15:24:46 +0100
Subject: [PATCH 25/28] Data reduction script interface.
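
The script drives the same streaming workflow as the notebook: one pass
over all requested detectors to accumulate the min/max time of arrival
and the maximum probability, a metadata export, and a second pass that
histograms and appends each detector. A typical invocation (paths are
illustrative):

    essnmx_reduce_mcstas \
        --input_file mcstas_events.h5 \
        --output_file reduced.h5 \
        --chunk_size 10_000_000 \
        --detector_ids 0 1 2 \
        --verbose
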
---
 src/ess/nmx/mcstas/executables.py | 268 ++++++++++++++++++++++++++++++
 1 file changed, 268 insertions(+)
 create mode 100644 src/ess/nmx/mcstas/executables.py

diff --git a/src/ess/nmx/mcstas/executables.py b/src/ess/nmx/mcstas/executables.py
new file mode 100644
index 00000000..9702596e
--- /dev/null
+++ b/src/ess/nmx/mcstas/executables.py
@@ -0,0 +1,268 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2025 Scipp contributors (https://github.com/scipp)
+import argparse
+import logging
+import pathlib
+import sys
+from collections.abc import Callable
+from functools import partial
+
+import sciline as sl
+import scipp as sc
+
+from ess.reduce.streaming import EternalAccumulator, StreamProcessor
+
+from ..nexus import export_metadata_as_nxlauetof, export_reduced_data_as_nxlauetof
+from ..streaming import MaxAccumulator, MinAccumulator, calculate_number_of_chunks
+from ..types import (
+    CrystalRotation,
+    DetectorIndex,
+    DetectorName,
+    FilePath,
+    MaximumCounts,
+    MaximumProbability,
+    MaximumTimeOfArrival,
+    McStasWeight2CountScaleFactor,
+    MinimumTimeOfArrival,
+    NMXDetectorMetadata,
+    NMXExperimentMetadata,
+    NMXRawDataMetadata,
+    NMXReducedCounts,
+    NMXReducedDataGroup,
+    PixelIds,
+    RawEventProbability,
+    TimeBinSteps,
+)
+from . import McStasWorkflow
+from .load import (
+    mcstas_weight_to_probability_scalefactor,
+    raw_event_data_chunk_generator,
+)
+from .xml import McStasInstrument
+
+
+def _build_metadata_streaming_processor_helper() -> (
+    Callable[[sl.Pipeline], StreamProcessor]
+):
+    return partial(
+        StreamProcessor,
+        dynamic_keys=(RawEventProbability,),
+        target_keys=(NMXRawDataMetadata,),
+        accumulators={
+            MaximumProbability: MaxAccumulator,
+            MaximumTimeOfArrival: MaxAccumulator,
+            MinimumTimeOfArrival: MinAccumulator,
+        },
+    )
+
+
+def _build_final_streaming_processor_helper() -> (
+    Callable[[sl.Pipeline], StreamProcessor]
+):
+    return partial(
+        StreamProcessor,
+        dynamic_keys=(RawEventProbability,),
+        target_keys=(NMXReducedDataGroup,),
+        accumulators={NMXReducedCounts: EternalAccumulator},
+    )
+
+
+def calculate_raw_data_metadata(
+    *detector_ids: DetectorIndex | DetectorName,
+    wf: sl.Pipeline,
+    chunk_size: int = 10_000_000,
+    logger: logging.Logger | None = None,
+) -> NMXRawDataMetadata:
+    # Stream processor building helper
+    scalefactor_stream_processor = _build_metadata_streaming_processor_helper()
+    metadata_wf = wf.copy()
+    # Loop over the detectors
+    file_path = metadata_wf.compute(FilePath)
+    raw_data_metadatas = {}
+
+    for detector_i in detector_ids:
+        temp_wf = metadata_wf.copy()
+        if isinstance(detector_i, str):
+            temp_wf[DetectorName] = detector_i
+        else:
+            temp_wf[DetectorIndex] = detector_i
+
+        detector_name = temp_wf.compute(DetectorName)
+        max_chunk_id = calculate_number_of_chunks(
+            temp_wf.compute(FilePath),
+            detector_name=detector_name,
+            chunk_size=chunk_size,
+        )
+        # Build the stream processor
+        processor = scalefactor_stream_processor(temp_wf)
+        for i_da, da in enumerate(
+            raw_event_data_chunk_generator(
+                file_path=file_path, detector_name=detector_name, chunk_size=chunk_size
+            )
+        ):
+            if any(size == 0 for size in da.sizes.values()):
+                continue
+            else:
+                results = processor.add_chunk({RawEventProbability: da})
+            if logger is not None:
+                logger.info(
+                    "[%s/%s] Processed chunk for %s",
+                    i_da,
+                    max_chunk_id,
+                    detector_name,
+                )
+
+        raw_data_metadatas[detector_i] = results[NMXRawDataMetadata]
+
+    # We take the min/max values across all detectors.
+    # We are doing it manually because it is not possible to update parameters
+    # in the workflow that the stream processor uses.
+    min_toa = min(meta.min_toa for meta in raw_data_metadatas.values())
+    max_toa = max(meta.max_toa for meta in raw_data_metadatas.values())
+    max_probability = max(m.max_probability for m in raw_data_metadatas.values())
+
+    return NMXRawDataMetadata(
+        min_toa=min_toa, max_toa=max_toa, max_probability=max_probability
+    )
+
+
+def reduction(
+    *,
+    input_file: pathlib.Path,
+    output_file: pathlib.Path,
+    chunk_size: int = 10_000_000,
+    detector_ids: list[int],
+    wf: sl.Pipeline | None = None,
+    logger: logging.Logger | None = None,
+) -> None:
+    wf = wf.copy() if wf is not None else McStasWorkflow()
+    wf[FilePath] = input_file
+    # Set static info
+    wf[McStasInstrument] = wf.compute(McStasInstrument)
+
+    # Calculate parameters for data reduction
+    data_metadata = calculate_raw_data_metadata(
+        *detector_ids, wf=wf, logger=logger, chunk_size=chunk_size
+    )
+    if logger is not None:
+        logger.info("Metadata retrieved: %s", data_metadata)
+
+    toa_bin_edges = sc.linspace(
+        dim='t', start=data_metadata.min_toa, stop=data_metadata.max_toa, num=51
+    )
+    scale_factor = mcstas_weight_to_probability_scalefactor(
+        max_counts=wf.compute(MaximumCounts),
+        max_probability=data_metadata.max_probability,
+    )
+    # Compute metadata and make the skeleton output file
+    experiment_metadata = wf.compute(NMXExperimentMetadata)
+    detector_metas = []
+    for detector_i in detector_ids:
+        temp_wf = wf.copy()
+        if isinstance(detector_i, str):
+            temp_wf[DetectorName] = detector_i
+        else:
+            temp_wf[DetectorIndex] = detector_i
+        detector_metas.append(temp_wf.compute(NMXDetectorMetadata))
+
+    if logger is not None:
+        logger.info("Exporting metadata into the output file %s", output_file)
+
+    export_metadata_as_nxlauetof(
+        *detector_metas,
+        experiment_metadata=experiment_metadata,
+        output_file=output_file,
+        # Arbitrary metadata falls into the ``entry`` group as a variable.
+        mcstas_weight2count_scale_factor=scale_factor,
+    )
+    # Compute histogram
+    final_wf = wf.copy()
+    # Set the scale factor and time bin edges
+    final_wf[McStasWeight2CountScaleFactor] = scale_factor
+    final_wf[TimeBinSteps] = toa_bin_edges
+
+    file_path = final_wf.compute(FilePath)
+    final_stream_processor = _build_final_streaming_processor_helper()
+    # Loop over the detectors
+    for detector_i in detector_ids:
+        temp_wf = final_wf.copy()
+        if isinstance(detector_i, str):
+            temp_wf[DetectorName] = detector_i
+        else:
+            temp_wf[DetectorIndex] = detector_i
+        # Set static information as parameters
+        detector_name = temp_wf.compute(DetectorName)
+        temp_wf[PixelIds] = temp_wf.compute(PixelIds)
+        max_chunk_id = calculate_number_of_chunks(
+            file_path, detector_name=detector_name, chunk_size=chunk_size
+        )
+
+        # Build the stream processor
+        processor = final_stream_processor(temp_wf)
+        for i_da, da in enumerate(
+            raw_event_data_chunk_generator(
+                file_path=file_path, detector_name=detector_name, chunk_size=chunk_size
+            )
+        ):
+            if any(size == 0 for size in da.sizes.values()):
+                continue
+            else:
+                results = processor.add_chunk({RawEventProbability: da})
+            if logger is not None:
+                logger.info(
+                    "[%s/%s] Processed chunk for %s",
+                    i_da,
+                    max_chunk_id,
+                    detector_name,
+                )
+
+        result = results[NMXReducedDataGroup]
+        if logger is not None:
+            logger.info("Appending reduced data into the output file %s", output_file)
+        export_reduced_data_as_nxlauetof(result, output_file=output_file)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="McStas Data Reduction.")
+    parser.add_argument(
+        "--input_file", type=str, required=True, help="Path to the input file"
+    )
+    parser.add_argument(
+        "--output_file",
+        type=str,
+        default="scipp_output.h5",
+        help="Path to the output file",
+    )
+    parser.add_argument(
+        "--verbose", action="store_true", help="Increase output verbosity"
+    )
+    parser.add_argument(
+        "--chunk_size",
+        type=int,
+        default=10_000_000,
+        help="Chunk size for processing",
+    )
+    parser.add_argument(
+        "--detector_ids",
+        type=int,
+        nargs="+",
+        default=[0, 1, 2],
+        help="Detector indices to process",
+    )
+
+    args = parser.parse_args()
+
+    input_file = pathlib.Path(args.input_file).resolve()
+    output_file = pathlib.Path(args.output_file).resolve()
+
+    logger = logging.getLogger(__name__)
+    if args.verbose:
+        logger.setLevel(logging.INFO)
+        logger.addHandler(logging.StreamHandler(sys.stdout))
+
+    wf = McStasWorkflow()
+    # Set the crystal rotation manually for now ...
+    wf[CrystalRotation] = sc.vector([0, 0, 0.0], unit='deg')
+    reduction(
+        input_file=input_file,
+        output_file=output_file,
+        chunk_size=args.chunk_size,
+        detector_ids=args.detector_ids,
+        logger=logger,
+        wf=wf,
+    )

From 3dfe188dca2f067d04debbe800d66e12fe581102 Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Thu, 27 Feb 2025 16:10:14 +0100
Subject: [PATCH 26/28] Fix progress log.
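
With the zero-based loop index the first chunk was logged as chunk 0
and the last one as ``max_chunk_id - 1``; shifting by one makes the
counter match the reported total, e.g. (chunk count and detector name
illustrative):

    # before: [0/12] Processed chunk for <detector_name>
    # after:  [1/12] Processed chunk for <detector_name>
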
---
 src/ess/nmx/mcstas/executables.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ess/nmx/mcstas/executables.py b/src/ess/nmx/mcstas/executables.py
index 9702596e..b66a1ce7 100644
--- a/src/ess/nmx/mcstas/executables.py
+++ b/src/ess/nmx/mcstas/executables.py
@@ -107,7 +107,7 @@ def calculate_raw_data_metadata(
             if logger is not None:
                 logger.info(
                     "[%s/%s] Processed chunk for %s",
-                    i_da,
+                    i_da + 1,
                     max_chunk_id,
                     detector_name,
                 )
@@ -208,7 +208,7 @@ def reduction(
             if logger is not None:
                 logger.info(
                     "[%s/%s] Processed chunk for %s",
-                    i_da,
+                    i_da + 1,
                     max_chunk_id,
                     detector_name,
                 )

From b96563f268b3c7bdc18ba343e939c6d027d34d81 Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Fri, 28 Feb 2025 13:05:00 +0100
Subject: [PATCH 27/28] Update accumulator to match the interface.

---
 src/ess/nmx/streaming.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/ess/nmx/streaming.py b/src/ess/nmx/streaming.py
index ea08b673..9460b648 100644
--- a/src/ess/nmx/streaming.py
+++ b/src/ess/nmx/streaming.py
@@ -18,10 +18,6 @@ def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         self._cur_min: sc.Variable | None = None
 
-    @property
-    def value(self) -> sc.Variable | None:
-        return self._cur_min
-
     def _do_push(self, value: sc.Variable) -> None:
         new_min = value.min()
         if self._cur_min is None:
@@ -29,6 +25,11 @@ def _do_push(self, value: sc.Variable) -> None:
         else:
             self._cur_min = min(self._cur_min, new_min)
 
+    def _get_value(self) -> sc.Variable | None:
+        return self._cur_min
+
+    def clear(self) -> None: ...
+
 
 class MaxAccumulator(Accumulator):
     """Accumulator that keeps track of the maximum value seen so far."""
 
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         self._cur_max: sc.Variable | None = None
 
-    @property
-    def value(self) -> sc.Variable | None:
-        return self._cur_max
-
     def _do_push(self, value: sc.Variable) -> None:
         new_max = value.max()
         if self._cur_max is None:
@@ -48,6 +45,11 @@ def _do_push(self, value: sc.Variable) -> None:
         else:
             self._cur_max = max(self._cur_max, new_max)
 
+    def _get_value(self) -> sc.Variable | None:
+        return self._cur_max
+
+    def clear(self) -> None: ...
+
 
 def calculate_number_of_chunks(
     file_path: FilePath,

From 1cd25280e1745c16de40b52709614ff5a2b61f6c Mon Sep 17 00:00:00 2001
From: YooSunyoung
Date: Fri, 28 Feb 2025 13:25:52 +0100
Subject: [PATCH 28/28] Update essreduce.
--- requirements/base.txt | 4 ++-- requirements/docs.txt | 2 +- requirements/nightly.txt | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 4c060f5d..5ca3865d 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -29,7 +29,7 @@ dnspython==2.7.0 # via email-validator email-validator==2.2.0 # via scippneutron -essreduce==25.2.4 +essreduce==25.2.5 # via -r base.in fonttools==4.56.0 # via matplotlib @@ -57,7 +57,7 @@ lazy-loader==0.4 # scippneutron locket==1.0.0 # via partd -matplotlib==3.10.0 +matplotlib==3.10.1 # via # mpltoolbox # plopp diff --git a/requirements/docs.txt b/requirements/docs.txt index 11002120..48ccceb9 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -54,7 +54,7 @@ ipydatawidgets==4.3.5 # via pythreejs ipykernel==6.29.5 # via -r docs.in -ipython==8.32.0 +ipython==8.33.0 # via # -r docs.in # ipykernel diff --git a/requirements/nightly.txt b/requirements/nightly.txt index 376e00e8..d950bf65 100644 --- a/requirements/nightly.txt +++ b/requirements/nightly.txt @@ -32,7 +32,7 @@ dnspython==2.7.0 # via email-validator email-validator==2.2.0 # via scippneutron -essreduce==25.2.4 +essreduce==25.2.5 # via -r nightly.in exceptiongroup==1.2.2 # via pytest @@ -64,7 +64,7 @@ lazy-loader==0.4 # scippneutron locket==1.0.0 # via partd -matplotlib==3.10.0 +matplotlib==3.10.1 # via # mpltoolbox # plopp