From 90d14492920bfb0e94279635903048624a474ffe Mon Sep 17 00:00:00 2001 From: jstilley Date: Fri, 29 Mar 2024 10:36:18 -0700 Subject: [PATCH] Renaming Database3 to Database --- armi/bookkeeping/db/__init__.py | 10 +- armi/bookkeeping/db/compareDB.py | 512 +++++ armi/bookkeeping/db/compareDB3.py | 506 +---- armi/bookkeeping/db/database.py | 1749 +++++++++++++++++ armi/bookkeeping/db/database3.py | 1744 +--------------- armi/bookkeeping/db/databaseInterface.py | 22 +- armi/bookkeeping/db/factory.py | 6 +- armi/bookkeeping/db/layout.py | 10 +- .../{test_comparedb3.py => test_comparedb.py} | 2 +- .../{test_database3.py => test_database.py} | 36 +- .../db/tests/test_databaseInterface.py | 16 +- armi/bookkeeping/db/tests/test_layout.py | 4 +- armi/bookkeeping/mainInterface.py | 4 +- .../visualization/tests/test_vis.py | 4 +- armi/bookkeeping/visualization/vtk.py | 6 +- armi/bookkeeping/visualization/xdmf.py | 6 +- armi/cli/database.py | 8 +- armi/context.py | 6 +- .../parameters/parameterDefinitions.py | 6 +- armi/reactor/tests/test_parameters.py | 2 +- armi/tests/tutorials/data_model.ipynb | 2 +- doc/release/0.3.rst | 3 +- doc/user/outputs.rst | 10 +- 23 files changed, 2365 insertions(+), 2309 deletions(-) create mode 100644 armi/bookkeeping/db/compareDB.py create mode 100644 armi/bookkeeping/db/database.py rename armi/bookkeeping/db/tests/{test_comparedb3.py => test_comparedb.py} (99%) rename armi/bookkeeping/db/tests/{test_database3.py => test_database.py} (95%) diff --git a/armi/bookkeeping/db/__init__.py b/armi/bookkeeping/db/__init__.py index ca0f5b4ed..05f7ba9d9 100644 --- a/armi/bookkeeping/db/__init__.py +++ b/armi/bookkeeping/db/__init__.py @@ -64,14 +64,14 @@ from armi import runLog # re-export package components for easier import -from armi.bookkeeping.db.database3 import Database3 +from armi.bookkeeping.db.database import Database from armi.bookkeeping.db.databaseInterface import DatabaseInterface -from armi.bookkeeping.db.compareDB3 import compareDatabases +from armi.bookkeeping.db.compareDB import compareDatabases from armi.bookkeeping.db.factory import databaseFactory __all__ = [ - "Database3", + "Database", "DatabaseInterface", "compareDatabases", "databaseFactory", @@ -129,7 +129,7 @@ def loadOperator(pathToDb, loadCycle, loadNode, allowMissing=False): "of the database." ) - db = Database3(pathToDb, "r") + db = Database(pathToDb, "r") with db: # init Case here as it keeps track of execution time and assigns a reactor # attribute. This attribute includes the time it takes to initialize the reactor @@ -167,7 +167,7 @@ def _getH5File(db): All this being said, we are probably violating this already with genAuxiliaryData, but we have to start somewhere. """ - if isinstance(db, Database3): + if isinstance(db, Database): return db.h5db else: raise TypeError("Unsupported Database type ({})!".format(type(db))) diff --git a/armi/bookkeeping/db/compareDB.py b/armi/bookkeeping/db/compareDB.py new file mode 100644 index 000000000..de3892e8e --- /dev/null +++ b/armi/bookkeeping/db/compareDB.py @@ -0,0 +1,512 @@ +# Copyright 2019 TerraPower, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Use the generic database class to compare two ARMI databases. + +This assumes some intimate knowledge about how the database is structured internally. +For instance, it knows that the database is composed of HDF5 data (the attrs of a +dataset are used, and h5py Groups are indexed), and it knows how special data is +structured within the HDF5 dataset and what the corresponding attributes are used for. +Some of this could be easily pulled up to the public interfaces of the Database class, +which may allow for cross-version database checking, but there is probably little value +in doing so if one is able to convert between versions. + +Speaking of conversions, there are some common issues that may arise from comparing +against databases that were converted from an old version. The process of reading in the +old database values can sometimes lead to more parameters being written out to the new +database than were in the original database (set to the parameter's default value). That +means that one generally should not be worried about a converted database having more +parameters in it that the one produced directly may not, assuming that the extra +converted parameters are the default. Also, especially at the Component level, some of +the parameters are expected to be different. Specifically the following: + +* temperatures: The old database format simply did not store these on the component + level, so when converting a database, the components in a block will uniformly get + whatever the Block temperature was. +* serial numbers: At all levels, we cannot really expect the serial numbers to line + up from object to object. These are not really supposed to be the same. +* volume: Component volumes also are not stored on the database, and come from + temperatures +* memory usage: Relatively self-evident. Resource usage will vary from run to run, + even if the code hasn't changed. + +""" +from typing import Sequence, Optional, Pattern, Tuple +import collections +import os +import re +import traceback + +from tabulate import tabulate +import h5py +import numpy + +from armi import runLog +from armi.bookkeeping.db import database +from armi.bookkeeping.db.database import Database +from armi.bookkeeping.db.factory import databaseFactory +from armi.bookkeeping.db.permissions import Permissions +from armi.reactor.composites import ArmiObject + + +class OutputWriter: + """Basically a tee to writeln to runLog and the output file.""" + + def __init__(self, fname): + self.fname = fname + self._stream = None + + def __enter__(self): + self._stream = open(self.fname, "w") + return self + + def __exit__(self, *args): + self._stream.close() + + def writeln(self, msg: str) -> None: + runLog.info(msg) + self._stream.write(msg) + self._stream.write("\n") + + +class DiffResults: + """Utility class for storing differences between database data. + + This class is used to store the differences between reference data and other + ("source") data. It is configured with a tolerance, below which differences are + ignored. Differences that exceed the tolerance are stored in a collection of + differences, organized by time step to be outputted later. It also keeps track of + the number of issues that may have been encountered in attempting to compare two + databases. For instance, missing datasets on one database or the other, or datasets + with incompatible dimensions and the like. 
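+
+    A ``DiffResults`` is what :py:func:`compareDatabases` hands back; a rough sketch of
+    checking it afterwards (the file names here are hypothetical)::
+
+        # "ref.h5" and "src.h5" are hypothetical paths to two ARMI output databases
+        diffs = compareDatabases("ref.h5", "src.h5", tolerance=0.01)
+        if diffs is not None and diffs.nDiffs() > 0:
+            print("The databases differ beyond the tolerance.")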
+ + All differences are based on a weird type of relative difference, which uses the + mean of the reference and source data elements as the normalization value: + 2*(C-E)/(C+E). This is somewhat strange, in that if the two are very different, the + reported relative difference will be smaller than expected. It does have the useful + property that if the reference value is zero and the source value is non-zero, the + diff will not be infinite. We do not typically report these in any rigorous manner, + so this should be fine, though we may wish to revisit this in the future. + """ + + def __init__(self, tolerance): + self._columns = [] + self._structureDiffs = [] + self.tolerance = tolerance + # diffs is a dictionary, keyed on strings describing the object to which the + # diffs apply, and the different diff metrics that we use (e.g. mean(abs(diff)), + # max(abs(diff))), with the values being a list of diffs by time step. If the + # diff doesn't exceed the tolerance, a None is inserted instead. + self.diffs = collections.defaultdict(self._getDefault) + + def addDiff( + self, compType: str, paramName: str, absMean: float, mean: float, absMax: float + ) -> None: + """Add a collection of diffs to the diff dictionary if they exceed the tolerance.""" + absMean = absMean if absMean > self.tolerance else None + self.diffs["{}/{} mean(abs(diff))".format(compType, paramName)].append(absMean) + + mean = mean if abs(mean) > self.tolerance else None + self.diffs["{}/{} mean(diff)".format(compType, paramName)].append(mean) + + absMax = absMax if absMax > self.tolerance else None + self.diffs["{}/{} max(abs(diff))".format(compType, paramName)].append(absMax) + + def addStructureDiffs(self, nDiffs: int) -> None: + if not self._structureDiffs: + self._structureDiffs = [0] + + self._structureDiffs[-1] += nDiffs + + def addTimeStep(self, tsName: str) -> None: + self._structureDiffs.append(0) + self._columns.append(tsName) + + def _getDefault(self) -> list: + return [None] * (len(self._columns) - 1) + + def reportDiffs(self, stream: OutputWriter) -> None: + """Print out a well-formatted table of the non-zero diffs.""" + # filter out empty rows + diffsToPrint = { + key: value + for key, value in self.diffs.items() + if not all(v is None for v in value) + } + stream.writeln( + tabulate( + [k.split() + val for k, val in sorted(diffsToPrint.items())], + headers=self._columns, + ) + ) + + def nDiffs(self) -> int: + """Return the number of differences that exceeded the tolerance.""" + return sum( + 1 for _, value in self.diffs.items() if any(v is not None for v in value) + ) + sum(self._structureDiffs) + + +def compareDatabases( + refFileName: str, + srcFileName: str, + exclusions: Optional[Sequence[str]] = None, + tolerance: float = 0.0, + timestepCompare: Optional[Sequence[Tuple[int, int]]] = None, +) -> Optional[DiffResults]: + """High-level method to compare two ARMI H5 files, given file paths.""" + compiledExclusions = None + if exclusions is not None: + compiledExclusions = [re.compile(ex) for ex in exclusions] + + outputName = ( + os.path.basename(refFileName) + "_vs_" + os.path.basename(srcFileName) + ".txt" + ) + + diffResults = DiffResults(tolerance) + with OutputWriter(outputName) as out: + ref = databaseFactory(refFileName, Permissions.READ_ONLY_FME) + src = databaseFactory(srcFileName, Permissions.READ_ONLY_FME) + if not isinstance(ref, Database) or not isinstance(src, Database): + raise TypeError( + "This database comparer only knows how to deal with database version " + "3; received {} and 
{}".format(type(ref), type(src)) + ) + + with ref, src: + if not timestepCompare: + _, nDiff = _compareH5Groups(out, ref, src, "timesteps") + + if nDiff > 0: + runLog.warning( + "{} and {} have differing timestep groups, and are " + "probably not safe to compare. This is likely due to one of " + "the cases having failed to complete.".format(ref, src) + ) + return None + + for refGroup, srcGroup in zip( + ref.genTimeStepGroups(timeSteps=timestepCompare), + src.genTimeStepGroups(timeSteps=timestepCompare), + ): + runLog.info( + f"Comparing ref time step {refGroup.name.split('/')[1]} to src time " + f"step {srcGroup.name.split('/')[1]}" + ) + diffResults.addTimeStep(refGroup.name) + _compareTimeStep( + out, refGroup, srcGroup, diffResults, exclusions=compiledExclusions + ) + + diffResults.reportDiffs(out) + + return diffResults + + +def _compareH5Groups( + out: OutputWriter, ref: h5py.Group, src: h5py.Group, name: str +) -> Tuple[Sequence[str], int]: + refGroups = set(ref.keys()) + srcGroups = set(src.keys()) + + n = _compareSets(srcGroups, refGroups, out, name) + + return sorted(refGroups & srcGroups), n + + +def _compareTimeStep( + out: OutputWriter, + refGroup: h5py.Group, + srcGroup: h5py.Group, + diffResults: DiffResults, + exclusions: Optional[Sequence[Pattern]] = None, +): + groupNames, structDiffs = _compareH5Groups( + out, refGroup, srcGroup, "composite objects/auxiliary data" + ) + diffResults.addStructureDiffs(structDiffs) + + componentTypes = {gn for gn in groupNames if gn in ArmiObject.TYPES} + auxData = set(groupNames) - componentTypes + auxData.discard("layout") + + for componentType in componentTypes: + refTypeGroup = refGroup[componentType] + srcTypeGroup = srcGroup[componentType] + + _compareComponentData( + out, refTypeGroup, srcTypeGroup, diffResults, exclusions=exclusions + ) + + for aux in auxData: + _compareAuxData(out, refGroup[aux], srcGroup[aux], diffResults) + + +def _compareAuxData( + out: OutputWriter, + refGroup: h5py.Group, + srcGroup: h5py.Group, + diffResults: DiffResults, +): + """ + Compare auxiliary datasets, which aren't stored as Parameters on the Composite model. + + Some parts of ARMI directly create HDF5 groups under the time step group to store + arbitrary data. These still need to be compared. Missing datasets will be treated as + structure differences and reported. + """ + data = dict() + + def visitor(name, obj): + if isinstance(obj, h5py.Dataset): + data[name] = obj + + refGroup.visititems(visitor) + refData = data + + data = dict() + srcGroup.visititems(visitor) + srcData = data + + n = _compareSets( + set(srcData.keys()), set(refData.keys()), out, name="auxiliary dataset" + ) + diffResults.addStructureDiffs(n) + matchedSets = set(srcData.keys()) & set(refData.keys()) + for name in matchedSets: + _diffSimpleData(refData[name], srcData[name], diffResults) + + +def _compareSets( + src: set, ref: set, out: OutputWriter, name: Optional[str] = None +) -> int: + nDiffs = 0 + printName = "" if name is None else name + " " + if ref - src: + nDiffs += len(ref - src) + out.writeln("ref has {}not in src: {}".format(printName, list(ref - src))) + + if src - ref: + nDiffs += len(src - ref) + out.writeln("src has {}not in ref: {}".format(printName, list(src - ref))) + + return nDiffs + + +def _diffSpecialData( + refData: h5py.Dataset, + srcData: h5py.Dataset, + out: OutputWriter, + diffResults: DiffResults, +): + """ + Compare specially-formatted datasets. 
+ + This employs the pack/unpackSpecialData functions to reconstitute complicated + datasets for comparison. These usually don't behave well as giant numpy arrays, so + we go element-by-element to calculate the diffs, then concatenate them. + """ + name = refData.name + paramName = refData.name.split("/")[-1] + compName = refData.name.split("/")[-2] + + nDiffs = _compareSets( + set(srcData.attrs.keys()), set(refData.attrs.keys()), out, "formatting data" + ) + keysMatch = nDiffs == 0 + diffResults.addStructureDiffs(nDiffs) + + if not keysMatch: + diffResults.addDiff(name, name, numpy.inf, numpy.inf, numpy.inf) + return + + if srcData.attrs.get("dict", False): + # not bothering with dictionaries yet, though we will need to for things like + # number densities + return + + attrsMatch = True + for k, srcAttr in srcData.attrs.items(): + refAttr = refData.attrs[k] + + if isinstance(srcAttr, numpy.ndarray) and isinstance(refAttr, numpy.ndarray): + srcFlat = srcAttr.flatten() + refFlat = refAttr.flatten() + if len(srcFlat) != len(refFlat): + same = False + else: + same = all(srcFlat == refFlat) + else: + same = srcAttr == refAttr + + if not same: + attrsMatch = False + out.writeln( + "Special formatting parameters for {} do not match for {}. Src: {} " + "Ref: {}".format(name, k, srcData.attrs[k], refData.attrs[k]) + ) + break + + if not attrsMatch: + return + + try: + src = database.unpackSpecialData(srcData[()], srcData.attrs, paramName) + ref = database.unpackSpecialData(refData[()], refData.attrs, paramName) + except Exception: + runLog.error( + f"Unable to unpack special data for paramName {paramName}. " + f"{traceback.format_exc()}", + ) + return + + diff = [] + for dSrc, dRef in zip(src.tolist(), ref.tolist()): + if isinstance(dSrc, numpy.ndarray) and isinstance(dRef, numpy.ndarray): + if dSrc.shape != dRef.shape: + out.writeln("Shapes did not match for {}".format(refData)) + diffResults.addDiff( + compName, paramName, numpy.inf, numpy.inf, numpy.inf + ) + return + + # make sure not to try to compare empty arrays. Numpy is mediocre at + # these; they are super degenerate and cannot participate in concatenation. + # Why? + if 0 not in dSrc.shape: + # Use the mean of the two to calc relative error. 
This is more robust to + # changes that cause one of the values to be zero, while the other is + # non-zero, leading to infinite relative error + dMean = (dSrc + dRef) / 2 + diff.append((dSrc - dRef) / dMean) + continue + + if (dSrc is None) ^ (dRef is None): + out.writeln("Mismatched Nones for {} in {}".format(paramName, compName)) + diff.append([numpy.inf]) + continue + + if dSrc is None: + diff.append([0.0]) + continue + + try: + # Use mean to avoid some infinities; see above + dMean = (dSrc + dRef) / 2 + diff.append([(dSrc - dRef) / dMean]) + except ZeroDivisionError: + if dSrc == dRef: + diff.append([0.0]) + else: + diff.append([numpy.inf]) + + if diff: + try: + diff = [numpy.array(d).flatten() for d in diff] + diff = numpy.concatenate(diff) + except ValueError as e: + out.writeln( + "Failed to concatenate diff data for {} in {}: {}".format( + paramName, compName, diff + ) + ) + out.writeln("Because: {}".format(e)) + return + absDiff = numpy.abs(diff) + mean = numpy.nanmean(diff) + absMax = numpy.nanmax(absDiff) + absMean = numpy.nanmean(absDiff) + + diffResults.addDiff(compName, paramName, absMean, mean, absMax) + + +def _diffSimpleData(ref: h5py.Dataset, src: h5py.Dataset, diffResults: DiffResults): + paramName = ref.name.split("/")[-1] + compName = ref.name.split("/")[-2] + + try: + # use mean to avoid some unnecessary infinities + mean = (src[()] + ref[()]) / 2.0 + diff = (src[()] - ref[()]) / mean + except TypeError: + # Strings are persnickety + if src.dtype.kind == ref.dtype.kind and src.dtype.kind in {"U", "S"}: + return + else: + runLog.error("Failed to compare {} in {}".format(paramName, compName)) + runLog.error("source: {}".format(src)) + runLog.error("reference: {}".format(ref)) + diff = numpy.array([numpy.inf]) + except ValueError: + runLog.error("Failed to compare {} in {}".format(paramName, compName)) + runLog.error("source: {}".format(src)) + runLog.error("reference: {}".format(ref)) + diff = numpy.array([numpy.inf]) + + if 0 in diff.shape: + # Empty list, no diff + return + + absDiff = numpy.abs(diff) + mean = numpy.nanmean(diff) + absMax = numpy.nanmax(absDiff) + absMean = numpy.nanmean(absDiff) + + diffResults.addDiff(compName, paramName, absMean, mean, absMax) + + +def _compareComponentData( + out: OutputWriter, + refGroup: h5py.Group, + srcGroup: h5py.Group, + diffResults: DiffResults, + exclusions: Optional[Sequence[Pattern]] = None, +): + exclusions = exclusions or [] + compName = refGroup.name + paramNames, nDiff = _compareH5Groups( + out, refGroup, srcGroup, "{} parameters".format(compName) + ) + diffResults.addStructureDiffs(nDiff) + + for paramName in paramNames: + fullName = "/".join((refGroup.name, paramName)) + if any(pattern.match(fullName) for pattern in exclusions): + runLog.debug( + "Skipping comparison of {} since it is being ignored.".format(fullName) + ) + continue + refDataset = refGroup[paramName] + srcDataset = srcGroup[paramName] + + srcSpecial = srcDataset.attrs.get("specialFormatting", False) + refSpecial = refDataset.attrs.get("specialFormatting", False) + + if srcSpecial ^ refSpecial: + out.writeln( + "Could not compare data for parameter {} because one uses special " + "formatting, and the other does not. 
Ref: {} Src: {}".format( + paramName, refSpecial, srcSpecial + ) + ) + diffResults.addDiff( + refGroup.name, paramName, numpy.inf, numpy.inf, numpy.inf + ) + continue + + if srcSpecial or refSpecial: + _diffSpecialData(refDataset, srcDataset, out, diffResults) + else: + _diffSimpleData(refDataset, srcDataset, diffResults) diff --git a/armi/bookkeeping/db/compareDB3.py b/armi/bookkeeping/db/compareDB3.py index f46cb23c0..ef86fbb90 100644 --- a/armi/bookkeeping/db/compareDB3.py +++ b/armi/bookkeeping/db/compareDB3.py @@ -1,4 +1,4 @@ -# Copyright 2019 TerraPower, LLC +# Copyright 2024 TerraPower, LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,502 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""This is a temporary file created to ease a long API transition. -""" -Use the generic database class to compare two ARMI databases. - -This assumes some intimate knowledge about how the database is structured internally. -For instance, it knows that the database is composed of HDF5 data (the attrs of a -dataset are used, and h5py Groups are indexed), and it knows how special data is -structured within the HDF5 dataset and what the corresponding attributes are used for. -Some of this could be easily pulled up to the public interfaces of the Database class, -which may allow for cross-version database checking, but there is probably little value -in doing so if one is able to convert between versions. - -Speaking of conversions, there are some common issues that may arise from comparing -against databases that were converted from an old version. The process of reading in the -old database values can sometimes lead to more parameters being written out to the new -database than were in the original database (set to the parameter's default value). That -means that one generally should not be worried about a converted database having more -parameters in it that the one produced directly may not, assuming that the extra -converted parameters are the default. Also, especially at the Component level, some of -the parameters are expected to be different. Specifically the following: - -* temperatures: The old database format simply did not store these on the component - level, so when converting a database, the components in a block will uniformly get - whatever the Block temperature was. -* serial numbers: At all levels, we cannot really expect the serial numbers to line - up from object to object. These are not really supposed to be the same. -* volume: Component volumes also are not stored on the database, and come from - temperatures -* memory usage: Relatively self-evident. Resource usage will vary from run to run, - even if the code hasn't changed. +Originally, the ``Database3`` class existed as a temporary naming stop-gap as ARMI transitioned from +one version of a "Database" class to another. But, for reasons lost to history, the "Database3" name +stuck. +And the file ``compareDB3.py`` was named to match the ``Database3`` naming convention. 
""" -from typing import Sequence, Optional, Pattern, Tuple -import collections -import os -import re -import traceback - -from tabulate import tabulate -import h5py -import numpy - -from armi import runLog -from armi.bookkeeping.db import database3 -from armi.bookkeeping.db.database3 import Database3 -from armi.bookkeeping.db.factory import databaseFactory -from armi.bookkeeping.db.permissions import Permissions -from armi.reactor.composites import ArmiObject - - -class OutputWriter: - """Basically a tee to writeln to runLog and the output file.""" - - def __init__(self, fname): - self.fname = fname - self._stream = None - - def __enter__(self): - self._stream = open(self.fname, "w") - return self - - def __exit__(self, *args): - self._stream.close() - - def writeln(self, msg: str) -> None: - runLog.info(msg) - self._stream.write(msg) - self._stream.write("\n") - - -class DiffResults: - """Utility class for storing differences between database data. - - This class is used to store the differences between reference data and other - ("source") data. It is configured with a tolerance, below which differences are - ignored. Differences that exceed the tolerance are stored in a collection of - differences, organized by time step to be outputted later. It also keeps track of - the number of issues that may have been encountered in attempting to compare two - databases. For instance, missing datasets on one database or the other, or datasets - with incompatible dimensions and the like. - - All differences are based on a weird type of relative difference, which uses the - mean of the reference and source data elements as the normalization value: - 2*(C-E)/(C+E). This is somewhat strange, in that if the two are very different, the - reported relative difference will be smaller than expected. It does have the useful - property that if the reference value is zero and the source value is non-zero, the - diff will not be infinite. We do not typically report these in any rigorous manner, - so this should be fine, though we may wish to revisit this in the future. - """ - - def __init__(self, tolerance): - self._columns = [] - self._structureDiffs = [] - self.tolerance = tolerance - # diffs is a dictionary, keyed on strings describing the object to which the - # diffs apply, and the different diff metrics that we use (e.g. mean(abs(diff)), - # max(abs(diff))), with the values being a list of diffs by time step. If the - # diff doesn't exceed the tolerance, a None is inserted instead. 
- self.diffs = collections.defaultdict(self._getDefault) - - def addDiff( - self, compType: str, paramName: str, absMean: float, mean: float, absMax: float - ) -> None: - """Add a collection of diffs to the diff dictionary if they exceed the tolerance.""" - absMean = absMean if absMean > self.tolerance else None - self.diffs["{}/{} mean(abs(diff))".format(compType, paramName)].append(absMean) - - mean = mean if abs(mean) > self.tolerance else None - self.diffs["{}/{} mean(diff)".format(compType, paramName)].append(mean) - - absMax = absMax if absMax > self.tolerance else None - self.diffs["{}/{} max(abs(diff))".format(compType, paramName)].append(absMax) - - def addStructureDiffs(self, nDiffs: int) -> None: - if not self._structureDiffs: - self._structureDiffs = [0] - - self._structureDiffs[-1] += nDiffs - - def addTimeStep(self, tsName: str) -> None: - self._structureDiffs.append(0) - self._columns.append(tsName) - - def _getDefault(self) -> list: - return [None] * (len(self._columns) - 1) - - def reportDiffs(self, stream: OutputWriter) -> None: - """Print out a well-formatted table of the non-zero diffs.""" - # filter out empty rows - diffsToPrint = { - key: value - for key, value in self.diffs.items() - if not all(v is None for v in value) - } - stream.writeln( - tabulate( - [k.split() + val for k, val in sorted(diffsToPrint.items())], - headers=self._columns, - ) - ) - - def nDiffs(self) -> int: - """Return the number of differences that exceeded the tolerance.""" - return sum( - 1 for _, value in self.diffs.items() if any(v is not None for v in value) - ) + sum(self._structureDiffs) - - -def compareDatabases( - refFileName: str, - srcFileName: str, - exclusions: Optional[Sequence[str]] = None, - tolerance: float = 0.0, - timestepCompare: Optional[Sequence[Tuple[int, int]]] = None, -) -> Optional[DiffResults]: - """High-level method to compare two ARMI H5 files, given file paths.""" - compiledExclusions = None - if exclusions is not None: - compiledExclusions = [re.compile(ex) for ex in exclusions] - - outputName = ( - os.path.basename(refFileName) + "_vs_" + os.path.basename(srcFileName) + ".txt" - ) - - diffResults = DiffResults(tolerance) - with OutputWriter(outputName) as out: - ref = databaseFactory(refFileName, Permissions.READ_ONLY_FME) - src = databaseFactory(srcFileName, Permissions.READ_ONLY_FME) - if not isinstance(ref, Database3) or not isinstance(src, Database3): - raise TypeError( - "This database comparer only knows how to deal with database version " - "3; received {} and {}".format(type(ref), type(src)) - ) - - with ref, src: - if not timestepCompare: - _, nDiff = _compareH5Groups(out, ref, src, "timesteps") - - if nDiff > 0: - runLog.warning( - "{} and {} have differing timestep groups, and are " - "probably not safe to compare. 
This is likely due to one of " - "the cases having failed to complete.".format(ref, src) - ) - return None - - for refGroup, srcGroup in zip( - ref.genTimeStepGroups(timeSteps=timestepCompare), - src.genTimeStepGroups(timeSteps=timestepCompare), - ): - runLog.info( - f"Comparing ref time step {refGroup.name.split('/')[1]} to src time " - f"step {srcGroup.name.split('/')[1]}" - ) - diffResults.addTimeStep(refGroup.name) - _compareTimeStep( - out, refGroup, srcGroup, diffResults, exclusions=compiledExclusions - ) - - diffResults.reportDiffs(out) - - return diffResults - - -def _compareH5Groups( - out: OutputWriter, ref: h5py.Group, src: h5py.Group, name: str -) -> Tuple[Sequence[str], int]: - refGroups = set(ref.keys()) - srcGroups = set(src.keys()) - - n = _compareSets(srcGroups, refGroups, out, name) - - return sorted(refGroups & srcGroups), n - - -def _compareTimeStep( - out: OutputWriter, - refGroup: h5py.Group, - srcGroup: h5py.Group, - diffResults: DiffResults, - exclusions: Optional[Sequence[Pattern]] = None, -): - groupNames, structDiffs = _compareH5Groups( - out, refGroup, srcGroup, "composite objects/auxiliary data" - ) - diffResults.addStructureDiffs(structDiffs) - - componentTypes = {gn for gn in groupNames if gn in ArmiObject.TYPES} - auxData = set(groupNames) - componentTypes - auxData.discard("layout") - - for componentType in componentTypes: - refTypeGroup = refGroup[componentType] - srcTypeGroup = srcGroup[componentType] - - _compareComponentData( - out, refTypeGroup, srcTypeGroup, diffResults, exclusions=exclusions - ) - - for aux in auxData: - _compareAuxData(out, refGroup[aux], srcGroup[aux], diffResults) - - -def _compareAuxData( - out: OutputWriter, - refGroup: h5py.Group, - srcGroup: h5py.Group, - diffResults: DiffResults, -): - """ - Compare auxiliary datasets, which aren't stored as Parameters on the Composite model. - - Some parts of ARMI directly create HDF5 groups under the time step group to store - arbitrary data. These still need to be compared. Missing datasets will be treated as - structure differences and reported. - """ - data = dict() - - def visitor(name, obj): - if isinstance(obj, h5py.Dataset): - data[name] = obj - - refGroup.visititems(visitor) - refData = data - - data = dict() - srcGroup.visititems(visitor) - srcData = data - - n = _compareSets( - set(srcData.keys()), set(refData.keys()), out, name="auxiliary dataset" - ) - diffResults.addStructureDiffs(n) - matchedSets = set(srcData.keys()) & set(refData.keys()) - for name in matchedSets: - _diffSimpleData(refData[name], srcData[name], diffResults) - - -def _compareSets( - src: set, ref: set, out: OutputWriter, name: Optional[str] = None -) -> int: - nDiffs = 0 - printName = "" if name is None else name + " " - if ref - src: - nDiffs += len(ref - src) - out.writeln("ref has {}not in src: {}".format(printName, list(ref - src))) - - if src - ref: - nDiffs += len(src - ref) - out.writeln("src has {}not in ref: {}".format(printName, list(src - ref))) - - return nDiffs - - -def _diffSpecialData( - refData: h5py.Dataset, - srcData: h5py.Dataset, - out: OutputWriter, - diffResults: DiffResults, -): - """ - Compare specially-formatted datasets. - - This employs the pack/unpackSpecialData functions to reconstitute complicated - datasets for comparison. These usually don't behave well as giant numpy arrays, so - we go element-by-element to calculate the diffs, then concatenate them. 
- """ - name = refData.name - paramName = refData.name.split("/")[-1] - compName = refData.name.split("/")[-2] - - nDiffs = _compareSets( - set(srcData.attrs.keys()), set(refData.attrs.keys()), out, "formatting data" - ) - keysMatch = nDiffs == 0 - diffResults.addStructureDiffs(nDiffs) - - if not keysMatch: - diffResults.addDiff(name, name, numpy.inf, numpy.inf, numpy.inf) - return - - if srcData.attrs.get("dict", False): - # not bothering with dictionaries yet, though we will need to for things like - # number densities - return - - attrsMatch = True - for k, srcAttr in srcData.attrs.items(): - refAttr = refData.attrs[k] - - if isinstance(srcAttr, numpy.ndarray) and isinstance(refAttr, numpy.ndarray): - srcFlat = srcAttr.flatten() - refFlat = refAttr.flatten() - if len(srcFlat) != len(refFlat): - same = False - else: - same = all(srcFlat == refFlat) - else: - same = srcAttr == refAttr - - if not same: - attrsMatch = False - out.writeln( - "Special formatting parameters for {} do not match for {}. Src: {} " - "Ref: {}".format(name, k, srcData.attrs[k], refData.attrs[k]) - ) - break - - if not attrsMatch: - return - - try: - src = database3.unpackSpecialData(srcData[()], srcData.attrs, paramName) - ref = database3.unpackSpecialData(refData[()], refData.attrs, paramName) - except Exception: - runLog.error( - f"Unable to unpack special data for paramName {paramName}. " - f"{traceback.format_exc()}", - ) - return - - diff = [] - for dSrc, dRef in zip(src.tolist(), ref.tolist()): - if isinstance(dSrc, numpy.ndarray) and isinstance(dRef, numpy.ndarray): - if dSrc.shape != dRef.shape: - out.writeln("Shapes did not match for {}".format(refData)) - diffResults.addDiff( - compName, paramName, numpy.inf, numpy.inf, numpy.inf - ) - return - - # make sure not to try to compare empty arrays. Numpy is mediocre at - # these; they are super degenerate and cannot participate in concatenation. - # Why? - if 0 not in dSrc.shape: - # Use the mean of the two to calc relative error. 
This is more robust to - # changes that cause one of the values to be zero, while the other is - # non-zero, leading to infinite relative error - dMean = (dSrc + dRef) / 2 - diff.append((dSrc - dRef) / dMean) - continue - - if (dSrc is None) ^ (dRef is None): - out.writeln("Mismatched Nones for {} in {}".format(paramName, compName)) - diff.append([numpy.inf]) - continue - - if dSrc is None: - diff.append([0.0]) - continue - - try: - # Use mean to avoid some infinities; see above - dMean = (dSrc + dRef) / 2 - diff.append([(dSrc - dRef) / dMean]) - except ZeroDivisionError: - if dSrc == dRef: - diff.append([0.0]) - else: - diff.append([numpy.inf]) - - if diff: - try: - diff = [numpy.array(d).flatten() for d in diff] - diff = numpy.concatenate(diff) - except ValueError as e: - out.writeln( - "Failed to concatenate diff data for {} in {}: {}".format( - paramName, compName, diff - ) - ) - out.writeln("Because: {}".format(e)) - return - absDiff = numpy.abs(diff) - mean = numpy.nanmean(diff) - absMax = numpy.nanmax(absDiff) - absMean = numpy.nanmean(absDiff) - - diffResults.addDiff(compName, paramName, absMean, mean, absMax) - - -def _diffSimpleData(ref: h5py.Dataset, src: h5py.Dataset, diffResults: DiffResults): - paramName = ref.name.split("/")[-1] - compName = ref.name.split("/")[-2] - - try: - # use mean to avoid some unnecessary infinities - mean = (src[()] + ref[()]) / 2.0 - diff = (src[()] - ref[()]) / mean - except TypeError: - # Strings are persnickety - if src.dtype.kind == ref.dtype.kind and src.dtype.kind in {"U", "S"}: - return - else: - runLog.error("Failed to compare {} in {}".format(paramName, compName)) - runLog.error("source: {}".format(src)) - runLog.error("reference: {}".format(ref)) - diff = numpy.array([numpy.inf]) - except ValueError: - runLog.error("Failed to compare {} in {}".format(paramName, compName)) - runLog.error("source: {}".format(src)) - runLog.error("reference: {}".format(ref)) - diff = numpy.array([numpy.inf]) - - if 0 in diff.shape: - # Empty list, no diff - return - - absDiff = numpy.abs(diff) - mean = numpy.nanmean(diff) - absMax = numpy.nanmax(absDiff) - absMean = numpy.nanmean(absDiff) - - diffResults.addDiff(compName, paramName, absMean, mean, absMax) - - -def _compareComponentData( - out: OutputWriter, - refGroup: h5py.Group, - srcGroup: h5py.Group, - diffResults: DiffResults, - exclusions: Optional[Sequence[Pattern]] = None, -): - exclusions = exclusions or [] - compName = refGroup.name - paramNames, nDiff = _compareH5Groups( - out, refGroup, srcGroup, "{} parameters".format(compName) - ) - diffResults.addStructureDiffs(nDiff) - - for paramName in paramNames: - fullName = "/".join((refGroup.name, paramName)) - if any(pattern.match(fullName) for pattern in exclusions): - runLog.debug( - "Skipping comparison of {} since it is being ignored.".format(fullName) - ) - continue - refDataset = refGroup[paramName] - srcDataset = srcGroup[paramName] - - srcSpecial = srcDataset.attrs.get("specialFormatting", False) - refSpecial = refDataset.attrs.get("specialFormatting", False) - - if srcSpecial ^ refSpecial: - out.writeln( - "Could not compare data for parameter {} because one uses special " - "formatting, and the other does not. 
Ref: {} Src: {}".format( - paramName, refSpecial, srcSpecial - ) - ) - diffResults.addDiff( - refGroup.name, paramName, numpy.inf, numpy.inf, numpy.inf - ) - continue +# ruff: noqa: F401 +# ruff: noqa: F403 +from armi.bookkeeping.db.compareDB import * - if srcSpecial or refSpecial: - _diffSpecialData(refDataset, srcDataset, out, diffResults) - else: - _diffSimpleData(refDataset, srcDataset, diffResults) +###from armi.bookkeeping.db.compareDB import _compareSets diff --git a/armi/bookkeeping/db/database.py b/armi/bookkeeping/db/database.py new file mode 100644 index 000000000..6ab23ff46 --- /dev/null +++ b/armi/bookkeeping/db/database.py @@ -0,0 +1,1749 @@ +# Copyright 2019 TerraPower, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +ARMI Database implementation, version 3.4. + +A reactor model should be fully recoverable from the database; all the way down to the +component level. As a result, the structure of the underlying data is bound to the +hierarchical Composite Reactor Model. Furthermore, this database format is intended to +be more dynamic, permitting as-yet undeveloped levels and classes in the Composite +Reactor Model to be supported as they are added. More high-level discussion is +contained in :doc:`/user/outputs/database`. + +The :py:class:`Database` class contains most of the functionality for interacting +with the underlying data. This includes things like dumping a Reactor state to the +database and loading it back again, as well as extracting historical data for a given +object or collection of object from the database file. However, for the nitty-gritty +details of how the hierarchical Composite Reactor Model is translated to the flat file +database, please refer to :py:mod:`armi.bookkeeping.db.layout`. + +Refer to :py:mod:`armi.bookkeeping.db` for information about versioning. 
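+
+As a rough usage sketch (the file name below is hypothetical), a reactor state can be
+pulled back out of a database by opening it for reading and calling ``load``::
+
+    from armi.bookkeeping.db import Database
+
+    # "myCase.h5" is a hypothetical path to an existing ARMI output database
+    db = Database("myCase.h5", "r")
+    with db:
+        r = db.load(cycle=0, node=0)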
+""" +import collections +import copy +import io +import itertools +import os +import pathlib +import re +import shutil +import subprocess +import sys +from platform import uname +from typing import ( + Optional, + Tuple, + Type, + Dict, + Any, + List, + Sequence, + Generator, +) + +import h5py +import numpy + +from armi import context +from armi import getApp +from armi import meta +from armi import runLog +from armi import settings +from armi.bookkeeping.db.layout import ( + Layout, + DB_VERSION, + replaceNonesWithNonsense, + replaceNonsenseWithNones, +) +from armi.bookkeeping.db.typedefs import History, Histories +from armi.nucDirectory import nuclideBases +from armi.physics.neutronics.settings import CONF_LOADING_FILE +from armi.reactor import grids +from armi.reactor import parameters +from armi.reactor import systemLayoutInput +from armi.reactor.assemblies import Assembly +from armi.reactor.blocks import Block +from armi.reactor.components import Component +from armi.reactor.composites import ArmiObject +from armi.reactor.flags import Flags +from armi.reactor.parameters import parameterCollections +from armi.reactor.reactors import Core +from armi.settings.fwSettings.globalSettings import CONF_SORT_REACTOR +from armi.utils import getNodesPerCycle +from armi.utils.textProcessors import resolveMarkupInclusions + +# CONSTANTS +_SERIALIZER_NAME = "serializerName" +_SERIALIZER_VERSION = "serializerVersion" + + +def getH5GroupName(cycle: int, timeNode: int, statePointName: str = None) -> str: + """ + Naming convention specifier. + + ARMI defines the naming convention cXXnYY for groups of simulation data. + That is, data is grouped by cycle and time node information during a + simulated run. + """ + return "c{:0>2}n{:0>2}{}".format(cycle, timeNode, statePointName or "") + + +class Database: + """ + Version 3 of the ARMI Database, handling serialization and loading of Reactor states. + + This implementation of the database pushes all objects in the Composite Reactor + Model into the database. This process is aided by the ``Layout`` class, which + handles the packing and unpacking of the structure of the objects, their + relationships, and their non-parameter attributes. + + .. impl:: The database files are H5, and thus language agnostic. + :id: I_ARMI_DB_H51 + :implements: R_ARMI_DB_H5 + + This class implements a light wrapper around H5 files, so they can be used to + store ARMI outputs. H5 files are commonly used in scientific applications in + Fortran and C++. As such, they are entirely language agnostic binary files. The + implementation here is that ARMI wraps the ``h5py`` library, and uses its + extensive tooling, instead of re-inventing the wheel. + + See Also + -------- + `doc/user/outputs/database` for more details. + """ + + timeNodeGroupPattern = re.compile(r"^c(\d\d)n(\d\d)$") + + def __init__(self, fileName: os.PathLike, permission: str): + """ + Create a new Database object. + + Parameters + ---------- + fileName: + name of the file + + permission: + file permissions, write ("w") or read ("r") + """ + self._fileName = fileName + # No full path yet; we will determine this based on FAST_PATH and permissions + self._fullPath: Optional[str] = None + self._permission = permission + self.h5db: Optional[h5py.File] = None + + # Allows context management on open files. + # If context management is used on a file that is already open, it will not reopen + # and it will also not close after leaving that context. 
+ # This allows the treatment of all databases the same whether they are open or + # closed. + self._openCount: int = 0 + + if permission == "w": + self.version = DB_VERSION + else: + # will be set upon read + self._version = None + self._versionMajor = None + self._versionMinor = None + + @property + def version(self) -> str: + return self._version + + @version.setter + def version(self, value: str): + self._version = value + self._versionMajor, self._versionMinor = (int(v) for v in value.split(".")) + + @property + def versionMajor(self): + return self._versionMajor + + @property + def versionMinor(self): + return self._versionMinor + + def __repr__(self): + return "<{} {}>".format( + self.__class__.__name__, repr(self.h5db).replace("<", "").replace(">", "") + ) + + def open(self): + if self.h5db is not None: + raise ValueError( + "This database is already open; make sure to close it " + "before trying to open it again." + ) + filePath = self._fileName + self._openCount += 1 + + if self._permission in {"r", "a"}: + self._fullPath = os.path.abspath(filePath) + self.h5db = h5py.File(filePath, self._permission) + self.version = self.h5db.attrs["databaseVersion"] + return + + if self._permission == "w": + # assume fast path! + filePath = os.path.join(context.getFastPath(), filePath) + self._fullPath = os.path.abspath(filePath) + + else: + runLog.error("Unrecognized file permissions `{}`".format(self._permission)) + raise ValueError( + "Cannot open database with permission `{}`".format(self._permission) + ) + + # open the database, and write a bunch of metadata to it + runLog.info("Opening database file at {}".format(os.path.abspath(filePath))) + self.h5db = h5py.File(filePath, self._permission) + self.h5db.attrs["successfulCompletion"] = False + self.h5db.attrs["version"] = meta.__version__ + self.h5db.attrs["databaseVersion"] = self.version + self.writeSystemAttributes(self.h5db) + + # store app and plugin data + app = getApp() + self.h5db.attrs["appName"] = app.name + plugins = app.pluginManager.list_name_plugin() + ps = [ + (os.path.abspath(sys.modules[p[1].__module__].__file__), p[1].__name__) + for p in plugins + ] + ps = numpy.array([str(p[0]) + ":" + str(p[1]) for p in ps]).astype("S") + self.h5db.attrs["pluginPaths"] = ps + self.h5db.attrs["localCommitHash"] = Database.grabLocalCommitHash() + + @staticmethod + def writeSystemAttributes(h5db): + """Write system attributes to the database. + + .. impl:: Add system attributes to the database. + :id: I_ARMI_DB_QA + :implements: R_ARMI_DB_QA + + This method writes some basic system information to the H5 file. This is + designed as a starting point, so users can see information about the system + their simulations were run on. As ARMI is used on Windows and Linux, the + tooling here has to be platform independent. The two major sources of + information are the ARMI :py:mod:`context ` module and the + Python standard library ``platform``. 
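+
+        As a small sketch of reading these back (assuming ``db`` is an open
+        ``Database``), the attributes live on the underlying ``h5py`` file handle::
+
+            user = db.h5db.attrs["user"]
+            startTime = db.h5db.attrs["startTime"]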
+ """ + h5db.attrs["user"] = context.USER + h5db.attrs["python"] = sys.version + h5db.attrs["armiLocation"] = os.path.dirname(context.ROOT) + h5db.attrs["startTime"] = context.START_TIME + h5db.attrs["machines"] = numpy.array(context.MPI_NODENAMES).astype("S") + + # store platform data + platform_data = uname() + h5db.attrs["platform"] = platform_data.system + h5db.attrs["hostname"] = platform_data.node + h5db.attrs["platformRelease"] = platform_data.release + h5db.attrs["platformVersion"] = platform_data.version + h5db.attrs["platformArch"] = platform_data.processor + + @staticmethod + def grabLocalCommitHash(): + """ + Try to determine the local Git commit. + + We have to be sure to handle the errors where the code is run on a system that + doesn't have Git installed. Or if the code is simply not run from inside a repo. + + Returns + ------- + str + The commit hash if it exists, otherwise "unknown". + """ + unknown = "unknown" + if not shutil.which("git"): + # no git available. cannot check git info + return unknown + repo_exists = ( + subprocess.run( + "git rev-parse --git-dir".split(), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ).returncode + == 0 + and subprocess.run( + ["git", "describe"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ).returncode + == 0 + ) + if repo_exists: + try: + commit_hash = subprocess.check_output(["git", "describe"]) + return commit_hash.decode("utf-8").strip() + except: # noqa: bare-except + return unknown + else: + return unknown + + def close(self, completedSuccessfully=False): + """Close the DB and perform cleanups and auto-conversions.""" + self._openCount = 0 + if self.h5db is None: + return + + if self._permission == "w": + self.h5db.attrs["successfulCompletion"] = completedSuccessfully + # a bit redundant to call flush, but with unreliable IO issues, why not? + self.h5db.flush() + + self.h5db.close() + self.h5db = None + + if self._permission == "w": + # move out of the FAST_PATH and into the working directory + newPath = shutil.move(self._fullPath, self._fileName) + self._fullPath = os.path.abspath(newPath) + + def splitDatabase( + self, keepTimeSteps: Sequence[Tuple[int, int]], label: str + ) -> str: + """ + Discard all data except for specific time steps, retaining old data in a separate file. + + This is useful when performing more exotic analyses, where each "time step" may + not represent a specific point in time, but something more nuanced. For example, + equilibrium cases store a new "cycle" for each iteration as it attempts to + converge the equilibrium cycle. At the end of the run, the last "cycle" is the + converged equilibrium cycle, whereas the previous cycles constitute the path to + convergence, which we typically wish to discard before further analysis. + + Parameters + ---------- + keepTimeSteps + A collection of the time steps to retain + + label + An informative label for the backed-up database. Usually something like + "-all-iterations". Will be interposed between the source name and the ".h5" + extension. + + Returns + ------- + str + The name of the new, backed-up database file. 
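+
+        Examples
+        --------
+        A minimal sketch (the retained time steps here are hypothetical; ``db`` must
+        already be open)::
+
+            backupPath = db.splitDatabase([(0, 0), (0, 1)], "-all-iterations")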
+ """ + if self.h5db is None: + raise ValueError("There is no open database to split.") + + self.h5db.close() + + backupDBPath = os.path.abspath(label.join(os.path.splitext(self._fileName))) + runLog.info("Retaining full database history in {}".format(backupDBPath)) + if self._fullPath is not None: + shutil.move(self._fullPath, backupDBPath) + + self.h5db = h5py.File(self._fullPath, self._permission) + dbOut = self.h5db + + with h5py.File(backupDBPath, "r") as dbIn: + dbOut.attrs.update(dbIn.attrs) + + # Copy everything except time node data + timeSteps = set() + for groupName, _ in dbIn.items(): + m = self.timeNodeGroupPattern.match(groupName) + if m: + timeSteps.add((int(m.group(1)), int(m.group(2)))) + else: + dbIn.copy(groupName, dbOut) + + if not set(keepTimeSteps).issubset(timeSteps): + raise ValueError( + "Not all desired time steps ({}) are even present in the " + "database".format(keepTimeSteps) + ) + + minCycle = next(iter(sorted(keepTimeSteps)))[0] + for cycle, node in keepTimeSteps: + offsetCycle = cycle - minCycle + offsetGroupName = getH5GroupName(offsetCycle, node) + dbIn.copy(getH5GroupName(cycle, node), dbOut, name=offsetGroupName) + dbOut[offsetGroupName + "/Reactor/cycle"][()] = offsetCycle + + return backupDBPath + + @property + def fileName(self): + return self._fileName + + @fileName.setter + def fileName(self, fName): + if self.h5db is not None: + raise RuntimeError("Cannot change Database file name while it's opened!") + self._fileName = fName + + def loadCS(self): + """Attempt to load settings from the database file. + + Notes + ----- + There are no guarantees here. If the database was written from a different version of ARMI than you are using, + these results may not be usable. For instance, the database could have been written from a vastly old or future + version of ARMI from the code you are using. + """ + cs = settings.Settings() + cs.caseTitle = os.path.splitext(os.path.basename(self.fileName))[0] + try: + cs.loadFromString(self.h5db["inputs/settings"].asstr()[()]) + except KeyError: + # not all paths to writing a database require inputs to be written to the + # database. Technically, settings do affect some of the behavior of database + # reading, so not having the settings that made the reactor that went into + # the database is not ideal. However, this isn't the right place to crash + # into it. Ideally, there would be not way to not have the settings in the + # database (force writing in writeToDB), or to make reading invariant to + # settings. + pass + + return cs + + def loadBlueprints(self): + """Attempt to load reactor blueprints from the database file. + + Notes + ----- + There are no guarantees here. If the database was written from a different version of ARMI than you are using, + these results may not be usable. For instance, the database could have been written from a vastly old or future + version of ARMI from the code you are using. + """ + # Blueprints use the yamlize package, which uses class attributes to define much of the class's behavior + # through metaclassing. Therefore, we need to be able to import all plugins *before* importing blueprints. 
+ from armi.reactor.blueprints import Blueprints + + bpString = None + + try: + bpString = self.h5db["inputs/blueprints"].asstr()[()] + except KeyError: + # not all reactors need to be created from blueprints, so they may not exist + pass + + if not bpString: + # looks like no blueprints contents + return None + + stream = io.StringIO(bpString) + stream = Blueprints.migrate(stream) + + bp = Blueprints.load(stream) + return bp + + def loadGeometry(self): + """ + This is primarily just used for migrations. + + The "geometry files" were replaced by ``systems:`` and ``grids:`` sections of ``Blueprints``. + """ + geom = systemLayoutInput.SystemLayoutInput() + geom.readGeomFromStream(io.StringIO(self.h5db["inputs/geomFile"].asstr()[()])) + return geom + + def writeInputsToDB(self, cs, csString=None, geomString=None, bpString=None): + """ + Write inputs into the database based the Settings. + + This is not DRY on purpose. The goal is that any particular Database + implementation should be very stable, so we dont want it to be easy to change + one Database implementation's behavior when trying to change another's. + + .. impl:: The run settings are saved the settings file. + :id: I_ARMI_DB_CS + :implements: R_ARMI_DB_CS + + A ``Settings`` object is passed into this method, and then the settings are + converted into a YAML string stream. That stream is then written to the H5 + file. Optionally, this method can take a pre-build settings string to be + written directly to the file. + + .. impl:: The reactor blueprints are saved the settings file. + :id: I_ARMI_DB_BP + :implements: R_ARMI_DB_BP + + A ``Blueprints`` string is optionally passed into this method, and then + written to the H5 file. If it is not passed in, this method will attempt to + find the blueprints input file in the settings, and read the contents of + that file into a stream to be written to the H5 file. + + Notes + ----- + This is hard-coded to read the entire file contents into memory and write that + directly into the database. We could have the cs/blueprints/geom write to a + string, however the ARMI log file contains a hash of each files' contents. In + the future, we should be able to reproduce a calculation with confidence that + the inputs are identical. + """ + caseTitle = ( + cs.caseTitle if cs is not None else os.path.splitext(self.fileName)[0] + ) + self.h5db.attrs["caseTitle"] = caseTitle + if csString is None: + # don't read file; use what's in the cs now. + # Sometimes settings are modified in tests. + stream = io.StringIO() + cs.writeToYamlStream(stream) + stream.seek(0) + csString = stream.read() + + if bpString is None: + bpPath = pathlib.Path(cs.inputDirectory) / cs[CONF_LOADING_FILE] + # only store blueprints if we actually loaded from them + if bpPath.exists() and bpPath.is_file(): + # Ensure that the input as stored in the DB is complete + bpString = resolveMarkupInclusions( + pathlib.Path(cs.inputDirectory) / cs[CONF_LOADING_FILE] + ).read() + else: + bpString = "" + + self.h5db["inputs/settings"] = csString + self.h5db["inputs/geomFile"] = geomString or "" + self.h5db["inputs/blueprints"] = bpString + + def readInputsFromDB(self): + return ( + self.h5db["inputs/settings"].asstr()[()], + self.h5db["inputs/geomFile"].asstr()[()], + self.h5db["inputs/blueprints"].asstr()[()], + ) + + def mergeHistory(self, inputDB, startCycle, startNode): + """ + Copy time step data up to, but not including the passed cycle and node. + + Notes + ----- + This is used for restart runs with the standard operator for example. 
+ The current time step (being loaded from) should not be copied, as that + time steps data will be written at the end of the time step. + """ + # iterate over the top level H5Groups and copy + for time, h5ts in zip(inputDB.genTimeSteps(), inputDB.genTimeStepGroups()): + cyc, tn = time + if cyc == startCycle and tn == startNode: + # all data up to current state are merged + return + self.h5db.copy(h5ts, h5ts.name) + + if inputDB.versionMinor < 2: + # The source database may have object references in some attributes. + # make sure to link those up using our manual path strategy. + references = [] + + def findReferences(name, obj): + for key, attr in obj.attrs.items(): + if isinstance(attr, h5py.h5r.Reference): + references.append((name, key, inputDB.h5db[attr].name)) + + h5ts.visititems(findReferences) + + for key, attr, path in references: + destTs = self.h5db[h5ts.name] + destTs[key].attrs[attr] = "@{}".format(path) + + def __enter__(self): + """Context management support.""" + if self._openCount == 0: + # open also increments _openCount + self.open() + else: + self._openCount += 1 + return self + + def __exit__(self, type, value, traceback): + """Typically we don't care why it broke but we want the DB to close.""" + self._openCount -= 1 + # always close if there is a traceback. + if self._openCount == 0 or traceback: + self.close(all(i is None for i in (type, value, traceback))) + + def __del__(self): + if self.h5db is not None: + self.close(False) + + def __delitem__(self, tn: Tuple[int, int, Optional[str]]): + cycle, timeNode, statePointName = tn + name = getH5GroupName(cycle, timeNode, statePointName) + if self.h5db is not None: + del self.h5db[name] + + def genTimeStepGroups( + self, timeSteps: Sequence[Tuple[int, int]] = None + ) -> Generator[h5py._hl.group.Group, None, None]: + """Returns a generator of HDF5 Groups for all time nodes, or for the passed selection.""" + assert ( + self.h5db is not None + ), "Must open the database before calling genTimeStepGroups" + if timeSteps is None: + for groupName, h5TimeNodeGroup in sorted(self.h5db.items()): + match = self.timeNodeGroupPattern.match(groupName) + if match: + yield h5TimeNodeGroup + else: + for step in timeSteps: + yield self.h5db[getH5GroupName(*step)] + + def getLayout(self, cycle, node): + """Return a Layout object representing the requested cycle and time node.""" + version = (self._versionMajor, self._versionMinor) + timeGroupName = getH5GroupName(cycle, node) + + return Layout(version, self.h5db[timeGroupName]) + + def genTimeSteps(self) -> Generator[Tuple[int, int], None, None]: + """Returns a generator of (cycle, node) tuples that are present in the DB.""" + assert ( + self.h5db is not None + ), "Must open the database before calling genTimeSteps" + for groupName in sorted(self.h5db.keys()): + match = self.timeNodeGroupPattern.match(groupName) + if match: + cycle = int(match.groups()[0]) + node = int(match.groups()[1]) + yield (cycle, node) + + def genAuxiliaryData(self, ts: Tuple[int, int]) -> Generator[str, None, None]: + """Returns a generator of names of auxiliary data on the requested time point.""" + assert ( + self.h5db is not None + ), "Must open the database before calling genAuxiliaryData" + cycle, node = ts + groupName = getH5GroupName(cycle, node) + timeGroup = self.h5db[groupName] + exclude = set(ArmiObject.TYPES.keys()) + exclude.add("layout") + return (groupName + "/" + key for key in timeGroup.keys() if key not in exclude) + + @staticmethod + def getAuxiliaryDataPath(ts: Tuple[int, int], name: str) -> 
str: + return getH5GroupName(*ts) + "/" + name + + def keys(self): + return (g.name for g in self.genTimeStepGroups()) + + def getH5Group(self, r, statePointName=None): + """ + Get the H5Group for the current ARMI timestep. + + This method can be used to allow other interfaces to place data into the database + at the correct timestep. + """ + groupName = getH5GroupName(r.p.cycle, r.p.timeNode, statePointName) + if groupName in self.h5db: + return self.h5db[groupName] + else: + group = self.h5db.create_group(groupName) + group.attrs["cycle"] = r.p.cycle + group.attrs["timeNode"] = r.p.timeNode + return group + + def hasTimeStep(self, cycle, timeNode, statePointName=""): + """Returns True if (cycle, timeNode, statePointName) is contained in the database.""" + return getH5GroupName(cycle, timeNode, statePointName) in self.h5db + + def writeToDB(self, reactor, statePointName=None): + assert self.h5db is not None, "Database must be open before writing." + # _createLayout is recursive + h5group = self.getH5Group(reactor, statePointName) + runLog.info("Writing to database for statepoint: {}".format(h5group.name)) + layout = Layout((self.versionMajor, self.versionMinor), comp=reactor) + layout.writeToDB(h5group) + groupedComps = layout.groupedComps + + for comps in groupedComps.values(): + self._writeParams(h5group, comps) + + def syncToSharedFolder(self): + """ + Copy DB to run working directory. + + Needed when multiple MPI processes need to read the same db, for example + when a history is needed from independent runs (e.g. for fuel performance on + a variety of assemblies). + + Notes + ----- + At some future point, we may implement a client-server like DB system which + would render this kind of operation unnecessary. + """ + runLog.extra("Copying DB to shared working directory.") + self.h5db.flush() + shutil.copy(self._fullPath, self._fileName) + + def load( + self, + cycle, + node, + cs=None, + bp=None, + statePointName=None, + allowMissing=False, + ): + """Load a new reactor from (cycle, node). + + Case settings and blueprints can be provided by the client, or read from the + database itself. Providing these from the client could be useful when + performing snapshot runs or where it is expected to use results from a run + using different settings and continue with new settings (or if blueprints are + not on the database). Geometry is read from the database itself. + + .. impl:: Users can load a reactor from a DB. + :id: I_ARMI_DB_R_LOAD + :implements: R_ARMI_DB_R_LOAD + + This method creates a ``Reactor`` object by reading the reactor state out + of an ARMI database file. This is done by passing in mandatory arguements + that specify the exact place in time you want to load the reactor from. + (That is, the cycle and node numbers.) Users can either pass the settings + and blueprints directly into this method, or it will attempt to read them + from the database file. The primary work done here is to read the hierarchy + of reactor objects from the data file, then reconstruct them in the correct + order. + + Parameters + ---------- + cycle : int + Cycle number + node : int + Time node. If value is negative, will be indexed from EOC backwards + like a list. 
+ cs : armi.settings.Settings (optional) + If not provided one is read from the database + bp : armi.reactor.Blueprints (optional) + If not provided one is read from the database + statePointName : str + Optional arbitrary statepoint name (e.g., "special" for "c00n00-special/") + allowMissing : bool, optional + Whether to emit a warning, rather than crash if reading a database + with undefined parameters. Default False. + + Returns + ------- + root : Reactor + The top-level object stored in the database; a Reactor. + """ + runLog.info("Loading reactor state for time node ({}, {})".format(cycle, node)) + + cs = cs or self.loadCS() + bp = bp or self.loadBlueprints() + + if node < 0: + numNodes = getNodesPerCycle(cs)[cycle] + if (node + numNodes) < 0: + raise ValueError( + f"Node {node} specified does not exist for cycle {cycle}" + ) + node = numNodes + node + + h5group = self.h5db[getH5GroupName(cycle, node, statePointName)] + + layout = Layout((self.versionMajor, self.versionMinor), h5group=h5group) + comps, groupedComps = layout._initComps(cs.caseTitle, bp) + + # populate data onto initialized components + for compType, compTypeList in groupedComps.items(): + self._readParams(h5group, compType, compTypeList, allowMissing=allowMissing) + + # assign params from blueprints + if bp is not None: + self._assignBlueprintsParams(bp, groupedComps) + + # stitch together + self._compose(iter(comps), cs) + + # also, make sure to update the global serial number so we don't re-use a number + parameterCollections.GLOBAL_SERIAL_NUM = max( + parameterCollections.GLOBAL_SERIAL_NUM, layout.serialNum.max() + ) + root = comps[0][0] + + # return a Reactor object + if cs[CONF_SORT_REACTOR]: + root.sort() + else: + runLog.warning( + "DeprecationWarning: This Reactor is not being sorted on DB load. " + f"Due to the setting {CONF_SORT_REACTOR}, this Reactor is unsorted. " + "But this feature is temporary and will be removed by 2024." + ) + + return root + + @staticmethod + def _assignBlueprintsParams(blueprints, groupedComps): + for compType, designs in ( + (Block, blueprints.blockDesigns), + (Assembly, blueprints.assemDesigns), + ): + paramsToSet = { + pDef.name + for pDef in compType.pDefs.inCategory( + parameters.Category.assignInBlueprints + ) + } + + for comp in groupedComps[compType]: + design = designs[comp.p.type] + for pName in paramsToSet: + val = getattr(design, pName) + if val is not None: + comp.p[pName] = val + + def _compose(self, comps, cs, parent=None): + """Given a flat collection of all of the ArmiObjects in the model, reconstitute the hierarchy.""" + comp, _, numChildren, location = next(comps) + + # attach the parent early, if provided; some cases need the parent attached for + # the rest of _compose to work properly. + comp.parent = parent + + # The Reactor adds a Core child by default, this is not ideal + for spontaneousChild in list(comp): + comp.remove(spontaneousChild) + + if isinstance(comp, Core): + pass + elif isinstance(comp, Assembly): + # Assemblies force their name to be something based on assemNum. 
When the + # assembly is created it gets a new assemNum, and throws out the correct + # name that we read from the DB + comp.name = comp.makeNameFromAssemNum(comp.p.assemNum) + comp.lastLocationLabel = Assembly.DATABASE + + # set the spatialLocators on each component + if location is not None: + if parent is not None and parent.spatialGrid is not None: + comp.spatialLocator = parent.spatialGrid[location] + else: + comp.spatialLocator = grids.CoordinateLocation( + location[0], location[1], location[2], None + ) + + # Need to keep a collection of Component instances for linked dimension + # resolution, before they can be add()ed to their parents. Not just filtering + # out of `children`, since resolveLinkedDims() needs a dict + childComponents = collections.OrderedDict() + children = [] + + for _ in range(numChildren): + child = self._compose(comps, cs, parent=comp) + children.append(child) + if isinstance(child, Component): + childComponents[child.name] = child + + for _childName, child in childComponents.items(): + child.resolveLinkedDims(childComponents) + + for child in children: + comp.add(child) + + if isinstance(comp, Core): + # TODO: This is also an issue related to geoms and which core is "The Core". + # We only have a good geom for the main core, so can't do process loading on + # the SFP, etc. + if comp.hasFlags(Flags.CORE): + comp.processLoading(cs, dbLoad=True) + elif isinstance(comp, Assembly): + comp.calculateZCoords() + + return comp + + def _writeParams(self, h5group, comps): + c = comps[0] + groupName = c.__class__.__name__ + if groupName not in h5group: + # Only create the group if it doesnt already exist. This happens when + # re-writing params in the same time node (e.g. something changed between + # EveryNode and EOC) + g = h5group.create_group(groupName) + else: + g = h5group[groupName] + + for paramDef in c.p.paramDefs.toWriteToDB(): + attrs = {} + + if hasattr(c, "DIMENSION_NAMES") and paramDef.name in c.DIMENSION_NAMES: + linkedDims = [] + data = [] + + for _, c in enumerate(comps): + val = c.p[paramDef.name] + if isinstance(val, tuple): + linkedDims.append("{}.{}".format(val[0].name, val[1])) + data.append(val[0].getDimension(val[1])) + else: + linkedDims.append("") + data.append(val) + + data = numpy.array(data) + if any(linkedDims): + attrs["linkedDims"] = numpy.array(linkedDims).astype("S") + else: + # NOTE: after loading, the previously unset values will be defaulted + temp = [c.p.get(paramDef.name, paramDef.default) for c in comps] + if paramDef.serializer is not None: + data, sAttrs = paramDef.serializer.pack(temp) + assert ( + data.dtype.kind != "O" + ), "{} failed to convert {} to a numpy-supported type.".format( + paramDef.serializer.__name__, paramDef.name + ) + attrs.update(sAttrs) + attrs[_SERIALIZER_NAME] = paramDef.serializer.__name__ + attrs[_SERIALIZER_VERSION] = paramDef.serializer.version + else: + data = numpy.array(temp) + del temp + + # Convert Unicode to byte-string + if data.dtype.kind == "U": + data = data.astype("S") + + if data.dtype.kind == "O": + # Something was added to the data array that caused numpy to want to + # treat it as a general-purpose Object array. 
This usually happens + # because: + # - the data contain NoDefaults + # - the data contain one or more Nones, + # - the data contain special types like tuples, dicts, etc + # - the data are composed of arrays that numpy would otherwise happily + # convert to a higher-order array, but the dimensions of the sub-arrays + # are inconsistent ("jagged") + # - there is some sort of honest-to-goodness weird object + # We want to support the first two cases with minimal intrusion, since + # these should be pretty easy to faithfully represent in the db. The + # jagged case should be supported as well, but may require a less + # faithful representation (e.g. flattened), but the last case isn't + # really worth supporting. + + # Here is one proposal: + # - Check to see if the array is jagged. all(shape == shape[0]). If not, + # flatten, store the data offsets and array shapes, and None locations + # as attrs + # - If not jagged, all top-level ndarrays are the same shape, so it is + # easier to replace Nones with ndarrays filled with special values. + if parameters.NoDefault in data: + data = None + else: + data, specialAttrs = packSpecialData(data, paramDef.name) + attrs.update(specialAttrs) + + if data is None: + continue + + try: + if paramDef.name in g: + raise ValueError( + "`{}` was already in `{}`. This time node " + "should have been empty".format(paramDef.name, g) + ) + + dataset = g.create_dataset(paramDef.name, data=data, compression="gzip") + if any(attrs): + Database._writeAttrs(dataset, h5group, attrs) + except Exception: + runLog.error( + "Failed to write {} to database. Data: " + "{}".format(paramDef.name, data) + ) + raise + if isinstance(c, Block): + self._addHomogenizedNumberDensityParams(comps, g) + + @staticmethod + def _addHomogenizedNumberDensityParams(blocks, h5group): + """ + Create on-the-fly block homog. number density params for XTVIEW viewing. + + See Also + -------- + collectBlockNumberDensities + """ + nDens = collectBlockNumberDensities(blocks) + + for nucName, numDens in nDens.items(): + h5group.create_dataset(nucName, data=numDens, compression="gzip") + + @staticmethod + def _readParams(h5group, compTypeName, comps, allowMissing=False): + g = h5group[compTypeName] + + renames = getApp().getParamRenames() + + pDefs = comps[0].pDefs + + # this can also be made faster by specializing the method by type + for paramName, dataSet in g.items(): + # Honor historical databases where the parameters may have changed names + # since. + while paramName in renames: + paramName = renames[paramName] + + try: + pDef = pDefs[paramName] + except KeyError: + if re.match(r"^n[A-Z][a-z]?\d*", paramName): + # This is a temporary viz param (number density) made by + # _addHomogenizedNumberDensityParams ignore it safely + continue + else: + # If a parameter exists in the database but not in the application + # reading it, we can technically keep going. Since this may lead to + # potential correctness issues, raise a warning + if allowMissing: + runLog.warning( + "Found `{}` parameter `{}` in the database, which is not defined. 
" + "Ignoring it.".format(compTypeName, paramName) + ) + continue + else: + raise + + data = dataSet[:] + attrs = Database._resolveAttrs(dataSet.attrs, h5group) + + if pDef.serializer is not None: + assert _SERIALIZER_NAME in dataSet.attrs + assert dataSet.attrs[_SERIALIZER_NAME] == pDef.serializer.__name__ + assert _SERIALIZER_VERSION in dataSet.attrs + + data = numpy.array( + pDef.serializer.unpack( + data, dataSet.attrs[_SERIALIZER_VERSION], attrs + ) + ) + + if data.dtype.type is numpy.string_: + data = numpy.char.decode(data) + + if attrs.get("specialFormatting", False): + data = unpackSpecialData(data, attrs, paramName) + + linkedDims = [] + if "linkedDims" in attrs: + linkedDims = numpy.char.decode(attrs["linkedDims"]) + + # iterating of numpy is not fast... + for c, val, linkedDim in itertools.zip_longest( + comps, data.tolist(), linkedDims, fillvalue="" + ): + try: + if linkedDim != "": + c.p[paramName] = linkedDim + else: + c.p[paramName] = val + except AssertionError as ae: + # happens when a param was deprecated but being loaded from old DB + runLog.warning( + f"{str(ae)}\nSkipping load of invalid param `{paramName}`" + " (possibly loading from old DB)\n" + ) + + def getHistoryByLocation( + self, + comp: ArmiObject, + params: Optional[List[str]] = None, + timeSteps: Optional[Sequence[Tuple[int, int]]] = None, + ) -> History: + """Get the parameter histories at a specific location.""" + return self.getHistoriesByLocation([comp], params=params, timeSteps=timeSteps)[ + comp + ] + + def getHistoriesByLocation( + self, + comps: Sequence[ArmiObject], + params: Optional[List[str]] = None, + timeSteps: Optional[Sequence[Tuple[int, int]]] = None, + ) -> Histories: + """ + Get the parameter histories at specific locations. + + This has a number of limitations, which should in practice not be too limiting: + - The passed objects must have IndexLocations. This type of operation doesn't + make much sense otherwise. + - The passed objects must exist in a hierarchy that leads to a Core + object, which serves as an anchor that can fully define all index locations. + This could possibly be made more general by extending grids, but that gets a + little more complicated. + - All requested objects must exist under the **same** anchor object, and at the + same depth below it. + - All requested objects must have the same type. + + Parameters + ---------- + comps : list of ArmiObject + The components/composites that currently occupy the location that you want + histories at. ArmiObjects are passed, rather than locations, because this + makes it easier to figure out things related to layout. + params : List of str, optional + The parameter names for the parameters that we want the history of. If None, + all parameter history is given + timeSteps : List of (cycle, node) tuples, optional + The time nodes that you want history for. If None, all available time nodes + will be returned. + """ + if self.versionMinor < 4: + raise ValueError( + "Location-based histories are only supported for db " + "version 3.4 and greater. This database is version " + f"{self.versionMajor}, {self.versionMinor}." 
+ ) + + locations = [c.spatialLocator.getCompleteIndices() for c in comps] + + histData: Histories = { + c: collections.defaultdict(collections.OrderedDict) for c in comps + } + + # Check our assumptions about the passed locations: + # All locations must have the same parent and bear the same relationship to the + # anchor object + anchors = { + obj.getAncestorAndDistance(lambda a: isinstance(a, Core)) for obj in comps + } + + if len(anchors) != 1: + raise ValueError( + "The passed objects do not have the same anchor or distance to that " + "anchor; encountered the following: {}".format(anchors) + ) + + anchorInfo = anchors.pop() + if anchorInfo is not None: + anchor, anchorDistance = anchorInfo + else: + raise ValueError( + "Could not determine an anchor object for the passed components" + ) + + anchorSerialNum = anchor.p.serialNum + + # All objects of the same type + objectTypes = {type(obj) for obj in comps} + if len(objectTypes) != 1: + raise TypeError( + "The passed objects must be the same type; got objects of " + "types `{}`".format(objectTypes) + ) + + compType = objectTypes.pop() + objClassName = compType.__name__ + + locToComp = {c.spatialLocator.getCompleteIndices(): c for c in comps} + + for h5TimeNodeGroup in self.genTimeStepGroups(timeSteps): + if "layout" not in h5TimeNodeGroup: + # layout hasnt been written for this time step, so we can't get anything + # useful here. Perhaps the current value is of use, in which case the + # DatabaseInterface should be used. + continue + + cycle = h5TimeNodeGroup.attrs["cycle"] + timeNode = h5TimeNodeGroup.attrs["timeNode"] + layout = Layout( + (self.versionMajor, self.versionMinor), h5group=h5TimeNodeGroup + ) + + ancestors = layout.computeAncestors( + layout.serialNum, layout.numChildren, depth=anchorDistance + ) + + lLocation = layout.location + # filter for objects that live under the desired ancestor and at a desired location + objectIndicesInLayout = numpy.array( + [ + i + for i, (ancestor, loc) in enumerate(zip(ancestors, lLocation)) + if ancestor == anchorSerialNum and loc in locations + ] + ) + + # This could also be way more efficient if lLocation were a numpy array + objectLocationsInLayout = [lLocation[i] for i in objectIndicesInLayout] + + objectIndicesInData = numpy.array(layout.indexInData)[ + objectIndicesInLayout + ].tolist() + + try: + h5GroupForType = h5TimeNodeGroup[objClassName] + except KeyError as ee: + runLog.error( + "{} not found in {} of {}".format( + objClassName, h5TimeNodeGroup, self + ) + ) + raise ee + + for paramName in params or h5GroupForType.keys(): + if paramName == "location": + # location is special, since it is stored in layout/ + data = numpy.array(layout.location)[objectIndicesInLayout] + elif paramName in h5GroupForType: + dataSet = h5GroupForType[paramName] + try: + data = dataSet[objectIndicesInData] + except: + runLog.error( + "Failed to load index {} from {}@{}".format( + objectIndicesInData, dataSet, (cycle, timeNode) + ) + ) + raise + + if data.dtype.type is numpy.string_: + data = numpy.char.decode(data) + + if dataSet.attrs.get("specialFormatting", False): + if dataSet.attrs.get("nones", False): + data = replaceNonsenseWithNones(data, paramName) + else: + raise ValueError( + "History tracking for non-None, " + "special-formatted parameters is not supported: " + "{}, {}".format( + paramName, {k: v for k, v in dataSet.attrs.items()} + ) + ) + else: + # Nothing in the database for this param, so use the default value + data = numpy.repeat( + parameters.byNameAndType(paramName, 
compType).default, + len(comps), + ) + + # store data to the appropriate comps. This is where taking components + # as the argument (rather than locations) is a little bit peculiar. + # + # At this point, `data` are arranged by the order of elements in + # `objectIndicesInData`, which corresponds to the order of + # `objectIndicesInLayout` + for loc, val in zip(objectLocationsInLayout, data.tolist()): + comp = locToComp[loc] + histData[comp][paramName][cycle, timeNode] = val + return histData + + def getHistory( + self, + comp: ArmiObject, + params: Optional[Sequence[str]] = None, + timeSteps: Optional[Sequence[Tuple[int, int]]] = None, + ) -> History: + """ + Get parameter history for a single ARMI Object. + + Parameters + ---------- + comps + An individual ArmiObject + params + parameters to gather + + Returns + ------- + dict + Dictionary of str/list pairs. + """ + return self.getHistories([comp], params, timeSteps)[comp] + + def getHistories( + self, + comps: Sequence[ArmiObject], + params: Optional[Sequence[str]] = None, + timeSteps: Optional[Sequence[Tuple[int, int]]] = None, + ) -> Histories: + """ + Get the parameter histories for a sequence of ARMI Objects. + + This implementation is unaware of the state of the reactor outside of the + database itself, and is therefore not usually what client code should be calling + directly during normal ARMI operation. It only knows about historical data that + have actually been written to the database. Usually one wants to be able to get + historical, plus current data, for which the similar method on the + DatabaseInterface may be more useful. + + Parameters + ---------- + comps + Something that is iterable multiple times + params + parameters to gather. + timeSteps + Selection of time nodes to get data for. If omitted, return full history + + Returns + ------- + dict + Dictionary ArmiObject (input): dict of str/list pairs containing ((cycle, + node), value). + """ + histData: Histories = { + c: collections.defaultdict(collections.OrderedDict) for c in comps + } + types = {c.__class__ for c in comps} + compsByTypeThenSerialNum: Dict[Type[ArmiObject], Dict[int, ArmiObject]] = { + t: dict() for t in types + } + + for c in comps: + compsByTypeThenSerialNum[c.__class__][c.p.serialNum] = c + + for h5TimeNodeGroup in self.genTimeStepGroups(timeSteps): + if "layout" not in h5TimeNodeGroup: + # Layout hasn't been written for this time step, so whatever is in there + # didn't come from the DatabaseInterface. 
Probably because it's the + # current time step and something has created the group to store aux + # data + continue + + cycle = h5TimeNodeGroup.attrs["cycle"] + timeNode = h5TimeNodeGroup.attrs["timeNode"] + layout = Layout( + (self.versionMajor, self.versionMinor), h5group=h5TimeNodeGroup + ) + + for compType, compsBySerialNum in compsByTypeThenSerialNum.items(): + compTypeName = compType.__name__ + try: + h5GroupForType = h5TimeNodeGroup[compTypeName] + except KeyError as ee: + runLog.error( + "{} not found in {} of {}".format( + compTypeName, h5TimeNodeGroup, self + ) + ) + raise ee + layoutIndicesForType = numpy.where(layout.type == compTypeName)[0] + serialNumsForType = layout.serialNum[layoutIndicesForType].tolist() + layoutIndexInData = layout.indexInData[layoutIndicesForType].tolist() + + indexInData = [] + reorderedComps = [] + + for ii, sn in zip(layoutIndexInData, serialNumsForType): + d = compsBySerialNum.get(sn, None) + if d is not None: + indexInData.append(ii) + reorderedComps.append(d) + + if not indexInData: + continue + + # note this is very similar to _readParams, but there are some important + # differences. + # 1) we are not assigning to p[paramName] + # 2) not using linkedDims at all + # 3) not performing parameter renaming. This may become necessary + for paramName in params or h5GroupForType.keys(): + if paramName == "location": + # cast to a numpy array so that we can use list indices + data = numpy.array(layout.location)[layoutIndicesForType][ + indexInData + ] + elif paramName in h5GroupForType: + dataSet = h5GroupForType[paramName] + try: + data = dataSet[indexInData] + except: + runLog.error( + "Failed to load index {} from {}@{}".format( + indexInData, dataSet, (cycle, timeNode) + ) + ) + raise + + if data.dtype.type is numpy.string_: + data = numpy.char.decode(data) + + if dataSet.attrs.get("specialFormatting", False): + if dataSet.attrs.get("nones", False): + data = replaceNonsenseWithNones(data, paramName) + else: + raise ValueError( + "History tracking for non-none special formatting " + "not supported: {}, {}".format( + paramName, + {k: v for k, v in dataSet.attrs.items()}, + ) + ) + else: + # Nothing in the database, so use the default value + data = numpy.repeat( + parameters.byNameAndType(paramName, compType).default, + len(reorderedComps), + ) + + # iterating of numpy is not fast.. + for c, val in zip(reorderedComps, data.tolist()): + if isinstance(val, list): + val = numpy.array(val) + + histData[c][paramName][cycle, timeNode] = val + + r = comps[0].getAncestorWithFlags(Flags.REACTOR) + cycleNode = r.p.cycle, r.p.timeNode + for c, paramHistories in histData.items(): + for paramName, hist in paramHistories.items(): + if cycleNode not in hist: + try: + hist[cycleNode] = c.p[paramName] + except: # noqa: bare-except + if paramName == "location": + hist[cycleNode] = c.spatialLocator.indices + + return histData + + @staticmethod + def _writeAttrs(obj, group, attrs): + """ + Handle safely writing attributes to a dataset, handling large data if necessary. + + This will attempt to store attributes directly onto an HDF5 object if possible, + falling back to proper datasets and reference attributes if necessary. This is + needed because HDF5 tries to fit attributes into the object header, which has + limited space. If an attribute is too large, h5py raises a RuntimeError. + In such cases, this will store the attribute data in a proper dataset and + place a reference to that dataset in the attribute instead. 
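+        The reference is stored as a plain string of the form ``@/path/to/dataset``;
+        on read, ``_resolveAttrs`` recognizes the leading ``@`` and pulls the data
+        back out of the referenced dataset.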
+ + In practice, this takes ``linkedDims`` attrs from a particular component type (like + ``c00n00/Circle/id``) and stores them in new datasets (like + ``c00n00/attrs/1_linkedDims``, ``c00n00/attrs/2_linkedDims``) and then sets the + object's attrs to links to those datasets. + """ + for key, value in attrs.items(): + try: + obj.attrs[key] = value + except RuntimeError as err: + if "object header message is too large" not in err.args[0]: + raise + + runLog.info( + "Storing attribute `{}` for `{}` into it's own dataset within " + "`{}/attrs`".format(key, obj, group) + ) + + if "attrs" not in group: + attrGroup = group.create_group("attrs") + else: + attrGroup = group["attrs"] + dataName = str(len(attrGroup)) + "_" + key + attrGroup[dataName] = value + + # using a soft link here allows us to cheaply copy time nodes without + # needing to crawl through and update object references. + linkName = attrGroup[dataName].name + obj.attrs[key] = "@{}".format(linkName) + + @staticmethod + def _resolveAttrs(attrs, group): + """ + Reverse the action of _writeAttrs. + + This reads actual attrs and looks for the real data + in the datasets that the attrs were pointing to. + """ + attr_link = re.compile("^@(.*)$") + + resolved = {} + for key, val in attrs.items(): + try: + if isinstance(val, h5py.h5r.Reference): + # Old style object reference. If this cannot be dereferenced, it is + # likely because mergeHistory was used to get the current database, + # which does not preserve references. + resolved[key] = group[val] + elif isinstance(val, str): + m = attr_link.match(val) + if m: + # dereference the path to get the data out of the dataset. + resolved[key] = group[m.group(1)][()] + else: + resolved[key] = val + else: + resolved[key] = val + except ValueError: + runLog.error(f"HDF error loading {key} : {val}\nGroup: {group}") + raise + + return resolved + + +def packSpecialData( + data: numpy.ndarray, paramName: str +) -> Tuple[Optional[numpy.ndarray], Dict[str, Any]]: + """ + Reduce data that wouldn't otherwise play nicely with HDF5/numpy arrays to a format + that will. + + This is the main entry point for conforming "strange" data into something that will + both fit into a numpy array/HDF5 dataset, and be recoverable to its original-ish + state when reading it back in. This is accomplished by detecting a handful of known + offenders and using various HDF5 attributes to store necessary auxiliary data. It is + important to keep in mind that the data that is passed in has already been converted + to a numpy array, so the top dimension is always representing the collection of + composites that are storing the parameters. For instance, if we are dealing with a + Block parameter, the first index in the numpy array of data is the block index; so + if each block has a parameter that is a dictionary, ``data`` would be a ndarray, + where each element is a dictionary. This routine supports a number of different + "strange" things: + + * Dict[str, float]: These are stored by finding the set of all keys for all + instances, and storing those keys as a list in an attribute. The data themselves + are stored as arrays indexed by object, then key index. Dictionaries lacking data + for a key store a nan in it's place. This will work well in instances where most + objects have data for most keys. 
+ * Jagged arrays: These are stored by concatenating all of the data into a single, + one-dimensional array, and storing attributes to describe the shapes of each + object's data, and an offset into the beginning of each object's data. + * Arrays with ``None`` in them: These are stored by replacing each instance of + ``None`` with a magical value that shouldn't be encountered in realistic + scenarios. + + Parameters + ---------- + data + An ndarray storing the data that we want to stuff into the database. These are + usually dtype=Object, which is how we usually end up here in the first place. + + paramName + The parameter name that we are trying to store data for. This is mostly used for + diagnostics. + + See Also + -------- + unpackSpecialData + """ + # Check to make sure that we even need to do this. If the numpy data type is + # not "O", chances are we have nice, clean data. + if data.dtype != "O": + return data, {} + + attrs: Dict[str, Any] = {"specialFormatting": True} + + # make a copy of the data, so that the original is unchanged + data = copy.copy(data) + + # find locations of Nones. The below works for ndarrays, whereas `data == None` + # gives a single True/False value + nones = numpy.where([d is None for d in data])[0] + + if len(nones) == data.shape[0]: + # Everything is None, so why bother? + return None, attrs + + if len(nones) > 0: + attrs["nones"] = True + + # TODO: this whole if/then/elif/else can be optimized by looping once and then + # determining the correct action + # A robust solution would need + # to do this on a case-by-case basis, and re-do it any time we want to + # write, since circumstances may change. Not only that, but we may need + # to do perform more that one of these operations to get to an array + # that we want to put in the database. + if any(isinstance(d, dict) for d in data): + # we're assuming that a dict is {str: float}. We store the union of + # all of the keys for all of the objects as a special "keys" + # attribute, and store a value for all of those keys for all + # objects, whether or not there is actually data associated with + # that key (storing a nan when no data). This makes for a simple + # approach that is somewhat digestible just looking at the db, and + # should be quite efficient in the case where most objects have data + # for most keys. + attrs["dict"] = True + keys = sorted({k for d in data for k in d}) + data = numpy.array([[d.get(k, numpy.nan) for k in keys] for d in data]) + if data.dtype == "O": + # The data themselves are nasty. We could support this, but best to wait for + # a credible use case. 
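+            # For example (hypothetical data), a parameter whose dictionary values
+            # are themselves dicts, e.g. {"foo": {"bar": 1.0}}, would still yield an
+            # object array here, since the inner values are not plain numbers.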
+ raise TypeError( + "Unable to coerce dictionary data into usable numpy array for " + "{}".format(paramName) + ) + attrs["keys"] = numpy.array(keys).astype("S") + + return data, attrs + + # conform non-numpy arrays to numpy + for i, val in enumerate(data): + if isinstance(val, (list, tuple)): + data[i] = numpy.array(val) + + if not any(isinstance(d, numpy.ndarray) for d in data): + # looks like 1-D plain-old-data + data = replaceNonesWithNonsense(data, paramName, nones) + return data, attrs + + # check if data is jagged + candidate = next((d for d in data if d is not None)) + shape = candidate.shape + ndim = candidate.ndim + isJagged = ( + not all(d.shape == shape for d in data if d is not None) or candidate.size == 0 + ) + + if isJagged: + assert all( + val.ndim == ndim for val in data if val is not None + ), "Inconsistent dimensions in jagged array for: {}\nDimensions: {}".format( + paramName, [val.ndim for val in data if val is not None] + ) + attrs["jagged"] = True + + # offsets[i] is the index of the zero-th element of sub-array i + offsets = numpy.array( + [0] + + list( + itertools.accumulate(val.size if val is not None else 0 for val in data) + )[:-1] + ) + + # shapes[i] is the shape of the i-th sub-array. Nones are represented by all + # zeros + shapes = numpy.array( + list(val.shape if val is not None else ndim * (0,) for val in data) + ) + + data = numpy.delete(data, nones) + + data = numpy.concatenate(data, axis=None) + + attrs["offsets"] = offsets + attrs["shapes"] = shapes + attrs["noneLocations"] = nones + return data, attrs + + if any(isinstance(d, (tuple, list, numpy.ndarray)) for d in data): + data = replaceNonesWithNonsense(data, paramName, nones) + return data, attrs + + if len(nones) == 0: + raise TypeError( + "Cannot write {} to the database, it did not resolve to a numpy/HDF5 " + "type.".format(paramName) + ) + + runLog.error("Data unable to find special none value: {}".format(data)) + raise TypeError("Failed to process special data for {}".format(paramName)) + + +def unpackSpecialData(data: numpy.ndarray, attrs, paramName: str) -> numpy.ndarray: + """ + Extract data from a specially-formatted HDF5 dataset into a numpy array. + + This should invert the operations performed by :py:func:`packSpecialData`. + + Parameters + ---------- + data + Specially-formatted data array straight from the database. + + attrs + The attributes associated with the dataset that contained the data. + + paramName + The name of the parameter that is being unpacked. Only used for diagnostics. + + Returns + ------- + numpy.ndarray + An ndarray containing the closest possible representation of the data that was + originally written to the database. + + See Also + -------- + packSpecialData + """ + if not attrs.get("specialFormatting", False): + # The data were not subjected to any special formatting; short circuit. + assert data.dtype != "O" + return data + + unpackedData: List[Any] + if attrs.get("nones", False) and not attrs.get("jagged", False): + data = replaceNonsenseWithNones(data, paramName) + return data + if attrs.get("jagged", False): + offsets = attrs["offsets"] + shapes = attrs["shapes"] + ndim = len(shapes[0]) + emptyArray = numpy.ndarray(ndim * (0,), dtype=data.dtype) + unpackedJaggedData: List[Optional[numpy.ndarray]] = [] + for offset, shape in zip(offsets, shapes): + if tuple(shape) == ndim * (0,): + # Start with an empty array. 
This may be replaced with a None later + unpackedJaggedData.append(emptyArray) + else: + unpackedJaggedData.append( + numpy.ndarray(shape, dtype=data.dtype, buffer=data[offset:]) + ) + for i in attrs["noneLocations"]: + unpackedJaggedData[i] = None + + return numpy.array(unpackedJaggedData, dtype=object) + if attrs.get("dict", False): + keys = numpy.char.decode(attrs["keys"]) + unpackedData = [] + assert data.ndim == 2 + for d in data: + unpackedData.append( + {key: value for key, value in zip(keys, d) if not numpy.isnan(value)} + ) + return numpy.array(unpackedData) + + raise ValueError( + "Do not recognize the type of special formatting that was applied " + "to {}. Attrs: {}".format(paramName, {k: v for k, v in attrs.items()}) + ) + + +def collectBlockNumberDensities(blocks) -> Dict[str, numpy.ndarray]: + """ + Collect block-by-block homogenized number densities for each nuclide. + + Long ago, composition was stored on block params. No longer; they are on the + component numberDensity params. These block-level params, are still useful to see + compositions in some visualization tools. Rather than keep them on the reactor + model, we dynamically compute them here and slap them in the database. These are + ignored upon reading and will not affect the results. + + Remove this once a better viz tool can view composition distributions. Also remove + the try/except in ``_readParams`` + """ + nucNames = sorted(list(set(nucName for b in blocks for nucName in b.getNuclides()))) + nucBases = [nuclideBases.byName[nn] for nn in nucNames] + # it's faster to loop over blocks first and get all number densities from each + # than it is to get one nuclide at a time from each block because of area fraction + # calculations. So we use some RAM here instead. + nucDensityMatrix = [] + for block in blocks: + nucDensityMatrix.append(block.getNuclideNumberDensities(nucNames)) + nucDensityMatrix = numpy.array(nucDensityMatrix) + + dataDict = dict() + for ni, nb in enumerate(nucBases): + # the nth column is a vector of nuclide densities for this nuclide across all blocks + dataDict[nb.getDatabaseName()] = nucDensityMatrix[:, ni] + + return dataDict diff --git a/armi/bookkeeping/db/database3.py b/armi/bookkeeping/db/database3.py index ee67f70dd..43085f540 100644 --- a/armi/bookkeeping/db/database3.py +++ b/armi/bookkeeping/db/database3.py @@ -1,4 +1,4 @@ -# Copyright 2019 TerraPower, LLC +# Copyright 2024 TerraPower, LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,1739 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""This is a temporary file created to ease a long API transition. -""" -ARMI Database implementation, version 3.4. - -A reactor model should be fully recoverable from the database; all the way down to the -component level. As a result, the structure of the underlying data is bound to the -hierarchical Composite Reactor Model. Furthermore, this database format is intended to -be more dynamic, permitting as-yet undeveloped levels and classes in the Composite -Reactor Model to be supported as they are added. More high-level discussion is -contained in :doc:`/user/outputs/database`. +Originally, the ``Database3`` class existed as a temporary naming stop-gap as ARMI transitioned from +one version of a "Database" class to another. 
But, for reasons lost to history, the "Database3" name +stuck. -The :py:class:`Database3` class contains most of the functionality for interacting -with the underlying data. This includes things like dumping a Reactor state to the -database and loading it back again, as well as extracting historical data for a given -object or collection of object from the database file. However, for the nitty-gritty -details of how the hierarchical Composite Reactor Model is translated to the flat file -database, please refer to :py:mod:`armi.bookkeeping.db.layout`. - -Refer to :py:mod:`armi.bookkeeping.db` for information about versioning. +However, it would be a painful effort if ARMI just renamed this class now. That's why this file +exists; to allow for a long, slow API deprecation. """ -import collections -import copy -import io -import itertools -import os -import pathlib -import re -import shutil -import subprocess -import sys -from platform import uname -from typing import ( - Optional, - Tuple, - Type, - Dict, - Any, - List, - Sequence, - Generator, -) - -import h5py -import numpy - -from armi import context -from armi import getApp -from armi import meta -from armi import runLog -from armi import settings -from armi.bookkeeping.db.layout import ( - Layout, - DB_VERSION, - replaceNonesWithNonsense, - replaceNonsenseWithNones, -) -from armi.bookkeeping.db.typedefs import History, Histories -from armi.nucDirectory import nuclideBases -from armi.physics.neutronics.settings import CONF_LOADING_FILE -from armi.reactor import grids -from armi.reactor import parameters -from armi.reactor import systemLayoutInput -from armi.reactor.assemblies import Assembly -from armi.reactor.blocks import Block -from armi.reactor.components import Component -from armi.reactor.composites import ArmiObject -from armi.reactor.flags import Flags -from armi.reactor.parameters import parameterCollections -from armi.reactor.reactors import Core -from armi.settings.fwSettings.globalSettings import CONF_SORT_REACTOR -from armi.utils import getNodesPerCycle -from armi.utils.textProcessors import resolveMarkupInclusions - -# CONSTANTS -_SERIALIZER_NAME = "serializerName" -_SERIALIZER_VERSION = "serializerVersion" - - -def getH5GroupName(cycle: int, timeNode: int, statePointName: str = None) -> str: - """ - Naming convention specifier. - - ARMI defines the naming convention cXXnYY for groups of simulation data. - That is, data is grouped by cycle and time node information during a - simulated run. - """ - return "c{:0>2}n{:0>2}{}".format(cycle, timeNode, statePointName or "") - - -class Database3: - """ - Version 3 of the ARMI Database, handling serialization and loading of Reactor states. - - This implementation of the database pushes all objects in the Composite Reactor - Model into the database. This process is aided by the ``Layout`` class, which - handles the packing and unpacking of the structure of the objects, their - relationships, and their non-parameter attributes. - - .. impl:: The database files are H5, and thus language agnostic. - :id: I_ARMI_DB_H51 - :implements: R_ARMI_DB_H5 - - This class implements a light wrapper around H5 files, so they can be used to - store ARMI outputs. H5 files are commonly used in scientific applications in - Fortran and C++. As such, they are entirely language agnostic binary files. The - implementation here is that ARMI wraps the ``h5py`` library, and uses its - extensive tooling, instead of re-inventing the wheel. - - See Also - -------- - `doc/user/outputs/database` for more details. 
- """ - - timeNodeGroupPattern = re.compile(r"^c(\d\d)n(\d\d)$") - - def __init__(self, fileName: os.PathLike, permission: str): - """ - Create a new Database3 object. - - Parameters - ---------- - fileName: - name of the file - - permission: - file permissions, write ("w") or read ("r") - """ - self._fileName = fileName - # No full path yet; we will determine this based on FAST_PATH and permissions - self._fullPath: Optional[str] = None - self._permission = permission - self.h5db: Optional[h5py.File] = None - - # Allows context management on open files. - # If context management is used on a file that is already open, it will not reopen - # and it will also not close after leaving that context. - # This allows the treatment of all databases the same whether they are open or - # closed. - self._openCount: int = 0 - - if permission == "w": - self.version = DB_VERSION - else: - # will be set upon read - self._version = None - self._versionMajor = None - self._versionMinor = None - - @property - def version(self) -> str: - return self._version - - @version.setter - def version(self, value: str): - self._version = value - self._versionMajor, self._versionMinor = (int(v) for v in value.split(".")) - - @property - def versionMajor(self): - return self._versionMajor - - @property - def versionMinor(self): - return self._versionMinor - - def __repr__(self): - return "<{} {}>".format( - self.__class__.__name__, repr(self.h5db).replace("<", "").replace(">", "") - ) - - def open(self): - if self.h5db is not None: - raise ValueError( - "This database is already open; make sure to close it " - "before trying to open it again." - ) - filePath = self._fileName - self._openCount += 1 - - if self._permission in {"r", "a"}: - self._fullPath = os.path.abspath(filePath) - self.h5db = h5py.File(filePath, self._permission) - self.version = self.h5db.attrs["databaseVersion"] - return - - if self._permission == "w": - # assume fast path! - filePath = os.path.join(context.getFastPath(), filePath) - self._fullPath = os.path.abspath(filePath) - - else: - runLog.error("Unrecognized file permissions `{}`".format(self._permission)) - raise ValueError( - "Cannot open database with permission `{}`".format(self._permission) - ) - - # open the database, and write a bunch of metadata to it - runLog.info("Opening database file at {}".format(os.path.abspath(filePath))) - self.h5db = h5py.File(filePath, self._permission) - self.h5db.attrs["successfulCompletion"] = False - self.h5db.attrs["version"] = meta.__version__ - self.h5db.attrs["databaseVersion"] = self.version - self.writeSystemAttributes(self.h5db) - - # store app and plugin data - app = getApp() - self.h5db.attrs["appName"] = app.name - plugins = app.pluginManager.list_name_plugin() - ps = [ - (os.path.abspath(sys.modules[p[1].__module__].__file__), p[1].__name__) - for p in plugins - ] - ps = numpy.array([str(p[0]) + ":" + str(p[1]) for p in ps]).astype("S") - self.h5db.attrs["pluginPaths"] = ps - self.h5db.attrs["localCommitHash"] = Database3.grabLocalCommitHash() - - @staticmethod - def writeSystemAttributes(h5db): - """Write system attributes to the database. - - .. impl:: Add system attributes to the database. - :id: I_ARMI_DB_QA - :implements: R_ARMI_DB_QA - - This method writes some basic system information to the H5 file. This is - designed as a starting point, so users can see information about the system - their simulations were run on. As ARMI is used on Windows and Linux, the - tooling here has to be platform independent. 
The two major sources of - information are the ARMI :py:mod:`context ` module and the - Python standard library ``platform``. - """ - h5db.attrs["user"] = context.USER - h5db.attrs["python"] = sys.version - h5db.attrs["armiLocation"] = os.path.dirname(context.ROOT) - h5db.attrs["startTime"] = context.START_TIME - h5db.attrs["machines"] = numpy.array(context.MPI_NODENAMES).astype("S") - - # store platform data - platform_data = uname() - h5db.attrs["platform"] = platform_data.system - h5db.attrs["hostname"] = platform_data.node - h5db.attrs["platformRelease"] = platform_data.release - h5db.attrs["platformVersion"] = platform_data.version - h5db.attrs["platformArch"] = platform_data.processor - - @staticmethod - def grabLocalCommitHash(): - """ - Try to determine the local Git commit. - - We have to be sure to handle the errors where the code is run on a system that - doesn't have Git installed. Or if the code is simply not run from inside a repo. - - Returns - ------- - str - The commit hash if it exists, otherwise "unknown". - """ - unknown = "unknown" - if not shutil.which("git"): - # no git available. cannot check git info - return unknown - repo_exists = ( - subprocess.run( - "git rev-parse --git-dir".split(), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ).returncode - == 0 - and subprocess.run( - ["git", "describe"], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ).returncode - == 0 - ) - if repo_exists: - try: - commit_hash = subprocess.check_output(["git", "describe"]) - return commit_hash.decode("utf-8").strip() - except: # noqa: bare-except - return unknown - else: - return unknown - - def close(self, completedSuccessfully=False): - """Close the DB and perform cleanups and auto-conversions.""" - self._openCount = 0 - if self.h5db is None: - return - - if self._permission == "w": - self.h5db.attrs["successfulCompletion"] = completedSuccessfully - # a bit redundant to call flush, but with unreliable IO issues, why not? - self.h5db.flush() - - self.h5db.close() - self.h5db = None - - if self._permission == "w": - # move out of the FAST_PATH and into the working directory - newPath = shutil.move(self._fullPath, self._fileName) - self._fullPath = os.path.abspath(newPath) - - def splitDatabase( - self, keepTimeSteps: Sequence[Tuple[int, int]], label: str - ) -> str: - """ - Discard all data except for specific time steps, retaining old data in a separate file. - - This is useful when performing more exotic analyses, where each "time step" may - not represent a specific point in time, but something more nuanced. For example, - equilibrium cases store a new "cycle" for each iteration as it attempts to - converge the equilibrium cycle. At the end of the run, the last "cycle" is the - converged equilibrium cycle, whereas the previous cycles constitute the path to - convergence, which we typically wish to discard before further analysis. - - Parameters - ---------- - keepTimeSteps - A collection of the time steps to retain - - label - An informative label for the backed-up database. Usually something like - "-all-iterations". Will be interposed between the source name and the ".h5" - extension. - - Returns - ------- - str - The name of the new, backed-up database file. 
- """ - if self.h5db is None: - raise ValueError("There is no open database to split.") - - self.h5db.close() - - backupDBPath = os.path.abspath(label.join(os.path.splitext(self._fileName))) - runLog.info("Retaining full database history in {}".format(backupDBPath)) - if self._fullPath is not None: - shutil.move(self._fullPath, backupDBPath) - - self.h5db = h5py.File(self._fullPath, self._permission) - dbOut = self.h5db - - with h5py.File(backupDBPath, "r") as dbIn: - dbOut.attrs.update(dbIn.attrs) - - # Copy everything except time node data - timeSteps = set() - for groupName, _ in dbIn.items(): - m = self.timeNodeGroupPattern.match(groupName) - if m: - timeSteps.add((int(m.group(1)), int(m.group(2)))) - else: - dbIn.copy(groupName, dbOut) - - if not set(keepTimeSteps).issubset(timeSteps): - raise ValueError( - "Not all desired time steps ({}) are even present in the " - "database".format(keepTimeSteps) - ) - - minCycle = next(iter(sorted(keepTimeSteps)))[0] - for cycle, node in keepTimeSteps: - offsetCycle = cycle - minCycle - offsetGroupName = getH5GroupName(offsetCycle, node) - dbIn.copy(getH5GroupName(cycle, node), dbOut, name=offsetGroupName) - dbOut[offsetGroupName + "/Reactor/cycle"][()] = offsetCycle - - return backupDBPath - - @property - def fileName(self): - return self._fileName - - @fileName.setter - def fileName(self, fName): - if self.h5db is not None: - raise RuntimeError("Cannot change Database file name while it's opened!") - self._fileName = fName - - def loadCS(self): - """Attempt to load settings from the database file. - - Notes - ----- - There are no guarantees here. If the database was written from a different version of ARMI than you are using, - these results may not be usable. For instance, the database could have been written from a vastly old or future - version of ARMI from the code you are using. - """ - cs = settings.Settings() - cs.caseTitle = os.path.splitext(os.path.basename(self.fileName))[0] - try: - cs.loadFromString(self.h5db["inputs/settings"].asstr()[()]) - except KeyError: - # not all paths to writing a database require inputs to be written to the - # database. Technically, settings do affect some of the behavior of database - # reading, so not having the settings that made the reactor that went into - # the database is not ideal. However, this isn't the right place to crash - # into it. Ideally, there would be not way to not have the settings in the - # database (force writing in writeToDB), or to make reading invariant to - # settings. - pass - - return cs - - def loadBlueprints(self): - """Attempt to load reactor blueprints from the database file. - - Notes - ----- - There are no guarantees here. If the database was written from a different version of ARMI than you are using, - these results may not be usable. For instance, the database could have been written from a vastly old or future - version of ARMI from the code you are using. - """ - # Blueprints use the yamlize package, which uses class attributes to define much of the class's behavior - # through metaclassing. Therefore, we need to be able to import all plugins *before* importing blueprints. 
- from armi.reactor.blueprints import Blueprints - - bpString = None - - try: - bpString = self.h5db["inputs/blueprints"].asstr()[()] - except KeyError: - # not all reactors need to be created from blueprints, so they may not exist - pass - - if not bpString: - # looks like no blueprints contents - return None - - stream = io.StringIO(bpString) - stream = Blueprints.migrate(stream) - - bp = Blueprints.load(stream) - return bp - - def loadGeometry(self): - """ - This is primarily just used for migrations. - - The "geometry files" were replaced by ``systems:`` and ``grids:`` sections of ``Blueprints``. - """ - geom = systemLayoutInput.SystemLayoutInput() - geom.readGeomFromStream(io.StringIO(self.h5db["inputs/geomFile"].asstr()[()])) - return geom - - def writeInputsToDB(self, cs, csString=None, geomString=None, bpString=None): - """ - Write inputs into the database based the Settings. - - This is not DRY on purpose. The goal is that any particular Database - implementation should be very stable, so we dont want it to be easy to change - one Database implementation's behavior when trying to change another's. - - .. impl:: The run settings are saved the settings file. - :id: I_ARMI_DB_CS - :implements: R_ARMI_DB_CS - - A ``Settings`` object is passed into this method, and then the settings are - converted into a YAML string stream. That stream is then written to the H5 - file. Optionally, this method can take a pre-build settings string to be - written directly to the file. - - .. impl:: The reactor blueprints are saved the settings file. - :id: I_ARMI_DB_BP - :implements: R_ARMI_DB_BP - - A ``Blueprints`` string is optionally passed into this method, and then - written to the H5 file. If it is not passed in, this method will attempt to - find the blueprints input file in the settings, and read the contents of - that file into a stream to be written to the H5 file. - - Notes - ----- - This is hard-coded to read the entire file contents into memory and write that - directly into the database. We could have the cs/blueprints/geom write to a - string, however the ARMI log file contains a hash of each files' contents. In - the future, we should be able to reproduce a calculation with confidence that - the inputs are identical. - """ - caseTitle = ( - cs.caseTitle if cs is not None else os.path.splitext(self.fileName)[0] - ) - self.h5db.attrs["caseTitle"] = caseTitle - if csString is None: - # don't read file; use what's in the cs now. - # Sometimes settings are modified in tests. - stream = io.StringIO() - cs.writeToYamlStream(stream) - stream.seek(0) - csString = stream.read() - - if bpString is None: - bpPath = pathlib.Path(cs.inputDirectory) / cs[CONF_LOADING_FILE] - # only store blueprints if we actually loaded from them - if bpPath.exists() and bpPath.is_file(): - # Ensure that the input as stored in the DB is complete - bpString = resolveMarkupInclusions( - pathlib.Path(cs.inputDirectory) / cs[CONF_LOADING_FILE] - ).read() - else: - bpString = "" - - self.h5db["inputs/settings"] = csString - self.h5db["inputs/geomFile"] = geomString or "" - self.h5db["inputs/blueprints"] = bpString - - def readInputsFromDB(self): - return ( - self.h5db["inputs/settings"].asstr()[()], - self.h5db["inputs/geomFile"].asstr()[()], - self.h5db["inputs/blueprints"].asstr()[()], - ) - - def mergeHistory(self, inputDB, startCycle, startNode): - """ - Copy time step data up to, but not including the passed cycle and node. - - Notes - ----- - This is used for restart runs with the standard operator for example. 
- The current time step (being loaded from) should not be copied, as that - time steps data will be written at the end of the time step. - """ - # iterate over the top level H5Groups and copy - for time, h5ts in zip(inputDB.genTimeSteps(), inputDB.genTimeStepGroups()): - cyc, tn = time - if cyc == startCycle and tn == startNode: - # all data up to current state are merged - return - self.h5db.copy(h5ts, h5ts.name) - - if inputDB.versionMinor < 2: - # The source database may have object references in some attributes. - # make sure to link those up using our manual path strategy. - references = [] - - def findReferences(name, obj): - for key, attr in obj.attrs.items(): - if isinstance(attr, h5py.h5r.Reference): - references.append((name, key, inputDB.h5db[attr].name)) - - h5ts.visititems(findReferences) - - for key, attr, path in references: - destTs = self.h5db[h5ts.name] - destTs[key].attrs[attr] = "@{}".format(path) - - def __enter__(self): - """Context management support.""" - if self._openCount == 0: - # open also increments _openCount - self.open() - else: - self._openCount += 1 - return self - - def __exit__(self, type, value, traceback): - """Typically we don't care why it broke but we want the DB to close.""" - self._openCount -= 1 - # always close if there is a traceback. - if self._openCount == 0 or traceback: - self.close(all(i is None for i in (type, value, traceback))) - - def __del__(self): - if self.h5db is not None: - self.close(False) - - def __delitem__(self, tn: Tuple[int, int, Optional[str]]): - cycle, timeNode, statePointName = tn - name = getH5GroupName(cycle, timeNode, statePointName) - if self.h5db is not None: - del self.h5db[name] - - def genTimeStepGroups( - self, timeSteps: Sequence[Tuple[int, int]] = None - ) -> Generator[h5py._hl.group.Group, None, None]: - """Returns a generator of HDF5 Groups for all time nodes, or for the passed selection.""" - assert ( - self.h5db is not None - ), "Must open the database before calling genTimeStepGroups" - if timeSteps is None: - for groupName, h5TimeNodeGroup in sorted(self.h5db.items()): - match = self.timeNodeGroupPattern.match(groupName) - if match: - yield h5TimeNodeGroup - else: - for step in timeSteps: - yield self.h5db[getH5GroupName(*step)] - - def getLayout(self, cycle, node): - """Return a Layout object representing the requested cycle and time node.""" - version = (self._versionMajor, self._versionMinor) - timeGroupName = getH5GroupName(cycle, node) - - return Layout(version, self.h5db[timeGroupName]) - - def genTimeSteps(self) -> Generator[Tuple[int, int], None, None]: - """Returns a generator of (cycle, node) tuples that are present in the DB.""" - assert ( - self.h5db is not None - ), "Must open the database before calling genTimeSteps" - for groupName in sorted(self.h5db.keys()): - match = self.timeNodeGroupPattern.match(groupName) - if match: - cycle = int(match.groups()[0]) - node = int(match.groups()[1]) - yield (cycle, node) - - def genAuxiliaryData(self, ts: Tuple[int, int]) -> Generator[str, None, None]: - """Returns a generator of names of auxiliary data on the requested time point.""" - assert ( - self.h5db is not None - ), "Must open the database before calling genAuxiliaryData" - cycle, node = ts - groupName = getH5GroupName(cycle, node) - timeGroup = self.h5db[groupName] - exclude = set(ArmiObject.TYPES.keys()) - exclude.add("layout") - return (groupName + "/" + key for key in timeGroup.keys() if key not in exclude) - - @staticmethod - def getAuxiliaryDataPath(ts: Tuple[int, int], name: str) -> 
str: - return getH5GroupName(*ts) + "/" + name - - def keys(self): - return (g.name for g in self.genTimeStepGroups()) - - def getH5Group(self, r, statePointName=None): - """ - Get the H5Group for the current ARMI timestep. - - This method can be used to allow other interfaces to place data into the database - at the correct timestep. - """ - groupName = getH5GroupName(r.p.cycle, r.p.timeNode, statePointName) - if groupName in self.h5db: - return self.h5db[groupName] - else: - group = self.h5db.create_group(groupName) - group.attrs["cycle"] = r.p.cycle - group.attrs["timeNode"] = r.p.timeNode - return group - - def hasTimeStep(self, cycle, timeNode, statePointName=""): - """Returns True if (cycle, timeNode, statePointName) is contained in the database.""" - return getH5GroupName(cycle, timeNode, statePointName) in self.h5db - - def writeToDB(self, reactor, statePointName=None): - assert self.h5db is not None, "Database must be open before writing." - # _createLayout is recursive - h5group = self.getH5Group(reactor, statePointName) - runLog.info("Writing to database for statepoint: {}".format(h5group.name)) - layout = Layout((self.versionMajor, self.versionMinor), comp=reactor) - layout.writeToDB(h5group) - groupedComps = layout.groupedComps - - for comps in groupedComps.values(): - self._writeParams(h5group, comps) - - def syncToSharedFolder(self): - """ - Copy DB to run working directory. - - Needed when multiple MPI processes need to read the same db, for example - when a history is needed from independent runs (e.g. for fuel performance on - a variety of assemblies). - - Notes - ----- - At some future point, we may implement a client-server like DB system which - would render this kind of operation unnecessary. - """ - runLog.extra("Copying DB to shared working directory.") - self.h5db.flush() - shutil.copy(self._fullPath, self._fileName) - - def load( - self, - cycle, - node, - cs=None, - bp=None, - statePointName=None, - allowMissing=False, - ): - """Load a new reactor from (cycle, node). - - Case settings and blueprints can be provided by the client, or read from the - database itself. Providing these from the client could be useful when - performing snapshot runs or where it is expected to use results from a run - using different settings and continue with new settings (or if blueprints are - not on the database). Geometry is read from the database itself. - - .. impl:: Users can load a reactor from a DB. - :id: I_ARMI_DB_R_LOAD - :implements: R_ARMI_DB_R_LOAD - - This method creates a ``Reactor`` object by reading the reactor state out - of an ARMI database file. This is done by passing in mandatory arguements - that specify the exact place in time you want to load the reactor from. - (That is, the cycle and node numbers.) Users can either pass the settings - and blueprints directly into this method, or it will attempt to read them - from the database file. The primary work done here is to read the hierarchy - of reactor objects from the data file, then reconstruct them in the correct - order. - - Parameters - ---------- - cycle : int - Cycle number - node : int - Time node. If value is negative, will be indexed from EOC backwards - like a list. 
- cs : armi.settings.Settings (optional) - If not provided one is read from the database - bp : armi.reactor.Blueprints (optional) - If not provided one is read from the database - statePointName : str - Optional arbitrary statepoint name (e.g., "special" for "c00n00-special/") - allowMissing : bool, optional - Whether to emit a warning, rather than crash if reading a database - with undefined parameters. Default False. - - Returns - ------- - root : Reactor - The top-level object stored in the database; a Reactor. - """ - runLog.info("Loading reactor state for time node ({}, {})".format(cycle, node)) - - cs = cs or self.loadCS() - bp = bp or self.loadBlueprints() - - if node < 0: - numNodes = getNodesPerCycle(cs)[cycle] - if (node + numNodes) < 0: - raise ValueError( - f"Node {node} specified does not exist for cycle {cycle}" - ) - node = numNodes + node - - h5group = self.h5db[getH5GroupName(cycle, node, statePointName)] - - layout = Layout((self.versionMajor, self.versionMinor), h5group=h5group) - comps, groupedComps = layout._initComps(cs.caseTitle, bp) - - # populate data onto initialized components - for compType, compTypeList in groupedComps.items(): - self._readParams(h5group, compType, compTypeList, allowMissing=allowMissing) - - # assign params from blueprints - if bp is not None: - self._assignBlueprintsParams(bp, groupedComps) - - # stitch together - self._compose(iter(comps), cs) - - # also, make sure to update the global serial number so we don't re-use a number - parameterCollections.GLOBAL_SERIAL_NUM = max( - parameterCollections.GLOBAL_SERIAL_NUM, layout.serialNum.max() - ) - root = comps[0][0] - - # return a Reactor object - if cs[CONF_SORT_REACTOR]: - root.sort() - else: - runLog.warning( - "DeprecationWarning: This Reactor is not being sorted on DB load. " - f"Due to the setting {CONF_SORT_REACTOR}, this Reactor is unsorted. " - "But this feature is temporary and will be removed by 2024." - ) - - return root - - @staticmethod - def _assignBlueprintsParams(blueprints, groupedComps): - for compType, designs in ( - (Block, blueprints.blockDesigns), - (Assembly, blueprints.assemDesigns), - ): - paramsToSet = { - pDef.name - for pDef in compType.pDefs.inCategory( - parameters.Category.assignInBlueprints - ) - } - - for comp in groupedComps[compType]: - design = designs[comp.p.type] - for pName in paramsToSet: - val = getattr(design, pName) - if val is not None: - comp.p[pName] = val - - def _compose(self, comps, cs, parent=None): - """Given a flat collection of all of the ArmiObjects in the model, reconstitute the hierarchy.""" - comp, _, numChildren, location = next(comps) - - # attach the parent early, if provided; some cases need the parent attached for - # the rest of _compose to work properly. - comp.parent = parent - - # The Reactor adds a Core child by default, this is not ideal - for spontaneousChild in list(comp): - comp.remove(spontaneousChild) - - if isinstance(comp, Core): - pass - elif isinstance(comp, Assembly): - # Assemblies force their name to be something based on assemNum. 
When the - # assembly is created it gets a new assemNum, and throws out the correct - # name that we read from the DB - comp.name = comp.makeNameFromAssemNum(comp.p.assemNum) - comp.lastLocationLabel = Assembly.DATABASE - - # set the spatialLocators on each component - if location is not None: - if parent is not None and parent.spatialGrid is not None: - comp.spatialLocator = parent.spatialGrid[location] - else: - comp.spatialLocator = grids.CoordinateLocation( - location[0], location[1], location[2], None - ) - - # Need to keep a collection of Component instances for linked dimension - # resolution, before they can be add()ed to their parents. Not just filtering - # out of `children`, since resolveLinkedDims() needs a dict - childComponents = collections.OrderedDict() - children = [] - - for _ in range(numChildren): - child = self._compose(comps, cs, parent=comp) - children.append(child) - if isinstance(child, Component): - childComponents[child.name] = child - - for _childName, child in childComponents.items(): - child.resolveLinkedDims(childComponents) - - for child in children: - comp.add(child) - - if isinstance(comp, Core): - # TODO: This is also an issue related to geoms and which core is "The Core". - # We only have a good geom for the main core, so can't do process loading on - # the SFP, etc. - if comp.hasFlags(Flags.CORE): - comp.processLoading(cs, dbLoad=True) - elif isinstance(comp, Assembly): - comp.calculateZCoords() - - return comp - - def _writeParams(self, h5group, comps): - c = comps[0] - groupName = c.__class__.__name__ - if groupName not in h5group: - # Only create the group if it doesnt already exist. This happens when - # re-writing params in the same time node (e.g. something changed between - # EveryNode and EOC) - g = h5group.create_group(groupName) - else: - g = h5group[groupName] - - for paramDef in c.p.paramDefs.toWriteToDB(): - attrs = {} - - if hasattr(c, "DIMENSION_NAMES") and paramDef.name in c.DIMENSION_NAMES: - linkedDims = [] - data = [] - - for _, c in enumerate(comps): - val = c.p[paramDef.name] - if isinstance(val, tuple): - linkedDims.append("{}.{}".format(val[0].name, val[1])) - data.append(val[0].getDimension(val[1])) - else: - linkedDims.append("") - data.append(val) - - data = numpy.array(data) - if any(linkedDims): - attrs["linkedDims"] = numpy.array(linkedDims).astype("S") - else: - # NOTE: after loading, the previously unset values will be defaulted - temp = [c.p.get(paramDef.name, paramDef.default) for c in comps] - if paramDef.serializer is not None: - data, sAttrs = paramDef.serializer.pack(temp) - assert ( - data.dtype.kind != "O" - ), "{} failed to convert {} to a numpy-supported type.".format( - paramDef.serializer.__name__, paramDef.name - ) - attrs.update(sAttrs) - attrs[_SERIALIZER_NAME] = paramDef.serializer.__name__ - attrs[_SERIALIZER_VERSION] = paramDef.serializer.version - else: - data = numpy.array(temp) - del temp - - # Convert Unicode to byte-string - if data.dtype.kind == "U": - data = data.astype("S") - - if data.dtype.kind == "O": - # Something was added to the data array that caused numpy to want to - # treat it as a general-purpose Object array. 
This usually happens - # because: - # - the data contain NoDefaults - # - the data contain one or more Nones, - # - the data contain special types like tuples, dicts, etc - # - the data are composed of arrays that numpy would otherwise happily - # convert to a higher-order array, but the dimensions of the sub-arrays - # are inconsistent ("jagged") - # - there is some sort of honest-to-goodness weird object - # We want to support the first two cases with minimal intrusion, since - # these should be pretty easy to faithfully represent in the db. The - # jagged case should be supported as well, but may require a less - # faithful representation (e.g. flattened), but the last case isn't - # really worth supporting. - - # Here is one proposal: - # - Check to see if the array is jagged. all(shape == shape[0]). If not, - # flatten, store the data offsets and array shapes, and None locations - # as attrs - # - If not jagged, all top-level ndarrays are the same shape, so it is - # easier to replace Nones with ndarrays filled with special values. - if parameters.NoDefault in data: - data = None - else: - data, specialAttrs = packSpecialData(data, paramDef.name) - attrs.update(specialAttrs) - - if data is None: - continue - - try: - if paramDef.name in g: - raise ValueError( - "`{}` was already in `{}`. This time node " - "should have been empty".format(paramDef.name, g) - ) - - dataset = g.create_dataset(paramDef.name, data=data, compression="gzip") - if any(attrs): - Database3._writeAttrs(dataset, h5group, attrs) - except Exception: - runLog.error( - "Failed to write {} to database. Data: " - "{}".format(paramDef.name, data) - ) - raise - if isinstance(c, Block): - self._addHomogenizedNumberDensityParams(comps, g) - - @staticmethod - def _addHomogenizedNumberDensityParams(blocks, h5group): - """ - Create on-the-fly block homog. number density params for XTVIEW viewing. - - See Also - -------- - collectBlockNumberDensities - """ - nDens = collectBlockNumberDensities(blocks) - - for nucName, numDens in nDens.items(): - h5group.create_dataset(nucName, data=numDens, compression="gzip") - - @staticmethod - def _readParams(h5group, compTypeName, comps, allowMissing=False): - g = h5group[compTypeName] - - renames = getApp().getParamRenames() - - pDefs = comps[0].pDefs - - # this can also be made faster by specializing the method by type - for paramName, dataSet in g.items(): - # Honor historical databases where the parameters may have changed names - # since. - while paramName in renames: - paramName = renames[paramName] - - try: - pDef = pDefs[paramName] - except KeyError: - if re.match(r"^n[A-Z][a-z]?\d*", paramName): - # This is a temporary viz param (number density) made by - # _addHomogenizedNumberDensityParams ignore it safely - continue - else: - # If a parameter exists in the database but not in the application - # reading it, we can technically keep going. Since this may lead to - # potential correctness issues, raise a warning - if allowMissing: - runLog.warning( - "Found `{}` parameter `{}` in the database, which is not defined. 
" - "Ignoring it.".format(compTypeName, paramName) - ) - continue - else: - raise - - data = dataSet[:] - attrs = Database3._resolveAttrs(dataSet.attrs, h5group) - - if pDef.serializer is not None: - assert _SERIALIZER_NAME in dataSet.attrs - assert dataSet.attrs[_SERIALIZER_NAME] == pDef.serializer.__name__ - assert _SERIALIZER_VERSION in dataSet.attrs - - data = numpy.array( - pDef.serializer.unpack( - data, dataSet.attrs[_SERIALIZER_VERSION], attrs - ) - ) - - if data.dtype.type is numpy.string_: - data = numpy.char.decode(data) - - if attrs.get("specialFormatting", False): - data = unpackSpecialData(data, attrs, paramName) - - linkedDims = [] - if "linkedDims" in attrs: - linkedDims = numpy.char.decode(attrs["linkedDims"]) - - # iterating of numpy is not fast... - for c, val, linkedDim in itertools.zip_longest( - comps, data.tolist(), linkedDims, fillvalue="" - ): - try: - if linkedDim != "": - c.p[paramName] = linkedDim - else: - c.p[paramName] = val - except AssertionError as ae: - # happens when a param was deprecated but being loaded from old DB - runLog.warning( - f"{str(ae)}\nSkipping load of invalid param `{paramName}`" - " (possibly loading from old DB)\n" - ) - - def getHistoryByLocation( - self, - comp: ArmiObject, - params: Optional[List[str]] = None, - timeSteps: Optional[Sequence[Tuple[int, int]]] = None, - ) -> History: - """Get the parameter histories at a specific location.""" - return self.getHistoriesByLocation([comp], params=params, timeSteps=timeSteps)[ - comp - ] - - def getHistoriesByLocation( - self, - comps: Sequence[ArmiObject], - params: Optional[List[str]] = None, - timeSteps: Optional[Sequence[Tuple[int, int]]] = None, - ) -> Histories: - """ - Get the parameter histories at specific locations. - - This has a number of limitations, which should in practice not be too limiting: - - The passed objects must have IndexLocations. This type of operation doesn't - make much sense otherwise. - - The passed objects must exist in a hierarchy that leads to a Core - object, which serves as an anchor that can fully define all index locations. - This could possibly be made more general by extending grids, but that gets a - little more complicated. - - All requested objects must exist under the **same** anchor object, and at the - same depth below it. - - All requested objects must have the same type. - - Parameters - ---------- - comps : list of ArmiObject - The components/composites that currently occupy the location that you want - histories at. ArmiObjects are passed, rather than locations, because this - makes it easier to figure out things related to layout. - params : List of str, optional - The parameter names for the parameters that we want the history of. If None, - all parameter history is given - timeSteps : List of (cycle, node) tuples, optional - The time nodes that you want history for. If None, all available time nodes - will be returned. - """ - if self.versionMinor < 4: - raise ValueError( - "Location-based histories are only supported for db " - "version 3.4 and greater. This database is version " - f"{self.versionMajor}, {self.versionMinor}." 
- ) - - locations = [c.spatialLocator.getCompleteIndices() for c in comps] - - histData: Histories = { - c: collections.defaultdict(collections.OrderedDict) for c in comps - } - - # Check our assumptions about the passed locations: - # All locations must have the same parent and bear the same relationship to the - # anchor object - anchors = { - obj.getAncestorAndDistance(lambda a: isinstance(a, Core)) for obj in comps - } - - if len(anchors) != 1: - raise ValueError( - "The passed objects do not have the same anchor or distance to that " - "anchor; encountered the following: {}".format(anchors) - ) - - anchorInfo = anchors.pop() - if anchorInfo is not None: - anchor, anchorDistance = anchorInfo - else: - raise ValueError( - "Could not determine an anchor object for the passed components" - ) - - anchorSerialNum = anchor.p.serialNum - - # All objects of the same type - objectTypes = {type(obj) for obj in comps} - if len(objectTypes) != 1: - raise TypeError( - "The passed objects must be the same type; got objects of " - "types `{}`".format(objectTypes) - ) - - compType = objectTypes.pop() - objClassName = compType.__name__ - - locToComp = {c.spatialLocator.getCompleteIndices(): c for c in comps} - - for h5TimeNodeGroup in self.genTimeStepGroups(timeSteps): - if "layout" not in h5TimeNodeGroup: - # layout hasnt been written for this time step, so we can't get anything - # useful here. Perhaps the current value is of use, in which case the - # DatabaseInterface should be used. - continue - - cycle = h5TimeNodeGroup.attrs["cycle"] - timeNode = h5TimeNodeGroup.attrs["timeNode"] - layout = Layout( - (self.versionMajor, self.versionMinor), h5group=h5TimeNodeGroup - ) - - ancestors = layout.computeAncestors( - layout.serialNum, layout.numChildren, depth=anchorDistance - ) - - lLocation = layout.location - # filter for objects that live under the desired ancestor and at a desired location - objectIndicesInLayout = numpy.array( - [ - i - for i, (ancestor, loc) in enumerate(zip(ancestors, lLocation)) - if ancestor == anchorSerialNum and loc in locations - ] - ) - - # This could also be way more efficient if lLocation were a numpy array - objectLocationsInLayout = [lLocation[i] for i in objectIndicesInLayout] - - objectIndicesInData = numpy.array(layout.indexInData)[ - objectIndicesInLayout - ].tolist() - - try: - h5GroupForType = h5TimeNodeGroup[objClassName] - except KeyError as ee: - runLog.error( - "{} not found in {} of {}".format( - objClassName, h5TimeNodeGroup, self - ) - ) - raise ee - - for paramName in params or h5GroupForType.keys(): - if paramName == "location": - # location is special, since it is stored in layout/ - data = numpy.array(layout.location)[objectIndicesInLayout] - elif paramName in h5GroupForType: - dataSet = h5GroupForType[paramName] - try: - data = dataSet[objectIndicesInData] - except: - runLog.error( - "Failed to load index {} from {}@{}".format( - objectIndicesInData, dataSet, (cycle, timeNode) - ) - ) - raise - - if data.dtype.type is numpy.string_: - data = numpy.char.decode(data) - - if dataSet.attrs.get("specialFormatting", False): - if dataSet.attrs.get("nones", False): - data = replaceNonsenseWithNones(data, paramName) - else: - raise ValueError( - "History tracking for non-None, " - "special-formatted parameters is not supported: " - "{}, {}".format( - paramName, {k: v for k, v in dataSet.attrs.items()} - ) - ) - else: - # Nothing in the database for this param, so use the default value - data = numpy.repeat( - parameters.byNameAndType(paramName, 
compType).default, - len(comps), - ) - - # store data to the appropriate comps. This is where taking components - # as the argument (rather than locations) is a little bit peculiar. - # - # At this point, `data` are arranged by the order of elements in - # `objectIndicesInData`, which corresponds to the order of - # `objectIndicesInLayout` - for loc, val in zip(objectLocationsInLayout, data.tolist()): - comp = locToComp[loc] - histData[comp][paramName][cycle, timeNode] = val - return histData - - def getHistory( - self, - comp: ArmiObject, - params: Optional[Sequence[str]] = None, - timeSteps: Optional[Sequence[Tuple[int, int]]] = None, - ) -> History: - """ - Get parameter history for a single ARMI Object. - - Parameters - ---------- - comps - An individual ArmiObject - params - parameters to gather - - Returns - ------- - dict - Dictionary of str/list pairs. - """ - return self.getHistories([comp], params, timeSteps)[comp] - - def getHistories( - self, - comps: Sequence[ArmiObject], - params: Optional[Sequence[str]] = None, - timeSteps: Optional[Sequence[Tuple[int, int]]] = None, - ) -> Histories: - """ - Get the parameter histories for a sequence of ARMI Objects. - - This implementation is unaware of the state of the reactor outside of the - database itself, and is therefore not usually what client code should be calling - directly during normal ARMI operation. It only knows about historical data that - have actually been written to the database. Usually one wants to be able to get - historical, plus current data, for which the similar method on the - DatabaseInterface may be more useful. - - Parameters - ---------- - comps - Something that is iterable multiple times - params - parameters to gather. - timeSteps - Selection of time nodes to get data for. If omitted, return full history - - Returns - ------- - dict - Dictionary ArmiObject (input): dict of str/list pairs containing ((cycle, - node), value). - """ - histData: Histories = { - c: collections.defaultdict(collections.OrderedDict) for c in comps - } - types = {c.__class__ for c in comps} - compsByTypeThenSerialNum: Dict[Type[ArmiObject], Dict[int, ArmiObject]] = { - t: dict() for t in types - } - - for c in comps: - compsByTypeThenSerialNum[c.__class__][c.p.serialNum] = c - - for h5TimeNodeGroup in self.genTimeStepGroups(timeSteps): - if "layout" not in h5TimeNodeGroup: - # Layout hasn't been written for this time step, so whatever is in there - # didn't come from the DatabaseInterface. 
Probably because it's the - # current time step and something has created the group to store aux - # data - continue - - cycle = h5TimeNodeGroup.attrs["cycle"] - timeNode = h5TimeNodeGroup.attrs["timeNode"] - layout = Layout( - (self.versionMajor, self.versionMinor), h5group=h5TimeNodeGroup - ) - - for compType, compsBySerialNum in compsByTypeThenSerialNum.items(): - compTypeName = compType.__name__ - try: - h5GroupForType = h5TimeNodeGroup[compTypeName] - except KeyError as ee: - runLog.error( - "{} not found in {} of {}".format( - compTypeName, h5TimeNodeGroup, self - ) - ) - raise ee - layoutIndicesForType = numpy.where(layout.type == compTypeName)[0] - serialNumsForType = layout.serialNum[layoutIndicesForType].tolist() - layoutIndexInData = layout.indexInData[layoutIndicesForType].tolist() - - indexInData = [] - reorderedComps = [] - - for ii, sn in zip(layoutIndexInData, serialNumsForType): - d = compsBySerialNum.get(sn, None) - if d is not None: - indexInData.append(ii) - reorderedComps.append(d) - - if not indexInData: - continue - - # note this is very similar to _readParams, but there are some important - # differences. - # 1) we are not assigning to p[paramName] - # 2) not using linkedDims at all - # 3) not performing parameter renaming. This may become necessary - for paramName in params or h5GroupForType.keys(): - if paramName == "location": - # cast to a numpy array so that we can use list indices - data = numpy.array(layout.location)[layoutIndicesForType][ - indexInData - ] - elif paramName in h5GroupForType: - dataSet = h5GroupForType[paramName] - try: - data = dataSet[indexInData] - except: - runLog.error( - "Failed to load index {} from {}@{}".format( - indexInData, dataSet, (cycle, timeNode) - ) - ) - raise - - if data.dtype.type is numpy.string_: - data = numpy.char.decode(data) - - if dataSet.attrs.get("specialFormatting", False): - if dataSet.attrs.get("nones", False): - data = replaceNonsenseWithNones(data, paramName) - else: - raise ValueError( - "History tracking for non-none special formatting " - "not supported: {}, {}".format( - paramName, - {k: v for k, v in dataSet.attrs.items()}, - ) - ) - else: - # Nothing in the database, so use the default value - data = numpy.repeat( - parameters.byNameAndType(paramName, compType).default, - len(reorderedComps), - ) - - # iterating of numpy is not fast.. - for c, val in zip(reorderedComps, data.tolist()): - if isinstance(val, list): - val = numpy.array(val) - - histData[c][paramName][cycle, timeNode] = val - - r = comps[0].getAncestorWithFlags(Flags.REACTOR) - cycleNode = r.p.cycle, r.p.timeNode - for c, paramHistories in histData.items(): - for paramName, hist in paramHistories.items(): - if cycleNode not in hist: - try: - hist[cycleNode] = c.p[paramName] - except: # noqa: bare-except - if paramName == "location": - hist[cycleNode] = c.spatialLocator.indices - - return histData - - @staticmethod - def _writeAttrs(obj, group, attrs): - """ - Handle safely writing attributes to a dataset, handling large data if necessary. - - This will attempt to store attributes directly onto an HDF5 object if possible, - falling back to proper datasets and reference attributes if necessary. This is - needed because HDF5 tries to fit attributes into the object header, which has - limited space. If an attribute is too large, h5py raises a RuntimeError. - In such cases, this will store the attribute data in a proper dataset and - place a reference to that dataset in the attribute instead. 
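A minimal sketch of this fallback, assuming ``tnGroup`` is a time-node group that has already been written to an open database (the attribute values simply mirror the ``test_mergeHistory`` exercise later in this patch)::

    import numpy

    from armi.bookkeeping.db.database import Database

    # numpy.eye(6400) is far too large for an HDF5 object header, so _writeAttrs falls
    # back to storing it as a dataset under tnGroup["attrs"] and writes an "@<path>"
    # link string into the attribute instead; the small string is stored directly.
    Database._writeAttrs(
        tnGroup["layout/serialNum"],
        tnGroup,
        {
            "fakeBigData": numpy.eye(6400),
            "someString": "this isn't a reference",
        },
    )

    # _resolveAttrs reverses the operation, following the "@<path>" link back to the data
    attrs = Database._resolveAttrs(tnGroup["layout/serialNum"].attrs, tnGroup)
    assert numpy.array_equal(attrs["fakeBigData"], numpy.eye(6400))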
- - In practice, this takes ``linkedDims`` attrs from a particular component type (like - ``c00n00/Circle/id``) and stores them in new datasets (like - ``c00n00/attrs/1_linkedDims``, ``c00n00/attrs/2_linkedDims``) and then sets the - object's attrs to links to those datasets. - """ - for key, value in attrs.items(): - try: - obj.attrs[key] = value - except RuntimeError as err: - if "object header message is too large" not in err.args[0]: - raise - - runLog.info( - "Storing attribute `{}` for `{}` into it's own dataset within " - "`{}/attrs`".format(key, obj, group) - ) - - if "attrs" not in group: - attrGroup = group.create_group("attrs") - else: - attrGroup = group["attrs"] - dataName = str(len(attrGroup)) + "_" + key - attrGroup[dataName] = value - - # using a soft link here allows us to cheaply copy time nodes without - # needing to crawl through and update object references. - linkName = attrGroup[dataName].name - obj.attrs[key] = "@{}".format(linkName) - - @staticmethod - def _resolveAttrs(attrs, group): - """ - Reverse the action of _writeAttrs. - - This reads actual attrs and looks for the real data - in the datasets that the attrs were pointing to. - """ - attr_link = re.compile("^@(.*)$") - - resolved = {} - for key, val in attrs.items(): - try: - if isinstance(val, h5py.h5r.Reference): - # Old style object reference. If this cannot be dereferenced, it is - # likely because mergeHistory was used to get the current database, - # which does not preserve references. - resolved[key] = group[val] - elif isinstance(val, str): - m = attr_link.match(val) - if m: - # dereference the path to get the data out of the dataset. - resolved[key] = group[m.group(1)][()] - else: - resolved[key] = val - else: - resolved[key] = val - except ValueError: - runLog.error(f"HDF error loading {key} : {val}\nGroup: {group}") - raise - - return resolved - - -def packSpecialData( - data: numpy.ndarray, paramName: str -) -> Tuple[Optional[numpy.ndarray], Dict[str, Any]]: - """ - Reduce data that wouldn't otherwise play nicely with HDF5/numpy arrays to a format - that will. - - This is the main entry point for conforming "strange" data into something that will - both fit into a numpy array/HDF5 dataset, and be recoverable to its original-ish - state when reading it back in. This is accomplished by detecting a handful of known - offenders and using various HDF5 attributes to store necessary auxiliary data. It is - important to keep in mind that the data that is passed in has already been converted - to a numpy array, so the top dimension is always representing the collection of - composites that are storing the parameters. For instance, if we are dealing with a - Block parameter, the first index in the numpy array of data is the block index; so - if each block has a parameter that is a dictionary, ``data`` would be a ndarray, - where each element is a dictionary. This routine supports a number of different - "strange" things: - - * Dict[str, float]: These are stored by finding the set of all keys for all - instances, and storing those keys as a list in an attribute. The data themselves - are stored as arrays indexed by object, then key index. Dictionaries lacking data - for a key store a nan in it's place. This will work well in instances where most - objects have data for most keys. 
- * Jagged arrays: These are stored by concatenating all of the data into a single, - one-dimensional array, and storing attributes to describe the shapes of each - object's data, and an offset into the beginning of each object's data. - * Arrays with ``None`` in them: These are stored by replacing each instance of - ``None`` with a magical value that shouldn't be encountered in realistic - scenarios. - - Parameters - ---------- - data - An ndarray storing the data that we want to stuff into the database. These are - usually dtype=Object, which is how we usually end up here in the first place. - - paramName - The parameter name that we are trying to store data for. This is mostly used for - diagnostics. - - See Also - -------- - unpackSpecialData - """ - # Check to make sure that we even need to do this. If the numpy data type is - # not "O", chances are we have nice, clean data. - if data.dtype != "O": - return data, {} - - attrs: Dict[str, Any] = {"specialFormatting": True} - - # make a copy of the data, so that the original is unchanged - data = copy.copy(data) - - # find locations of Nones. The below works for ndarrays, whereas `data == None` - # gives a single True/False value - nones = numpy.where([d is None for d in data])[0] - - if len(nones) == data.shape[0]: - # Everything is None, so why bother? - return None, attrs - - if len(nones) > 0: - attrs["nones"] = True - - # TODO: this whole if/then/elif/else can be optimized by looping once and then - # determining the correct action - # A robust solution would need - # to do this on a case-by-case basis, and re-do it any time we want to - # write, since circumstances may change. Not only that, but we may need - # to do perform more that one of these operations to get to an array - # that we want to put in the database. - if any(isinstance(d, dict) for d in data): - # we're assuming that a dict is {str: float}. We store the union of - # all of the keys for all of the objects as a special "keys" - # attribute, and store a value for all of those keys for all - # objects, whether or not there is actually data associated with - # that key (storing a nan when no data). This makes for a simple - # approach that is somewhat digestible just looking at the db, and - # should be quite efficient in the case where most objects have data - # for most keys. - attrs["dict"] = True - keys = sorted({k for d in data for k in d}) - data = numpy.array([[d.get(k, numpy.nan) for k in keys] for d in data]) - if data.dtype == "O": - # The data themselves are nasty. We could support this, but best to wait for - # a credible use case. 
- raise TypeError( - "Unable to coerce dictionary data into usable numpy array for " - "{}".format(paramName) - ) - attrs["keys"] = numpy.array(keys).astype("S") - - return data, attrs - - # conform non-numpy arrays to numpy - for i, val in enumerate(data): - if isinstance(val, (list, tuple)): - data[i] = numpy.array(val) - - if not any(isinstance(d, numpy.ndarray) for d in data): - # looks like 1-D plain-old-data - data = replaceNonesWithNonsense(data, paramName, nones) - return data, attrs - - # check if data is jagged - candidate = next((d for d in data if d is not None)) - shape = candidate.shape - ndim = candidate.ndim - isJagged = ( - not all(d.shape == shape for d in data if d is not None) or candidate.size == 0 - ) - - if isJagged: - assert all( - val.ndim == ndim for val in data if val is not None - ), "Inconsistent dimensions in jagged array for: {}\nDimensions: {}".format( - paramName, [val.ndim for val in data if val is not None] - ) - attrs["jagged"] = True - - # offsets[i] is the index of the zero-th element of sub-array i - offsets = numpy.array( - [0] - + list( - itertools.accumulate(val.size if val is not None else 0 for val in data) - )[:-1] - ) - - # shapes[i] is the shape of the i-th sub-array. Nones are represented by all - # zeros - shapes = numpy.array( - list(val.shape if val is not None else ndim * (0,) for val in data) - ) - - data = numpy.delete(data, nones) - - data = numpy.concatenate(data, axis=None) - - attrs["offsets"] = offsets - attrs["shapes"] = shapes - attrs["noneLocations"] = nones - return data, attrs - - if any(isinstance(d, (tuple, list, numpy.ndarray)) for d in data): - data = replaceNonesWithNonsense(data, paramName, nones) - return data, attrs - - if len(nones) == 0: - raise TypeError( - "Cannot write {} to the database, it did not resolve to a numpy/HDF5 " - "type.".format(paramName) - ) - - runLog.error("Data unable to find special none value: {}".format(data)) - raise TypeError("Failed to process special data for {}".format(paramName)) - - -def unpackSpecialData(data: numpy.ndarray, attrs, paramName: str) -> numpy.ndarray: - """ - Extract data from a specially-formatted HDF5 dataset into a numpy array. - - This should invert the operations performed by :py:func:`packSpecialData`. - - Parameters - ---------- - data - Specially-formatted data array straight from the database. - - attrs - The attributes associated with the dataset that contained the data. - - paramName - The name of the parameter that is being unpacked. Only used for diagnostics. - - Returns - ------- - numpy.ndarray - An ndarray containing the closest possible representation of the data that was - originally written to the database. - - See Also - -------- - packSpecialData - """ - if not attrs.get("specialFormatting", False): - # The data were not subjected to any special formatting; short circuit. - assert data.dtype != "O" - return data - - unpackedData: List[Any] - if attrs.get("nones", False) and not attrs.get("jagged", False): - data = replaceNonsenseWithNones(data, paramName) - return data - if attrs.get("jagged", False): - offsets = attrs["offsets"] - shapes = attrs["shapes"] - ndim = len(shapes[0]) - emptyArray = numpy.ndarray(ndim * (0,), dtype=data.dtype) - unpackedJaggedData: List[Optional[numpy.ndarray]] = [] - for offset, shape in zip(offsets, shapes): - if tuple(shape) == ndim * (0,): - # Start with an empty array. 
This may be replaced with a None later - unpackedJaggedData.append(emptyArray) - else: - unpackedJaggedData.append( - numpy.ndarray(shape, dtype=data.dtype, buffer=data[offset:]) - ) - for i in attrs["noneLocations"]: - unpackedJaggedData[i] = None - - return numpy.array(unpackedJaggedData, dtype=object) - if attrs.get("dict", False): - keys = numpy.char.decode(attrs["keys"]) - unpackedData = [] - assert data.ndim == 2 - for d in data: - unpackedData.append( - {key: value for key, value in zip(keys, d) if not numpy.isnan(value)} - ) - return numpy.array(unpackedData) - - raise ValueError( - "Do not recognize the type of special formatting that was applied " - "to {}. Attrs: {}".format(paramName, {k: v for k, v in attrs.items()}) - ) - - -def collectBlockNumberDensities(blocks) -> Dict[str, numpy.ndarray]: - """ - Collect block-by-block homogenized number densities for each nuclide. - - Long ago, composition was stored on block params. No longer; they are on the - component numberDensity params. These block-level params, are still useful to see - compositions in some visualization tools. Rather than keep them on the reactor - model, we dynamically compute them here and slap them in the database. These are - ignored upon reading and will not affect the results. - - Remove this once a better viz tool can view composition distributions. Also remove - the try/except in ``_readParams`` - """ - nucNames = sorted(list(set(nucName for b in blocks for nucName in b.getNuclides()))) - nucBases = [nuclideBases.byName[nn] for nn in nucNames] - # it's faster to loop over blocks first and get all number densities from each - # than it is to get one nuclide at a time from each block because of area fraction - # calculations. So we use some RAM here instead. - nucDensityMatrix = [] - for block in blocks: - nucDensityMatrix.append(block.getNuclideNumberDensities(nucNames)) - nucDensityMatrix = numpy.array(nucDensityMatrix) +# ruff: noqa: F403 +from armi.bookkeeping.db.database import * - dataDict = dict() - for ni, nb in enumerate(nucBases): - # the nth column is a vector of nuclide densities for this nuclide across all blocks - dataDict[nb.getDatabaseName()] = nucDensityMatrix[:, ni] - return dataDict +# ruff: noqa: F405 +class Database3(Database): + pass diff --git a/armi/bookkeeping/db/databaseInterface.py b/armi/bookkeeping/db/databaseInterface.py index 5c90a6d03..454885a73 100644 --- a/armi/bookkeeping/db/databaseInterface.py +++ b/armi/bookkeeping/db/databaseInterface.py @@ -30,7 +30,7 @@ from armi import context from armi import interfaces from armi import runLog -from armi.bookkeeping.db.database3 import Database3, getH5GroupName +from armi.bookkeeping.db.database import Database, getH5GroupName from armi.reactor.parameters import parameterDefinitions from armi.reactor.composites import ArmiObject from armi.bookkeeping.db.typedefs import History, Histories @@ -122,7 +122,7 @@ def initDB(self, fName: Optional[os.PathLike] = None): "title. This could lead to data loss! Rename the reload DB or the " "case." ) - self._db = Database3(self._dbPath, "w") + self._db = Database(self._dbPath, "w") self._db.open() # Grab geomString here because the DB-level has no access to the reactor or @@ -207,7 +207,7 @@ def interactDistributeState(self) -> None: if context.MPI_RANK > 0: # DB may not exist if distribute state is called early. 
if self._dbPath is not None and os.path.exists(self._dbPath): - self._db = Database3(self._dbPath, "r") + self._db = Database(self._dbPath, "r") self._db.open() def distributable(self): @@ -228,7 +228,7 @@ def prepRestartRun(self): This method loads the state of a reactor from a particular point in time from a standard ARMI - :py:class:`Database `. This is a + :py:class:`Database `. This is a major use-case for having ARMI databases in the first case. And restarting from such a database is easy, you just need to set a few settings:: @@ -252,7 +252,7 @@ def prepRestartRun(self): startCycle = self.cs["startCycle"] startNode = self.cs["startNode"] - with Database3(reloadDBName, "r") as inputDB: + with Database(reloadDBName, "r") as inputDB: loadDbCs = inputDB.loadCS() # pull the history up to the cycle/node prior to `startCycle`/`startNode` @@ -313,12 +313,12 @@ def _getLoadDB(self, fileName): if self._db is not None and fileName == self._db._fileName: yield self._db elif os.path.exists(fileName): - yield Database3(fileName, "r") + yield Database(fileName, "r") else: if self._db is not None: yield self._db if os.path.exists(self.cs["reloadDBName"]): - yield Database3(self.cs["reloadDBName"], "r") + yield Database(self.cs["reloadDBName"], "r") def loadState(self, cycle, timeNode, timeStepName="", fileName=None): """ @@ -372,12 +372,12 @@ def getHistory( """ Get historical parameter values for a single object. - This is mostly a wrapper around the same function on the ``Database3`` class, + This is mostly a wrapper around the same function on the ``Database`` class, but knows how to return the current value as well. See Also -------- - Database3.getHistory + Database.getHistory """ # make a copy so that we can potentially remove timesteps without affecting the # caller @@ -412,12 +412,12 @@ def getHistories( """ Get historical parameter values for one or more objects. - This is mostly a wrapper around the same function on the ``Database3`` class, + This is mostly a wrapper around the same function on the ``Database`` class, but knows how to return the current value as well. See Also -------- - Database3.getHistories + Database.getHistories """ now = (self.r.p.cycle, self.r.p.timeNode) nowRequested = timeSteps is None diff --git a/armi/bookkeeping/db/factory.py b/armi/bookkeeping/db/factory.py index ba7663b6c..91538bab7 100644 --- a/armi/bookkeeping/db/factory.py +++ b/armi/bookkeeping/db/factory.py @@ -15,7 +15,7 @@ import pathlib from typing import Optional -from armi.bookkeeping.db.database3 import Database3 +from armi.bookkeeping.db.database import Database from armi.bookkeeping.db import permissions @@ -74,7 +74,7 @@ def databaseFactory(dbName: str, permission: str, version: Optional[str] = None) ) if majorversion == "3": - return Database3(dbPath, permission) + return Database(dbPath, permission) raise ValueError("Unable to determine Database version for {}".format(dbName)) @@ -87,6 +87,6 @@ def databaseFactory(dbName: str, permission: str, version: Optional[str] = None) "migrate." ) if majorversion == "3": - return Database3(dbPath, permission) + return Database(dbPath, permission) return None diff --git a/armi/bookkeeping/db/layout.py b/armi/bookkeeping/db/layout.py index 4dad92606..1d4732812 100644 --- a/armi/bookkeeping/db/layout.py +++ b/armi/bookkeeping/db/layout.py @@ -17,10 +17,10 @@ When interacting with the database file, the :py:class:`Layout` class is used to help map the hierarchical Composite Reactor Model to the flat representation in -:py:class:`Database3 `. 
+:py:class:`Database `. This module also stores packing/packing tools to support -:py:class:`Database3 `, as well as datbase +:py:class:`Database `, as well as datbase versioning information. """ @@ -44,7 +44,7 @@ from armi.reactor.assemblyLists import AssemblyList from armi.reactor.reactors import Reactor -# Here we store the Database3 version information. +# Here we store the Database version information. DB_MAJOR = 3 DB_MINOR = 4 DB_VERSION = f"{DB_MAJOR}.{DB_MINOR}" @@ -95,7 +95,7 @@ class Layout: """ The Layout class describes the hierarchical layout of the Composite Reactor model in a flat representation for - :py:class:`Database3 `. + :py:class:`Database `. A Layout is built by starting at the root of a composite tree and recursively appending each node in the tree to a list of data. So the data will be ordered by @@ -138,7 +138,7 @@ def __init__(self, version: Tuple[int, int], h5group=None, comp=None): # There is a minor asymmetry here in that before writing to the DB, this is # truly a flat list of tuples. However when reading, this may contain lists of # tuples, which represent MI locations. This comes from the fact that we map the - # tuples to Location objects in Database3._compose, but map from Locations to + # tuples to Location objects in Database._compose, but map from Locations to # tuples in Layout._createLayout. Ideally we would handle both directions in the # same place so this can be less surprising. Resolving this would require # changing the interface of the various pack/unpack functions, which have diff --git a/armi/bookkeeping/db/tests/test_comparedb3.py b/armi/bookkeeping/db/tests/test_comparedb.py similarity index 99% rename from armi/bookkeeping/db/tests/test_comparedb3.py rename to armi/bookkeeping/db/tests/test_comparedb.py index f482e6fa2..f72fc8eb9 100644 --- a/armi/bookkeeping/db/tests/test_comparedb3.py +++ b/armi/bookkeeping/db/tests/test_comparedb.py @@ -18,7 +18,7 @@ import h5py import numpy as np -from armi.bookkeeping.db.compareDB3 import ( +from armi.bookkeeping.db.compareDB import ( _compareSets, _compareAuxData, _diffSimpleData, diff --git a/armi/bookkeeping/db/tests/test_database3.py b/armi/bookkeeping/db/tests/test_database.py similarity index 95% rename from armi/bookkeeping/db/tests/test_database3.py rename to armi/bookkeeping/db/tests/test_database.py index d7c4eca7d..ab5b809f4 100644 --- a/armi/bookkeeping/db/tests/test_database3.py +++ b/armi/bookkeeping/db/tests/test_database.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Tests for the Database3 class.""" +"""Tests for the Database class.""" from distutils.spawn import find_executable import subprocess import unittest @@ -20,7 +20,7 @@ import numpy from armi.bookkeeping.db import _getH5File -from armi.bookkeeping.db import database3 +from armi.bookkeeping.db import database from armi.bookkeeping.db.databaseInterface import DatabaseInterface from armi.reactor import parameters from armi.reactor.tests.test_reactors import loadTestReactor, reduceTestReactorRings @@ -37,8 +37,8 @@ GIT_EXE = "git.exe" -class TestDatabase3(unittest.TestCase): - """Tests for the Database3 class.""" +class TestDatabase(unittest.TestCase): + """Tests for the Database class.""" def setUp(self): self.td = TemporaryDirectoryChanger() @@ -50,7 +50,7 @@ def setUp(self): self.dbi = DatabaseInterface(self.r, self.o.cs) self.dbi.initDB(fName=self._testMethodName + ".h5") - self.db: database3.Database3 = self.dbi.database + self.db: database.Database = self.dbi.database self.stateRetainer = self.r.retainState().__enter__() # used to test location-based history. see details below @@ -197,8 +197,8 @@ def _compareArrays(self, ref, src): def _compareRoundTrip(self, data): """Make sure that data is unchanged by packing/unpacking.""" - packed, attrs = database3.packSpecialData(data, "testing") - roundTrip = database3.unpackSpecialData(packed, attrs, "testing") + packed, attrs = database.packSpecialData(data, "testing") + roundTrip = database.unpackSpecialData(packed, attrs, "testing") self._compareArrays(data, roundTrip) def test_prepRestartRun(self): @@ -350,13 +350,13 @@ def test_computeParents(self): ] self.assertEqual( - database3.Layout.computeAncestors(serialNums, numChildren), expected_1 + database.Layout.computeAncestors(serialNums, numChildren), expected_1 ) self.assertEqual( - database3.Layout.computeAncestors(serialNums, numChildren, 2), expected_2 + database.Layout.computeAncestors(serialNums, numChildren, 2), expected_2 ) self.assertEqual( - database3.Layout.computeAncestors(serialNums, numChildren, 3), expected_3 + database.Layout.computeAncestors(serialNums, numChildren, 3), expected_3 ) def test_load(self): @@ -482,7 +482,7 @@ def test_mergeHistory(self): self.r.p.cycle = 1 self.r.p.timeNode = 0 tnGroup = self.db.getH5Group(self.r) - database3.Database3._writeAttrs( + database.Database._writeAttrs( tnGroup["layout/serialNum"], tnGroup, { @@ -492,7 +492,7 @@ def test_mergeHistory(self): ) db_path = "restartDB.h5" - db2 = database3.Database3(db_path, "w") + db2 = database.Database(db_path, "w") with db2: db2.mergeHistory(self.db, 2, 2) self.r.p.cycle = 1 @@ -506,7 +506,7 @@ def test_mergeHistory(self): ) # exercise the _resolveAttrs function - attrs = database3.Database3._resolveAttrs( + attrs = database.Database._resolveAttrs( tnGroup["layout/serialNum"].attrs, tnGroup ) self.assertTrue(numpy.array_equal(attrs["fakeBigData"], numpy.eye(6400))) @@ -529,7 +529,7 @@ def test_splitDatabase(self): self.assertEqual(newDb["c00n00/Reactor/cycle"][()], 0) self.assertEqual(newDb["c00n00/Reactor/cycleLength"][()][0], 0) self.assertNotIn("c03n00", newDb) - self.assertEqual(newDb.attrs["databaseVersion"], database3.DB_VERSION) + self.assertEqual(newDb.attrs["databaseVersion"], database.DB_VERSION) # validate that the min set of meta data keys exists meta_data_keys = [ @@ -565,7 +565,7 @@ def test_splitDatabase(self): def test_grabLocalCommitHash(self): """Test of static method to grab a local commit hash with ARMI version.""" # 1. 
test outside a Git repo - localHash = database3.Database3.grabLocalCommitHash() + localHash = database.Database.grabLocalCommitHash() self.assertEqual(localHash, "unknown") # 2. test inside an empty git repo @@ -580,7 +580,7 @@ def test_grabLocalCommitHash(self): return self.assertEqual(code, 0) - localHash = database3.Database3.grabLocalCommitHash() + localHash = database.Database.grabLocalCommitHash() self.assertEqual(localHash, "unknown") # 3. test inside a git repo with one tag @@ -603,7 +603,7 @@ def test_grabLocalCommitHash(self): self.assertEqual(code, 0) # test that we recover the correct commit hash - localHash = database3.Database3.grabLocalCommitHash() + localHash = database.Database.grabLocalCommitHash() self.assertEqual(localHash, "thanks") # delete the .git directory @@ -653,7 +653,7 @@ def test_readInputsFromDB(self): self.assertIn("blocks:", inputs[2]) def test_deleting(self): - self.assertEqual(type(self.db), database3.Database3) + self.assertEqual(type(self.db), database.Database) del self.db self.assertFalse(hasattr(self, "db")) self.db = self.dbi.database diff --git a/armi/bookkeeping/db/tests/test_databaseInterface.py b/armi/bookkeeping/db/tests/test_databaseInterface.py index 342f5345d..325f15ae6 100644 --- a/armi/bookkeeping/db/tests/test_databaseInterface.py +++ b/armi/bookkeeping/db/tests/test_databaseInterface.py @@ -24,7 +24,7 @@ from armi import interfaces from armi import runLog from armi import settings -from armi.bookkeeping.db.database3 import Database3 +from armi.bookkeeping.db.database import Database from armi.bookkeeping.db.databaseInterface import DatabaseInterface from armi.cases import case from armi.context import PROJECT_ROOT @@ -86,7 +86,7 @@ def setUp(self): self.o, self.r = loadTestReactor(TEST_ROOT) self.dbi = DatabaseInterface(self.r, self.o.cs) self.dbi.initDB(fName=self._testMethodName + ".h5") - self.db: Database3 = self.dbi.database + self.db: Database = self.dbi.database self.stateRetainer = self.r.retainState().__enter__() def tearDown(self): @@ -149,7 +149,7 @@ def test_writeSystemAttributes(self): :tests: R_ARMI_DB_QA """ with h5py.File("test_writeSystemAttributes.h5", "w") as h5: - Database3.writeSystemAttributes(h5) + Database.writeSystemAttributes(h5) with h5py.File("test_writeSystemAttributes.h5", "r") as h5: self.assertIn("user", h5.attrs) @@ -341,7 +341,7 @@ def _fullCoreSizeChecker(self, r): self.assertEqual(len(r.core.blocksByName), 95) def test_growToFullCore(self): - with Database3(self.dbName, "r") as db: + with Database(self.dbName, "r") as db: r = db.load(0, 0, allowMissing=True) # test partial core values @@ -355,7 +355,7 @@ def test_growToFullCore(self): self._fullCoreSizeChecker(r) def test_growToFullCoreWithCS(self): - with Database3(self.dbName, "r") as db: + with Database(self.dbName, "r") as db: r = db.load(0, 0, allowMissing=True) r.core.growToFullCore(self.cs) @@ -382,7 +382,7 @@ def test_growToFullCoreFromFactoryWithCS(self): self._fullCoreSizeChecker(r) def test_readWritten(self): - with Database3(self.dbName, "r") as db: + with Database(self.dbName, "r") as db: r2 = db.load(0, 0, self.cs) for a1, a2 in zip(self.r.core, r2.core): @@ -423,7 +423,7 @@ def test_readWritten(self): ) def test_readWithoutInputs(self): - with Database3(self.dbName, "r") as db: + with Database(self.dbName, "r") as db: r2 = db.load(0, 0) for b1, b2 in zip(self.r.core.getBlocks(), r2.core.getBlocks()): @@ -434,7 +434,7 @@ def test_readWithoutInputs(self): assert_allclose(b.p.flux, 1e6 * bi) def test_variousTypesWork(self): - with 
Database3(self.dbName, "r") as db: + with Database(self.dbName, "r") as db: r2 = db.load(1, 1) b1 = self.r.core.getFirstBlock(Flags.FUEL) diff --git a/armi/bookkeeping/db/tests/test_layout.py b/armi/bookkeeping/db/tests/test_layout.py index c55579ae3..3769351d0 100644 --- a/armi/bookkeeping/db/tests/test_layout.py +++ b/armi/bookkeeping/db/tests/test_layout.py @@ -16,7 +16,7 @@ import unittest from armi import context -from armi.bookkeeping.db import database3 +from armi.bookkeeping.db import database from armi.bookkeeping.db import layout from armi.reactor import grids from armi.utils.directoryChangers import TemporaryDirectoryChanger @@ -100,7 +100,7 @@ def test_locationPackingOldVersion(self): def test_close(self): intendedFileName = "xyz.h5" - db = database3.Database3(intendedFileName, "w") + db = database.Database(intendedFileName, "w") self.assertEqual(db._fileName, intendedFileName) self.assertIsNone(db._fullPath) # this isn't set until the db is opened diff --git a/armi/bookkeeping/mainInterface.py b/armi/bookkeeping/mainInterface.py index 8bc93e1bf..71827244b 100644 --- a/armi/bookkeeping/mainInterface.py +++ b/armi/bookkeeping/mainInterface.py @@ -27,7 +27,7 @@ from armi import operators from armi import runLog from armi import utils -from armi.bookkeeping.db.database3 import Database3 +from armi.bookkeeping.db.database import Database from armi.utils import pathTools from armi.utils.customExceptions import InputError @@ -137,7 +137,7 @@ def interactEveryNode(self, cycle, node): # skip at BOL because interactBOL handled it. pass else: - with Database3(self.cs["reloadDBName"], "r") as db: + with Database(self.cs["reloadDBName"], "r") as db: r = db.load(cycle, node, self.cs) self.o.reattach(r, self.cs) diff --git a/armi/bookkeeping/visualization/tests/test_vis.py b/armi/bookkeeping/visualization/tests/test_vis.py index 3b6f6bddf..f30c1a6db 100644 --- a/armi/bookkeeping/visualization/tests/test_vis.py +++ b/armi/bookkeeping/visualization/tests/test_vis.py @@ -19,7 +19,7 @@ from pyevtk.vtk import VtkTetra from armi import settings -from armi.bookkeeping.db import Database3 +from armi.bookkeeping.db import Database from armi.bookkeeping.visualization import utils from armi.bookkeeping.visualization import vtk from armi.bookkeeping.visualization import xdmf @@ -98,7 +98,7 @@ def test_dumpReactorVtk(self): def test_dumpReactorXdmf(self): # This does a lot, and is hard to verify. 
at least make sure it doesn't crash with TemporaryDirectoryChanger(dumpOnException=False): - db = Database3("testDatabase.h5", "w") + db = Database("testDatabase.h5", "w") with db: db.writeToDB(self.r) dumper = xdmf.XdmfDumper("testVtk", inputName="testDatabase.h5") diff --git a/armi/bookkeeping/visualization/vtk.py b/armi/bookkeeping/visualization/vtk.py index ab59c3201..403917746 100644 --- a/armi/bookkeeping/visualization/vtk.py +++ b/armi/bookkeeping/visualization/vtk.py @@ -41,7 +41,7 @@ from armi.reactor import composites from armi.reactor import reactors from armi.reactor import parameters -from armi.bookkeeping.db import database3 +from armi.bookkeeping.db import database from armi.bookkeeping.visualization import dumper from armi.bookkeeping.visualization import utils @@ -109,7 +109,7 @@ def dumpState( blockData = _collectObjectData(blks, includeParams, excludeParams) assemData = _collectObjectData(assems, includeParams, excludeParams) # block number densities are special, since they arent stored as params - blockNdens = database3.collectBlockNumberDensities(blks) + blockNdens = database.collectBlockNumberDensities(blks) # we need to copy the number density vectors to guarantee unit stride, which # pyevtk requires. Kinda seems like something it could do for us, but oh well. blockNdens = {key: numpy.array(value) for key, value in blockNdens.items()} @@ -180,7 +180,7 @@ def _collectObjectData( continue try: - data = database3.replaceNonesWithNonsense(data, pDef.name, nones=nones) + data = database.replaceNonesWithNonsense(data, pDef.name, nones=nones) except (ValueError, TypeError): # Looks like we have some weird data. We might be able to handle it # with more massaging, but probably not visualizable anyhow diff --git a/armi/bookkeeping/visualization/xdmf.py b/armi/bookkeeping/visualization/xdmf.py index 256e712f4..6fb230491 100644 --- a/armi/bookkeeping/visualization/xdmf.py +++ b/armi/bookkeeping/visualization/xdmf.py @@ -52,7 +52,7 @@ from armi.reactor import composites from armi.reactor import reactors from armi.reactor import blocks -from armi.bookkeeping.db import database3 +from armi.bookkeeping.db import database from armi.bookkeeping.visualization import dumper from armi.bookkeeping.visualization import utils @@ -165,7 +165,7 @@ def __enter__(self): # into a new h5 file, but why? raise ValueError("Input database needed to generate XDMF output!") - self._inputDb = database3.Database3(self._inputName, "r") + self._inputDb = database.Database(self._inputName, "r") with self._inputDb as db: dbVersion = db.version @@ -282,7 +282,7 @@ def dumpState( cycle = r.p.cycle node = r.p.timeNode - timeGroupName = database3.getH5GroupName(cycle, node) + timeGroupName = database.getH5GroupName(cycle, node) # careful here! 
we are trying to use the database datasets as the source of hard # data without copying, so the order that we make the mesh needs to be the same diff --git a/armi/cli/database.py b/armi/cli/database.py index 651cb5dfd..bf33e20ee 100644 --- a/armi/cli/database.py +++ b/armi/cli/database.py @@ -52,9 +52,9 @@ def parse_args(self, args): self.args.output_base = os.path.splitext(self.args.h5db)[0] def invoke(self): - from armi.bookkeeping.db.database3 import Database3 + from armi.bookkeeping.db.database import Database - db = Database3(self.args.h5db, "r") + db = Database(self.args.h5db, "r") with db: settings, geom, bp = db.readInputsFromDB() @@ -113,7 +113,7 @@ def addOptions(self): ) def invoke(self): - from armi.bookkeeping.db.database3 import Database3 + from armi.bookkeeping.db.database import Database if all( li is None @@ -139,7 +139,7 @@ def invoke(self): if self.args.settings is not None: settings = resolveMarkupInclusions(pathlib.Path(self.args.settings)).read() - db = Database3(self.args.h5db, "a") + db = Database(self.args.h5db, "a") with db: # Not calling writeInputsToDb, since it makes too many assumptions about diff --git a/armi/context.py b/armi/context.py index 477a7da1a..d8a52fef6 100644 --- a/armi/context.py +++ b/armi/context.py @@ -30,7 +30,7 @@ import time # h5py needs to be imported here, so that the disconnectAllHdfDBs() call that gets bound -# to atexit below doesn't lead to a segfault on python exit. The Database3 module is +# to atexit below doesn't lead to a segfault on python exit. The Database module is # imported at call time, since it itself needs stuff that is initialized in this module # to import properly. However, if that import leads to the first time that h5py is # imported in this process, doing so will cause a segfault. The theory here is that this @@ -298,8 +298,8 @@ def disconnectAllHdfDBs() -> None: get around this by using the garbage collector to manually disconnect all open HdfDB objects. """ - from armi.bookkeeping.db import Database3 + from armi.bookkeeping.db import Database - h5dbs = [db for db in gc.get_objects() if isinstance(db, Database3)] + h5dbs = [db for db in gc.get_objects() if isinstance(db, Database)] for db in h5dbs: db.close() diff --git a/armi/reactor/parameters/parameterDefinitions.py b/armi/reactor/parameters/parameterDefinitions.py index a1d6c3d88..2385f220b 100644 --- a/armi/reactor/parameters/parameterDefinitions.py +++ b/armi/reactor/parameters/parameterDefinitions.py @@ -129,7 +129,7 @@ class Serializer: operations to be performed on the parameter values as they are stored to the database or read back in. - The ``Database3`` already knows how to handle certain cases where the data are not + The ``Database`` already knows how to handle certain cases where the data are not straightforward to get into a numpy array, such as when: - There are ``None``\ s. @@ -169,8 +169,8 @@ class Serializer: See Also -------- - armi.bookkeeping.db.database3.packSpecialData - armi.bookkeeping.db.database3.unpackSpecialData + armi.bookkeeping.db.database.packSpecialData + armi.bookkeeping.db.database.unpackSpecialData armi.reactor.flags.FlagSerializer """ diff --git a/armi/reactor/tests/test_parameters.py b/armi/reactor/tests/test_parameters.py index e2068e8ae..fa96986d4 100644 --- a/armi/reactor/tests/test_parameters.py +++ b/armi/reactor/tests/test_parameters.py @@ -98,7 +98,7 @@ def test_serializer_pack_unpack(self): This tests the ability to add a serializer to a parameter instantiation line. 
It assumes that if this parameter is not None, that the pack and unpack methods will be called during storage to and reading from the database. See - database3._writeParams for an example use of this functionality. + database._writeParams for an example use of this functionality. .. test:: Custom parameter serializer :id: T_ARMI_PARAM_SERIALIZE diff --git a/armi/tests/tutorials/data_model.ipynb b/armi/tests/tutorials/data_model.ipynb index b71c61cc9..0b8a8ff3a 100644 --- a/armi/tests/tutorials/data_model.ipynb +++ b/armi/tests/tutorials/data_model.ipynb @@ -458,7 +458,7 @@ "metadata": {}, "source": [ "## Loading from the database\n", - "Once you have a database, you can use it to load a Reactor object from any of the states that were written to it. First, create a Database3 object, then open it and call its `load()` method." + "Once you have a database, you can use it to load a Reactor object from any of the states that were written to it. First, create a Database object, then open it and call its `load()` method." ] }, { diff --git a/doc/release/0.3.rst b/doc/release/0.3.rst index f241dff6f..abc585f96 100644 --- a/doc/release/0.3.rst +++ b/doc/release/0.3.rst @@ -29,7 +29,8 @@ Changes that Affect Requirements -------------------------------- #. Very minor change to ``Block.coords()``, removing unused argument. (`PR#1651 `_) #. Touched ``HexGrid`` by adding a "cornersUp" property and fixing two bugs. (`PR#1649 `_) -#. TBD +#. Renamed ``Database3`` to ``Database``, which trivially changes the implementation. (`PR#16?? `_) +#. TBD(`PR#1649 `_) ARMI v0.3.0 diff --git a/doc/user/outputs.rst b/doc/user/outputs.rst index 8267fa93c..26112e992 100644 --- a/doc/user/outputs.rst +++ b/doc/user/outputs.rst @@ -78,7 +78,7 @@ Among other things, the database file can be used to recover an ARMI reactor mod any of the time nodes that it contains. This can be useful for performing restart runs, or for doing custom post-processing tasks. To load a reactor state, you will need to open the database file into a ``Database`` object. From there, you can call the -:py:meth:`armi.bookkeeping.db.Database3.load()` method to get a recovered +:py:meth:`armi.bookkeeping.db.Database.load()` method to get a recovered reactor object. For instance, given a database file called ``myDatabase.h5``, we could load the reactor state at cycle 5, time node 2 with the following:: @@ -95,8 +95,8 @@ Extracting Reactor History -------------------------- Not only can the database reproduce reactor state for a given time node, it can also extract a history of specific parameters for specific objects through the -:py:meth:`armi.bookkeeping.db.Database3.getHistory()` and -:py:meth:`armi.bookkeeping.db.Database3.getHistories()` methods. +:py:meth:`armi.bookkeeping.db.Database.getHistory()` and +:py:meth:`armi.bookkeeping.db.Database.getHistories()` methods. For example, given the reactor object, ``r`` from the example above, we could get the entire history of an assembly's ring, position and areal power density with the following:: @@ -247,6 +247,6 @@ the composite model sometimes need to be manipulated to fit into the HDF5 format still being able to faithfully reconstruct the original data. To accomplish this, we use HDF5 dataset attributes to indicate when some manipulation is necessary. Writing such special data to the HDF5 file and reading it back again is accomplished with the -:py:func:`armi.bookkeeping.db.database3.packSpecialData` and -:py:func:`armi.bookkeeping.db.database3.packSpecialData`. 
Refer to their implementations
+:py:func:`armi.bookkeeping.db.database.packSpecialData` and
+:py:func:`armi.bookkeeping.db.database.unpackSpecialData`. Refer to their implementations
and documentation for more details.
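
For readers following the ``doc/user/outputs.rst`` hunks above, a minimal usage sketch of the
renamed class is shown below. It assumes the import path introduced by this patch
(``armi.bookkeeping.db.database.Database``), a hypothetical file ``myDatabase.h5``, and that
``getHistory`` accepts a ``params`` keyword with the parameter names shown; only the read-mode
constructor, the context-manager usage, and the ``load()``/``getHistory()`` method names are
taken directly from the patched documentation and CLI code, so treat the rest as illustrative
rather than a verified API::

    from armi.bookkeeping.db.database import Database

    # Open the renamed Database class read-only, mirroring armi/cli/database.py
    db = Database("myDatabase.h5", "r")
    with db:
        # Recover the reactor state written at cycle 5, time node 2,
        # per the load() usage described in doc/user/outputs.rst
        r = db.load(5, 2)

        # Extract a parameter history for one assembly. The assembly lookup,
        # the "params" keyword, and the parameter names are illustrative
        # assumptions, not signatures confirmed by this patch.
        a = r.core.getFirstAssembly()
        history = db.getHistory(a, params=["ring", "pos", "arealPdens"])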