lsst · rhiannonlynne · Apr 29, 2024 · Apr 26, 2024 · Apr 26, 2024 · Apr 26, 2024
diff --git a/rubin_sim/maf/batches/visitdepth_batch.py b/rubin_sim/maf/batches/visitdepth_batch.py
@@ -101,8 +101,9 @@ def nvisitsM5Maps(
             lat_lon_deg=degrees,
             use_cache=False,
         )
-    elif slicer.use_cache:
-        # If there is already a slicer set up which *is* using caching
+    else:
+        # If there is already a slicer set up, ensure we have one for dust
+        # which is NOT using cache.
         slicerDust = copy.deepcopy(slicer)
         slicerDust.use_cache = False
 

diff --git a/rubin_sim/maf/run_comparison/summary_plots.py b/rubin_sim/maf/run_comparison/summary_plots.py
@@ -242,7 +242,7 @@ def plot_run_metric(
             if baseline_run is None
             else normalize_metric_summaries(baseline_run, summary, metric_sets=metric_set)
         )
-        .stack(dropna=False)
+        .stack(future_stack=True)
         .rename("value")
         .reset_index()
         .rename(columns={"OpsimRun": "run"})

diff --git a/rubin_sim/maf/stackers/base_stacker.py b/rubin_sim/maf/stackers/base_stacker.py
@@ -52,23 +52,30 @@ def help(cls, doc=False):
 
 
 class BaseStacker(metaclass=StackerRegistry):
-    """Base MAF Stacker: add columns generated at run-time to the simdata array."""
+    """Base MAF Stacker.
+
+    Stackers add columns generated at run-time to the simdata array.
+    """
 
     # List of the names of the columns generated by the Stacker.
     cols_added = []
 
     def __init__(self):
         """
         Instantiate the stacker.
-        This method should be overriden by the user. This serves as an example of
-        the variables required by the framework.
+        This method should be overriden by the user.
+        This serves to define the API required by MAF.
         """
-        # Add the list of new columns generated by the stacker as class attributes (colsAdded - above).
-        # List of the names of the columns required from the database (to generate the Stacker columns).
+        # Add the list of new columns generated by the stacker
+        # as class attributes (colsAdded - above).
+
+        # List of the names of the columns required from the database
+        # (to generate the Stacker columns).
         self.cols_req = []
         # Optional: specify the new column types.
         self.cols_added_dtypes = None
-        # Optional: provide a list of units for the columns defined in colsAdded.
+        # Optional: provide a list of units for the columns
+        # defined in colsAdded.
         self.units = [None]
 
     def __hash__(self):
@@ -77,18 +84,24 @@ def __hash__(self):
     def __eq__(self, other_stacker):
         """
         Evaluate if two stackers are equivalent.
+
+        This method is required to determine if metric_bundles may be
+        evaluated at the same time, on the same data.
         """
         # If the class names are different, they are not 'the same'.
         if self.__class__.__name__ != other_stacker.__class__.__name__:
             return False
-        # Otherwise, this is the same stacker class, but may be instantiated differently.
-        # We have to delve a little further, and compare the kwargs & attributes for each stacker.
+        # Otherwise, this is the same stacker class,
+        # but may be instantiated differently.
+        # We have to delve a little further,
+        # and compare the kwargs & attributes for each stacker.
         state_now = dir(self)
         for key in state_now:
             if not key.startswith("_") and key != "registry" and key != "run" and key != "next":
                 if not hasattr(other_stacker, key):
                     return False
-                # If the attribute is from numpy, assume it's an array and test it
+                # If the attribute is from numpy, assume it's an array
+                # and test it
                 if type(getattr(self, key)).__module__ == np.__name__:
                     if not np.array_equal(getattr(self, key), getattr(other_stacker, key)):
                         return False
@@ -109,8 +122,10 @@ def __ne__(self, other_stacker):
     def _add_stacker_cols(self, sim_data):
         """
         Add the new Stacker columns to the sim_data array.
-        If columns already present in sim_data, just allows 'run' method to overwrite.
-        Returns sim_data array with these columns added (so 'run' method can set their values).
+        If columns already present in sim_data,
+        just allows 'run' method to overwrite.
+        Returns sim_data array with these columns added
+        (so 'run' method can set their values).
         """
         if not hasattr(self, "cols_added_dtypes") or self.cols_added_dtypes is None:
             self.cols_added_dtypes = [float for col in self.cols_added]
@@ -131,7 +146,8 @@ def _add_stacker_cols(self, sim_data):
         # Add references to old data.
         for col in sim_data.dtype.names:
             new_data[col] = sim_data[col]
-        # Were all columns present and populated with something not None? If so, then consider 'all there'.
+        # Were all columns present and populated with something not None?
+        # If so, then consider 'all there'.
         if sum(cols_present) == len(self.cols_added):
             cols_present = True
         else:
@@ -140,8 +156,21 @@ def _add_stacker_cols(self, sim_data):
 
     def run(self, sim_data, override=False):
         """
-        Example: Generate the new stacker columns, given the simdata columns from the database.
-        Returns the new simdata structured array that includes the new stacker columns.
+        Run the stacker, adding new columns.
+
+        Parameters
+        ----------
+        sim_data : `np.ndarray`, (N, M)
+            The data to be used to evaluate metrics.
+        override : `bool`, optional
+            If True, recalculate new (stacker) columns even if present.
+            If False, calculates stacker columns only if they are not present.
+
+
+        Returns
+        -------
+        sim_data : `np.ndarray`, (N, MM)
+            The input data, plus additional stacker columns.
         """
         # Add new columns
         if len(sim_data) == 0:
@@ -162,25 +191,31 @@ def run(self, sim_data, override=False):
             return self._run(sim_data)
 
     def _run(self, sim_data, cols_present=False):
-        """Run the stacker. This is the method to subclass.
+        """Do the work to add the new columns. This method should be overriden
+        in subclasses.
 
         Parameters
         ----------
-        sim_data: np.NDarray
+        sim_data: `np.ndarray`, (N, M)
             The observation data, provided by the MAF framework.
-        cols_present: bool, optional
-            Flag to indicate whether the columns to be added are already present in the data.
-            This will also be provided by the MAF framework -- but your _run method can use the value.
-            If it is 'True' and you do trust the existing value, the _run method can simply return sim_data.
+        cols_present: `bool`, optional
+            Flag to indicate what to do with the columns already present in
+            the data.  This will also be provided by the MAF framework --
+            but your _run method can use the value. If it is 'True' and you
+            do trust the existing value, the _run method can simply
+            return sim_data without additional calculations.
 
         Returns
         -------
-        np.NDarray
-            The simdata, with the columns added or updated (or simply already present).
+        sim_data : `np.ndarray`, (N, MM)
+            The simdata, with the columns added or updated
+            (or simply already present).
         """
-        # By moving the calculation of these columns to a separate method, we add the possibility of using
-        #  stackers with pandas dataframes. The _addStackerCols method won't work with dataframes, but the
-        #  _run methods are quite likely to (depending on their details), as they are just populating columns.
+        # By moving the calculation of these columns to a separate method,
+        # we add the possibility of using stackers with pandas dataframes.
+        # The _addStackerCols method won't work with dataframes, but the
+        #  _run methods are quite likely to (depending on their details),
+        #  as they are just populating columns.
         raise NotImplementedError(
             "Not Implemented: " "the child stackers should implement their own _run methods"
         )
diff --git a/rubin_sim/maf/stackers/date_stackers.py b/rubin_sim/maf/stackers/date_stackers.py
@@ -21,6 +21,7 @@ def __init__(
         mjd_col="observationStartMJD",
     ):
         self.mjd_col = mjd_col
+        self.cols_req = [self.mjd_col]
         self.units = [None]
         self.cols_added_dtypes = ["datetime64[ns]"]
 
@@ -76,6 +77,7 @@ class DayObsStacker(BaseStacker):
 
     def __init__(self, mjd_col="observationStartMJD"):
         self.mjd_col = mjd_col
+        self.cols_req = [self.mjd_col]
         self.units = ["days"]
         self.cols_added_dtypes = [int]
 
@@ -102,6 +104,7 @@ class DayObsMJDStacker(BaseStacker):
 
     def __init__(self, mjd_col="observationStartMJD"):
         self.mjd_col = mjd_col
+        self.cols_req = [self.mjd_col]
         self.units = ["days"]
         self.cols_added_dtypes = [int]
 
@@ -127,6 +130,7 @@ class DayObsISOStacker(BaseStacker):
 
     def __init__(self, mjd_col="observationStartMJD"):
         self.mjd_col = mjd_col
+        self.cols_req = [self.mjd_col]
         self.units = [None]
         self.cols_added_dtypes = [(str, 10)]