Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OPSIM-1149: bugfix to add cols_req to new stackers #409

Merged
merged 6 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions rubin_sim/maf/batches/visitdepth_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@ def nvisitsM5Maps(
lat_lon_deg=degrees,
use_cache=False,
)
elif slicer.use_cache:
# If there is already a slicer set up which *is* using caching
else:
# If there is already a slicer set up, ensure we have one for dust
# which is NOT using cache.
slicerDust = copy.deepcopy(slicer)
slicerDust.use_cache = False

Expand Down
2 changes: 1 addition & 1 deletion rubin_sim/maf/run_comparison/summary_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def plot_run_metric(
if baseline_run is None
else normalize_metric_summaries(baseline_run, summary, metric_sets=metric_set)
)
.stack(dropna=False)
.stack(future_stack=True)
.rename("value")
.reset_index()
.rename(columns={"OpsimRun": "run"})
Expand Down
85 changes: 60 additions & 25 deletions rubin_sim/maf/stackers/base_stacker.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,23 +52,30 @@ def help(cls, doc=False):


class BaseStacker(metaclass=StackerRegistry):
"""Base MAF Stacker: add columns generated at run-time to the simdata array."""
"""Base MAF Stacker.

Stackers add columns generated at run-time to the simdata array.
"""

# List of the names of the columns generated by the Stacker.
cols_added = []

def __init__(self):
"""
Instantiate the stacker.
This method should be overriden by the user. This serves as an example of
the variables required by the framework.
This method should be overriden by the user.
This serves to define the API required by MAF.
"""
# Add the list of new columns generated by the stacker as class attributes (colsAdded - above).
# List of the names of the columns required from the database (to generate the Stacker columns).
# Add the list of new columns generated by the stacker
# as class attributes (colsAdded - above).

# List of the names of the columns required from the database
# (to generate the Stacker columns).
self.cols_req = []
# Optional: specify the new column types.
self.cols_added_dtypes = None
# Optional: provide a list of units for the columns defined in colsAdded.
# Optional: provide a list of units for the columns
# defined in colsAdded.
self.units = [None]

def __hash__(self):
Expand All @@ -77,18 +84,24 @@ def __hash__(self):
def __eq__(self, other_stacker):
"""
Evaluate if two stackers are equivalent.

This method is required to determine if metric_bundles may be
evaluated at the same time, on the same data.
"""
# If the class names are different, they are not 'the same'.
if self.__class__.__name__ != other_stacker.__class__.__name__:
return False
# Otherwise, this is the same stacker class, but may be instantiated differently.
# We have to delve a little further, and compare the kwargs & attributes for each stacker.
# Otherwise, this is the same stacker class,
# but may be instantiated differently.
# We have to delve a little further,
# and compare the kwargs & attributes for each stacker.
state_now = dir(self)
for key in state_now:
if not key.startswith("_") and key != "registry" and key != "run" and key != "next":
if not hasattr(other_stacker, key):
return False
# If the attribute is from numpy, assume it's an array and test it
# If the attribute is from numpy, assume it's an array
# and test it
if type(getattr(self, key)).__module__ == np.__name__:
if not np.array_equal(getattr(self, key), getattr(other_stacker, key)):
return False
Expand All @@ -109,8 +122,10 @@ def __ne__(self, other_stacker):
def _add_stacker_cols(self, sim_data):
"""
Add the new Stacker columns to the sim_data array.
If columns already present in sim_data, just allows 'run' method to overwrite.
Returns sim_data array with these columns added (so 'run' method can set their values).
If columns already present in sim_data,
just allows 'run' method to overwrite.
Returns sim_data array with these columns added
(so 'run' method can set their values).
"""
if not hasattr(self, "cols_added_dtypes") or self.cols_added_dtypes is None:
self.cols_added_dtypes = [float for col in self.cols_added]
Expand All @@ -131,7 +146,8 @@ def _add_stacker_cols(self, sim_data):
# Add references to old data.
for col in sim_data.dtype.names:
new_data[col] = sim_data[col]
# Were all columns present and populated with something not None? If so, then consider 'all there'.
# Were all columns present and populated with something not None?
# If so, then consider 'all there'.
if sum(cols_present) == len(self.cols_added):
cols_present = True
else:
Expand All @@ -140,8 +156,21 @@ def _add_stacker_cols(self, sim_data):

def run(self, sim_data, override=False):
"""
Example: Generate the new stacker columns, given the simdata columns from the database.
Returns the new simdata structured array that includes the new stacker columns.
Run the stacker, adding new columns.

Parameters
----------
sim_data : `np.ndarray`, (N, M)
The data to be used to evaluate metrics.
override : `bool`, optional
If True, recalculate new (stacker) columns even if present.
If False, calculates stacker columns only if they are not present.


Returns
-------
sim_data : `np.ndarray`, (N, MM)
The input data, plus additional stacker columns.
"""
# Add new columns
if len(sim_data) == 0:
Expand All @@ -162,25 +191,31 @@ def run(self, sim_data, override=False):
return self._run(sim_data)

def _run(self, sim_data, cols_present=False):
"""Run the stacker. This is the method to subclass.
"""Do the work to add the new columns. This method should be overriden
in subclasses.

Parameters
----------
sim_data: np.NDarray
sim_data: `np.ndarray`, (N, M)
The observation data, provided by the MAF framework.
cols_present: bool, optional
Flag to indicate whether the columns to be added are already present in the data.
This will also be provided by the MAF framework -- but your _run method can use the value.
If it is 'True' and you do trust the existing value, the _run method can simply return sim_data.
cols_present: `bool`, optional
Flag to indicate what to do with the columns already present in
the data. This will also be provided by the MAF framework --
but your _run method can use the value. If it is 'True' and you
do trust the existing value, the _run method can simply
return sim_data without additional calculations.

Returns
-------
np.NDarray
The simdata, with the columns added or updated (or simply already present).
sim_data : `np.ndarray`, (N, MM)
The simdata, with the columns added or updated
(or simply already present).
"""
# By moving the calculation of these columns to a separate method, we add the possibility of using
# stackers with pandas dataframes. The _addStackerCols method won't work with dataframes, but the
# _run methods are quite likely to (depending on their details), as they are just populating columns.
# By moving the calculation of these columns to a separate method,
# we add the possibility of using stackers with pandas dataframes.
# The _addStackerCols method won't work with dataframes, but the
# _run methods are quite likely to (depending on their details),
# as they are just populating columns.
raise NotImplementedError(
"Not Implemented: " "the child stackers should implement their own _run methods"
)
4 changes: 4 additions & 0 deletions rubin_sim/maf/stackers/date_stackers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def __init__(
mjd_col="observationStartMJD",
):
self.mjd_col = mjd_col
self.cols_req = [self.mjd_col]
self.units = [None]
self.cols_added_dtypes = ["datetime64[ns]"]

Expand Down Expand Up @@ -76,6 +77,7 @@ class DayObsStacker(BaseStacker):

def __init__(self, mjd_col="observationStartMJD"):
self.mjd_col = mjd_col
self.cols_req = [self.mjd_col]
self.units = ["days"]
self.cols_added_dtypes = [int]

Expand All @@ -102,6 +104,7 @@ class DayObsMJDStacker(BaseStacker):

def __init__(self, mjd_col="observationStartMJD"):
self.mjd_col = mjd_col
self.cols_req = [self.mjd_col]
self.units = ["days"]
self.cols_added_dtypes = [int]

Expand All @@ -127,6 +130,7 @@ class DayObsISOStacker(BaseStacker):

def __init__(self, mjd_col="observationStartMJD"):
self.mjd_col = mjd_col
self.cols_req = [self.mjd_col]
self.units = [None]
self.cols_added_dtypes = [(str, 10)]

Expand Down
Loading
Loading