Skip to content

Commit

Permalink
Add ability to use where/bind clauses in LogBrowser
Browse files Browse the repository at this point in the history
Loading in all logs associated with a given task in a given collection
can take a very long time (i.e. depending on how many dataRefs exist
for that task.)  This adds the ability to add a "where" clause to the
dataId search such that the log loading time can be greatly reduced.
A "bind" mapping can (optionally) be used with the where clause.  If
a given key provided in the bind mapping does not appear in the where
clause string, a warning will be logged (stating that no binding will
take effect on that key).
  • Loading branch information
laurenam committed Mar 26, 2024
1 parent 9fe8be8 commit 65aefa7
Showing 1 changed file with 30 additions and 3 deletions.
33 changes: 30 additions & 3 deletions python/lsst/summit/extras/logUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,18 @@ class LogBrowser:
The name of the task, e.g. ``isr``, ``characterizeImage``, etc.
collection : `str`
The processing collection to use.
where : `str`, optional
A dataId search string formatted appropriately (i.e. similarly to a
SQL WHERE clause) for a where clause in butler.registry.queryDatasets.
E.g.
where = ("instrument=\'{}\' AND skymap=\'{}\' AND "
"visit IN (0..100)".format("LATISS", "latiss_v1"))
bind : `~collections.abc.Mapping`, optional
Mapping containing literal values to be injected into the ``where``
expression, keyed by the identifiers they replace.
E.g.
where = "exposure IN (exposures)"
bind = {"exposures": exposure_list}
Notes
-----
Expand All @@ -50,7 +62,8 @@ class LogBrowser:
animal.
example usage:
logBrowser = LogBrowser(butler, taskName, collection)
logBrowser = LogBrowser(butler, taskName, collection, where=where,
bind=bind)
fail = 'TaskError: Fatal astrometry failure detected: mean on-sky distance'
logBrowser.SPECIAL_ZOO_CASES.append(fail)
logBrowser.doFailZoology()
Expand All @@ -64,13 +77,23 @@ class LogBrowser:
"with gufunc signature (n?,k),(k,m?)->(n?,m?)",
]

def __init__(self, butler, taskName, collection):
def __init__(self, butler, taskName, collection, where="", bind=None):
    """Store the query configuration, then find and load the task logs.

    Parameters
    ----------
    butler
        The butler stored on the instance as ``self.butler``.
    taskName : `str`
        The name of the task whose logs are wanted.
    collection : `str`
        The processing collection to use.
    where : `str`, optional
        Query constraint string stored as ``self.where``.
    bind : `~collections.abc.Mapping`, optional
        Values to substitute for identifiers in ``where``, keyed by
        identifier name. A warning is logged for every key that does not
        occur in ``where``.
    """
    self.taskName = taskName
    self.collection = collection
    self.where = where
    self.bind = bind

    self.log = _LOG.getChild("logBrowser")
    self.butler = butler

    if self.bind is not None:
        # Plain substring test against the where string: a key that never
        # appears there cannot be bound to anything, so flag it up front.
        for key in self.bind:
            if key not in self.where:
                # Logger.warn is a deprecated alias (removed in Python
                # 3.13); Logger.warning is the supported spelling.
                self.log.warning(
                    f"Key '{key}' in bind is not in the where string provided: "
                    f"'{self.where}', so no binding will take effect."
                )

    # Both helpers read the attributes set above, so they must run last.
    self.dataRefs = self._getDataRefs()
    self.logs = self._loadLogs(self.dataRefs)

Expand All @@ -82,7 +105,11 @@ def _getDataRefs(self):
dataRefs : `list` [`lsst.daf.butler.core.datasets.ref.DatasetRef`]
"""
results = self.butler.registry.queryDatasets(
f"{self.taskName}_log", collections=self.collection, findFirst=True
f"{self.taskName}_log",
collections=self.collection,
findFirst=True,
where=self.where,
bind=self.bind,
)
results = list(set(results))
self.log.info(f"Found {len(results)} datasets in collection for task {self.taskName}")
Expand Down

0 comments on commit 65aefa7

Please sign in to comment.