Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-38943: fix query-data-ids CLI handling of empty queries #833

Merged
merged 4 commits into from
Apr 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/DM-38943.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed a bug in `butler query-data-ids` that caused a cryptic "the query has deferred operations..." error message when a spatial join was involved.
11 changes: 8 additions & 3 deletions python/lsst/daf/butler/script/queryDataIds.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,13 @@ def queryDataIds(
new_offset = offset if offset > 0 else None
results = results.limit(limit, new_offset)

if results.count() > 0 and len(results.graph) > 0:
table = _Table(results)
return table.getAstropyTable(not order_by), None
if results.any(exact=False):
if results.graph:
table = _Table(results)
if not table.dataIds:
return None, "Post-query region filtering removed all rows, since nothing overlapped."
return table.getAstropyTable(not order_by), None
else:
return None, "Result has one logical row but no columns because no dimensions were requested."
else:
return None, "\n".join(results.explain_no_results())
132 changes: 78 additions & 54 deletions tests/test_cliCmdQueryDataIds.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@

from astropy.table import Table as AstropyTable
from lsst.daf.butler import Butler, DatasetType, script
from lsst.daf.butler.tests.utils import ButlerTestHelper, MetricTestRepo, makeTestTempDir, removeTestTempDir
from lsst.daf.butler.tests.utils import ButlerTestHelper, makeTestTempDir, removeTestTempDir
from lsst.daf.butler.transfers import YamlRepoImportBackend
from numpy import array

TESTDIR = os.path.abspath(os.path.dirname(__file__))
Expand All @@ -53,88 +54,108 @@ def _queryDataIds(repo, dimensions=(), collections=(), datasets=None, where=""):

def setUp(self):
self.root = makeTestTempDir(TESTDIR)
self.repo = MetricTestRepo(
root=self.root, configFile=os.path.join(TESTDIR, "config/basic/butler.yaml")
)
self.repo = Butler.makeRepo(self.root)

def tearDown(self):
removeTestTempDir(self.root)

def loadData(self, *filenames: str) -> Butler:
"""Load registry test data from ``TESTDIR/data/registry/<filename>``,
which should be a YAML import/export file.
"""
butler = Butler(self.repo, writeable=True)
for filename in filenames:
with open(os.path.join(TESTDIR, "data", "registry", filename), "r") as stream:
# Go behind the back of the import code a bit to deal with
# the fact that this is just registry content with no actual
# files for the datastore.
backend = YamlRepoImportBackend(stream, butler.registry)
backend.register()
backend.load(datastore=None)
return butler

def testDimensions(self):
"""Test getting a dimension."""
res, msg = self._queryDataIds(self.root, dimensions=("visit",))
self.loadData("base.yaml")
res, msg = self._queryDataIds(self.root, dimensions=("detector",))
expected = AstropyTable(
array((("R", "DummyCamComp", "d-r", 423), ("R", "DummyCamComp", "d-r", 424))),
names=("band", "instrument", "physical_filter", "visit"),
array((("Cam1", 1), ("Cam1", 2), ("Cam1", 3), ("Cam1", 4))), names=("instrument", "detector")
)
self.assertFalse(msg)
self.assertAstropyTablesEqual(res, expected)

def testNull(self):
"Test asking for nothing."
def testNoDimensions(self):
"""Test asking for no dimensions."""
res, msg = self._queryDataIds(self.root)
self.assertIsNone(res, msg)
self.assertEqual(msg, "")
self.assertEqual(
msg, "Result has one logical row but no columns because no dimensions were requested."
)

def testWhere(self):
"""Test with a WHERE constraint."""
def testNoResultsEasy(self):
"""Test getting no results in a way that's detectable without having
to execute the full query.
"""
self.loadData("base.yaml", "spatial.yaml")
res, msg = self._queryDataIds(
self.root, dimensions=("visit",), where="instrument='DummyCamComp' AND visit=423"
self.root,
dimensions=("visit", "tract"),
where="instrument='Cam1' AND skymap='SkyMap1' AND visit=1 AND tract=1",
)
expected = AstropyTable(
array((("R", "DummyCamComp", "d-r", 423),)),
names=("band", "instrument", "physical_filter", "visit"),
)
self.assertAstropyTablesEqual(res, expected)
self.assertIsNone(msg)

def testDatasetsAndCollections(self):
"""Test constraining via datasets and collections."""
self.assertIsNone(res, msg)
self.assertIn("yields no results when applied to", msg)

# Add a dataset in a different collection
self.butler = Butler(self.root, run="foo")
self.repo.butler.registry.insertDimensionData(
"visit",
{
"instrument": "DummyCamComp",
"id": 425,
"name": "fourtwentyfive",
"physical_filter": "d-r",
},
def testNoResultsHard(self):
"""Test getting no results in a way that can't be detected unless we
run the whole query.
"""
self.loadData("base.yaml", "spatial.yaml")
res, msg = self._queryDataIds(
self.root,
dimensions=("visit", "tract"),
where="instrument='Cam1' AND skymap='SkyMap1' AND visit=1 AND tract=0 AND patch=5",
)
self.repo.addDataset(dataId={"instrument": "DummyCamComp", "visit": 425}, run="foo")
self.assertIsNone(res, msg)
self.assertIn("Post-query region filtering removed all rows", msg)

# Verify the new dataset is not found in the "ingest/run" collection.
def testWhere(self):
"""Test with a WHERE constraint."""
self.loadData("base.yaml")
res, msg = self._queryDataIds(
repo=self.root, dimensions=("visit",), collections=("ingest/run",), datasets="test_metric_comp"
self.root, dimensions=("detector",), where="instrument='Cam1' AND detector=2"
)
expected = AstropyTable(
array((("R", "DummyCamComp", "d-r", 423), ("R", "DummyCamComp", "d-r", 424))),
names=("band", "instrument", "physical_filter", "visit"),
array((("Cam1", 2),)),
names=(
"instrument",
"detector",
),
)
self.assertAstropyTablesEqual(res, expected)
self.assertIsNone(msg)

# Verify the new dataset is found in the "foo" collection.
def testDatasetsAndCollections(self):
"""Test constraining via datasets and collections."""
butler = self.loadData("base.yaml", "datasets-uuid.yaml")
# See that the data IDs returned are constrained by that collection's
# contents.
res, msg = self._queryDataIds(
repo=self.root, dimensions=("visit",), collections=("foo",), datasets="test_metric_comp"
repo=self.root, dimensions=("detector",), collections=("imported_g",), datasets="bias"
)
expected = AstropyTable(
array((("R", "DummyCamComp", "d-r", 425),)),
names=("band", "instrument", "physical_filter", "visit"),
array((("Cam1", 1), ("Cam1", 2), ("Cam1", 3))),
names=(
"instrument",
"detector",
),
)
self.assertAstropyTablesEqual(res, expected)
self.assertIsNone(msg)

# Verify the new dataset is found in the "foo" collection and the
# dimensions are determined automatically.
# Check that the dimensions are inferred when not provided.
with self.assertLogs("lsst.daf.butler.script.queryDataIds", "INFO") as cm:
res, msg = self._queryDataIds(repo=self.root, collections=("foo",), datasets="test_metric_comp")
res, msg = self._queryDataIds(repo=self.root, collections=("imported_g",), datasets="bias")
self.assertIn("Determined dimensions", "\n".join(cm.output))
expected = AstropyTable(
array((("R", "DummyCamComp", "d-r", 425),)),
names=("band", "instrument", "physical_filter", "visit"),
)
self.assertAstropyTablesEqual(res, expected)
self.assertIsNone(msg)

Expand All @@ -143,28 +164,31 @@ def testDatasetsAndCollections(self):
"test_metric_dimensionless",
(),
"StructuredDataDict",
universe=self.repo.butler.registry.dimensions,
universe=butler.registry.dimensions,
)
self.repo.butler.registry.registerDatasetType(new_dataset_type)
res, msg = self._queryDataIds(repo=self.root, collections=("foo",), datasets=...)
butler.registry.registerDatasetType(new_dataset_type)
res, msg = self._queryDataIds(repo=self.root, collections=("imported_g",), datasets=...)
self.assertIsNone(res)
self.assertIn("No dimensions in common", msg)

# Check that we get a reason returned if no dataset type is found.
with self.assertWarns(FutureWarning):
res, msg = self._queryDataIds(
repo=self.root, dimensions=("visit",), collections=("foo",), datasets="raw"
repo=self.root, dimensions=("detector",), collections=("imported_g",), datasets="raw"
)
self.assertIsNone(res)
self.assertEqual(msg, "Dataset type raw is not registered.")

# Check that we get a reason returned if no dataset is found in
# collection.
res, msg = self._queryDataIds(
repo=self.root, dimensions=("visit",), collections=("ingest",), datasets="test_metric_comp"
repo=self.root,
dimensions=("detector",),
collections=("imported_g",),
datasets="test_metric_dimensionless",
)
self.assertIsNone(res)
self.assertIn("No datasets of type test_metric_comp", msg)
self.assertIn("No datasets of type test_metric_dimensionless", msg)


if __name__ == "__main__":
Expand Down