Skip to content

Commit

Permalink
Merge pull request #660 from lsst/tickets/DM-33999
Browse files Browse the repository at this point in the history
DM-33999: Use unicode in run names for some tests
  • Loading branch information
timj committed Mar 11, 2022
2 parents 2b664ad + ce263c9 commit bc02ca6
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 43 deletions.
3 changes: 3 additions & 0 deletions doc/changes/DM-33999.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
The constraints on collection names have been relaxed.
Previously, collection names were limited to ASCII alphanumeric characters plus a limited selection of symbols (directory separator, @-sign).
Now all Unicode alphanumerics can be used, along with emoji.
12 changes: 8 additions & 4 deletions python/lsst/daf/butler/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,19 @@ def globToRegex(
if not expressions or "*" in expressions:
return Ellipsis

nomagic = re.compile(r"^[\w/\.\-@]+$", re.ASCII)
# List of special glob characters supported by fnmatch.
# See: https://docs.python.org/3/library/fnmatch.html
# The complication is that "[" on its own is not a glob
# unless there is a matching "]".
magic = re.compile(r"[\*\?]|\[.*\]|\[!.*\]")

# Try not to convert simple string to a regex.
results: List[Union[str, Pattern]] = []
for e in expressions:
res: Union[str, Pattern]
if nomagic.match(e):
res = e
else:
if magic.search(e):
res = re.compile(fnmatch.translate(e))
else:
res = e
results.append(res)
return results
8 changes: 4 additions & 4 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def testDataset(self):
"""
registry = self.makeRegistry()
self.loadData(registry, "base.yaml")
run = "test"
run = "tésτ"
registry.registerRun(run)
datasetType = registry.getDatasetType("bias")
dataId = {"instrument": "Cam1", "detector": 2}
Expand All @@ -400,7 +400,7 @@ def testFindDataset(self):
"""Tests for `Registry.findDataset`."""
registry = self.makeRegistry()
self.loadData(registry, "base.yaml")
run = "test"
run = "tésτ"
datasetType = registry.getDatasetType("bias")
dataId = {"instrument": "Cam1", "detector": 4}
registry.registerRun(run)
Expand Down Expand Up @@ -516,7 +516,7 @@ def testImportDatasetsInt(self):

registry = self.makeRegistry()
self.loadData(registry, "base.yaml")
run = "test"
run = "tésτ"
registry.registerRun(run)
datasetTypeBias = registry.getDatasetType("bias")
datasetTypeFlat = registry.getDatasetType("flat")
Expand Down Expand Up @@ -1101,7 +1101,7 @@ def testSkyMapDimensions(self):
)

# dataset types
run = "test"
run = "tésτ"
registry.registerRun(run)
storageClass = StorageClass("testDataset")
registry.storageClasses.registerStorageClass(storageClass)
Expand Down
41 changes: 23 additions & 18 deletions tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ class ButlerPutGetTests:
butler configurations."""

root = None
default_run = "ingésτ😺"

@staticmethod
def addDatasetType(datasetTypeName, dimensions, storageClass, registry):
Expand Down Expand Up @@ -211,7 +212,7 @@ def create_butler(self, run, storageClass, datasetTypeName):
def runPutGetTest(self, storageClass, datasetTypeName):
# New datasets will be added to run and tag, but we will only look in
# tag when looking up datasets.
run = "ingest"
run = self.default_run
butler, datasetType = self.create_butler(run, storageClass, datasetTypeName)

# Create and store a dataset
Expand Down Expand Up @@ -510,17 +511,17 @@ def setUp(self):

def testConstructor(self):
"""Independent test of constructor."""
butler = Butler(self.tmpConfigFile, run="ingest")
butler = Butler(self.tmpConfigFile, run=self.default_run)
self.assertIsInstance(butler, Butler)

# Check that butler.yaml is added automatically.
if self.tmpConfigFile.endswith(end := "/butler.yaml"):
config_dir = self.tmpConfigFile[: -len(end)]
butler = Butler(config_dir, run="ingest")
butler = Butler(config_dir, run=self.default_run)
self.assertIsInstance(butler, Butler)

collections = set(butler.registry.queryCollections())
self.assertEqual(collections, {"ingest"})
self.assertEqual(collections, {self.default_run})

# Check that some special characters can be included in run name.
special_run = "u@b.c-A"
Expand Down Expand Up @@ -581,7 +582,7 @@ def testCompositePutGetConcrete(self):
butler = self.runPutGetTest(storageClass, "test_metric")

# Should *not* be disassembled
datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
self.assertEqual(len(datasets), 1)
uri, components = butler.getURIs(datasets[0])
self.assertIsInstance(uri, ResourcePath)
Expand All @@ -602,7 +603,7 @@ def testCompositePutGetVirtual(self):
butler = self.runPutGetTest(storageClass, "test_metric_comp")

# Should be disassembled
datasets = list(butler.registry.queryDatasets(..., collections="ingest"))
datasets = list(butler.registry.queryDatasets(..., collections=self.default_run))
self.assertEqual(len(datasets), 1)
uri, components = butler.getURIs(datasets[0])

Expand Down Expand Up @@ -639,7 +640,7 @@ def testCompositePutGetVirtual(self):
self.assertEqual(compuri.fragment, "predicted", f"Checking for fragment in {compuri}")

def testIngest(self):
butler = Butler(self.tmpConfigFile, run="ingest")
butler = Butler(self.tmpConfigFile, run=self.default_run)

# Create and register a DatasetType
dimensions = butler.registry.dimensions.extract(["instrument", "visit", "detector"])
Expand Down Expand Up @@ -861,15 +862,15 @@ def testPruneCollections(self):

def testPickle(self):
"""Test pickle support."""
butler = Butler(self.tmpConfigFile, run="ingest")
butler = Butler(self.tmpConfigFile, run=self.default_run)
butlerOut = pickle.loads(pickle.dumps(butler))
self.assertIsInstance(butlerOut, Butler)
self.assertEqual(butlerOut._config, butler._config)
self.assertEqual(butlerOut.collections, butler.collections)
self.assertEqual(butlerOut.run, butler.run)

def testGetDatasetTypes(self):
butler = Butler(self.tmpConfigFile, run="ingest")
butler = Butler(self.tmpConfigFile, run=self.default_run)
dimensions = butler.registry.dimensions.extract(["instrument", "visit", "physical_filter"])
dimensionEntries = [
(
Expand Down Expand Up @@ -936,7 +937,7 @@ def testGetDatasetTypes(self):
)

def testTransaction(self):
butler = Butler(self.tmpConfigFile, run="ingest")
butler = Butler(self.tmpConfigFile, run=self.default_run)
datasetTypeName = "test_metric"
dimensions = butler.registry.dimensions.extract(["instrument", "visit"])
dimensionEntries = (
Expand Down Expand Up @@ -1022,7 +1023,7 @@ def testMakeRepo(self):
Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False)

def testStringification(self):
butler = Butler(self.tmpConfigFile, run="ingest")
butler = Butler(self.tmpConfigFile, run=self.default_run)
butlerStr = str(butler)

if self.datastoreStr is not None:
Expand All @@ -1039,7 +1040,7 @@ def testStringification(self):
def testButlerRewriteDataId(self):
"""Test that dataIds can be rewritten based on dimension records."""

butler = Butler(self.tmpConfigFile, run="ingest")
butler = Butler(self.tmpConfigFile, run=self.default_run)

storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
datasetTypeName = "random_data"
Expand Down Expand Up @@ -1105,7 +1106,7 @@ def checkFileExists(self, root, relpath):

def testPutTemplates(self):
storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
butler = Butler(self.tmpConfigFile, run="ingest")
butler = Butler(self.tmpConfigFile, run=self.default_run)

# Add needed Dimensions
butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"})
Expand Down Expand Up @@ -1136,7 +1137,9 @@ def testPutTemplates(self):
ref = butler.put(metric, "metric1", dataId1)
uri = butler.getURI(ref)
self.assertTrue(
self.checkFileExists(butler.datastore.root, "ingest/metric1/??#?/d-r/DummyCamComp_423.pickle"),
self.checkFileExists(
butler.datastore.root, f"{self.default_run}/metric1/??#?/d-r/DummyCamComp_423.pickle"
),
f"Checking existence of {uri}",
)

Expand All @@ -1150,7 +1153,9 @@ def testPutTemplates(self):
ref = butler.put(metric, "metric2", dataId2)
uri = butler.getURI(ref)
self.assertTrue(
self.checkFileExists(butler.datastore.root, "ingest/metric2/d-r/DummyCamComp_v423.pickle"),
self.checkFileExists(
butler.datastore.root, f"{self.default_run}/metric2/d-r/DummyCamComp_v423.pickle"
),
f"Checking existence of {uri}",
)

Expand Down Expand Up @@ -1215,7 +1220,7 @@ def runImportExportTest(self, storageClass):
skip_dimensions=None,
reuse_ids=False,
)
importButler = Butler(importDir, run="ingest")
importButler = Butler(importDir, run=self.default_run)
for ref in datasets:
with self.subTest(ref=ref):
# Test for existence by passing in the DatasetType and
Expand Down Expand Up @@ -1276,7 +1281,7 @@ class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase):

def testPathConstructor(self):
"""Independent test of constructor using PathLike."""
butler = Butler(self.tmpConfigFile, run="ingest")
butler = Butler(self.tmpConfigFile, run=self.default_run)
self.assertIsInstance(butler, Butler)

# And again with a Path object with the butler yaml
Expand Down Expand Up @@ -1389,7 +1394,7 @@ def testPytypePutCoercion(self):
# Store some data with the normal example storage class.
storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents")
datasetTypeName = "test_metric"
butler, _ = self.create_butler("ingest", storageClass, datasetTypeName)
butler, _ = self.create_butler(self.default_run, storageClass, datasetTypeName)

dataId = {"instrument": "DummyCamComp", "visit": 423}

Expand Down
38 changes: 21 additions & 17 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,23 +160,27 @@ def testGlobList(self):
"""Test that a list of glob strings converts as expected to a regex and
returns in the expected list.
"""
# test an absolute string
patterns = globToRegex(["bar"])
self.assertEqual(len(patterns), 1)
self.assertTrue(bool(re.fullmatch(patterns[0], "bar")))
self.assertIsNone(re.fullmatch(patterns[0], "boz"))

# test leading & trailing wildcard in multiple patterns
patterns = globToRegex(["ba*", "*.fits"])
self.assertEqual(len(patterns), 2)
# check the "ba*" pattern:
self.assertTrue(bool(re.fullmatch(patterns[0], "bar")))
self.assertTrue(bool(re.fullmatch(patterns[0], "baz")))
self.assertIsNone(re.fullmatch(patterns[0], "boz.fits"))
# check the "*.fits" pattern:
self.assertTrue(bool(re.fullmatch(patterns[1], "bar.fits")))
self.assertTrue(re.fullmatch(patterns[1], "boz.fits"))
self.assertIsNone(re.fullmatch(patterns[1], "boz.hdf5"))
# These strings should be returned unchanged.
strings = ["bar", "😺", "ingésτ", "ex]", "[xe", "[!no", "e[x"]
self.assertEqual(globToRegex(strings), strings)

# Globs with strings that match the glob and strings that do not.
tests = (
("bar", ["bar"], ["baz"]),
("ba*", ["bar", "baz"], ["az"]),
("ba[rz]", ["bar", "baz"], ["bat"]),
("ba[rz]x[y", ["barx[y", "bazx[y"], ["batx[y"]),
("ba[!rz]", ["bat", "baτ"], ["bar", "baz"]),
("b?r", ["bor", "bar", "b😺r"], ["bat"]),
("*.fits", ["boz.fits"], ["boz.fits.gz", "boz.hdf5"]),
)

for glob, matches, no_matches in tests:
patterns = globToRegex(glob)
for match in matches:
self.assertTrue(bool(re.fullmatch(patterns[0], match)))
for no_match in no_matches:
self.assertIsNone(re.fullmatch(patterns[0], no_match))


if __name__ == "__main__":
Expand Down

0 comments on commit bc02ca6

Please sign in to comment.