Skip to content

Commit

Permalink
add glob argument to query-collections command
Browse files Browse the repository at this point in the history
  • Loading branch information
n8pease committed Sep 1, 2020
1 parent 412c6a5 commit 544148e
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 29 deletions.
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def config_validate(*args, **kwargs):

@click.command(short_help="Search for collections.")
@repo_argument(required=True)
@glob_argument(help="GLOB is one or more glob-style expressions that fully or partially identify the "
"collections to return.")
@collection_type_option()
@click.option("--flatten-chains/--no-flatten-chains",
help="Recursively get the child collections of matching CHAINED collections. Default is "
Expand Down
17 changes: 17 additions & 0 deletions python/lsst/daf/butler/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import errno
import os
import builtins
import fnmatch
import functools
import re
from typing import (
Expand Down Expand Up @@ -344,3 +345,19 @@ def findFileResources(values: Iterable[str], regex: Optional[str] = None) -> Lis
else:
resources.append(location)
return resources


def globToRegex(expressions):
    """Translate glob-style search terms to regex.

    Parameters
    ----------
    expressions : iterable [`str` or `...`]
        Glob-style pattern strings to convert. An Ellipsis entry is passed
        through unchanged.

    Returns
    -------
    expressions : `list` [`re.Pattern` or `...`]
        One entry per input, in input order: the compiled regular
        expression obtained from each glob string via `fnmatch.translate`,
        or the Ellipsis itself.
    """
    # Ellipsis is a singleton, so test identity rather than equality; this
    # never invokes ``__eq__`` on whatever object the caller passed in.
    return [e if e is ... else re.compile(fnmatch.translate(e)) for e in expressions]
12 changes: 10 additions & 2 deletions python/lsst/daf/butler/script/queryCollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,20 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from .. import Butler
from ..core.utils import globToRegex


def queryCollections(repo, collection_type, flatten_chains, include_chains):
def queryCollections(repo, glob, collection_type, flatten_chains, include_chains):
"""Get the collections whose names match an expression.
Parameters
----------
repo : `str`
URI to the location of the repo or URI to a config file describing the
repo and its location.
glob : iterable [`str`]
A list of glob-style search strings that fully or partially identify
the collection names to search for.
collection_type : `CollectionType` or `None`
If provided, only return collections of this type.
flatten_chains : `bool`
Expand All @@ -47,7 +51,11 @@ def queryCollections(repo, collection_type, flatten_chains, include_chains):
collection names.
"""
butler = Butler(repo)
kwargs = {}
if glob:
kwargs['expression'] = globToRegex(glob)
collections = butler.registry.queryCollections(collectionType=collection_type,
flattenChains=flatten_chains,
includeChains=include_chains)
includeChains=include_chains,
**kwargs)
return {'collections': list(collections)}
24 changes: 2 additions & 22 deletions python/lsst/daf/butler/script/queryDatasetTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,8 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import fnmatch
import re

from .. import Butler


def _translateExpr(expr):
    """Translate a glob-style search term to regex.

    Parameters
    ----------
    expr : `str` or `...`
        A glob-style pattern string to convert, or an Ellipsis.

    Returns
    -------
    expression : `re.Pattern` or `...`
        The compiled regular expression for ``expr``, or the Ellipsis
        itself if that is what was passed in.
    """
    # Ellipsis is a singleton: compare by identity, not equality.
    if expr is ...:
        return expr
    return re.compile(fnmatch.translate(expr))
from ..core.utils import globToRegex


def queryDatasetTypes(repo, verbose, glob, components):
Expand Down Expand Up @@ -73,7 +53,7 @@ def queryDatasetTypes(repo, verbose, glob, components):
butler = Butler(repo)
kwargs = {}
if glob:
kwargs['expression'] = [_translateExpr(g) for g in glob]
kwargs['expression'] = globToRegex(glob)
datasetTypes = butler.registry.queryDatasetTypes(components=components, **kwargs)
if verbose:
info = [dict(name=datasetType.name,
Expand Down
17 changes: 13 additions & 4 deletions tests/test_cliCmdQueryCollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def defaultExpected():
return dict(repo=None,
collection_type=None,
flatten_chains=False,
glob=(),
include_chains=None)

@staticmethod
Expand All @@ -53,10 +54,11 @@ def test_minimal(self):

def test_all(self):
"""Test all parameters"""
self.run_test(["query-collections", "here",
self.run_test(["query-collections", "here", "foo*",
"--flatten-chains",
"--include-chains"],
self.makeExpected(repo="here",
glob=("foo*",),
flatten_chains=True,
include_chains=True))

Expand All @@ -65,15 +67,22 @@ class QueryCollectionsScriptTest(unittest.TestCase):

def testGetCollections(self):
run = "ingest/run"
tag = "ingest"
expected = {"collections": [run, tag]}
tag = "tag"
runner = LogCliRunner()
with runner.isolated_filesystem():
butlerCfg = Butler.makeRepo("here")
# the purpose of this call is to create some collections
_ = Butler(butlerCfg, run=run, tags=[tag], collections=[tag])

# Verify collections that were created are found by
# query-collections.
result = runner.invoke(cli, ["query-collections", "here"])
self.assertEqual(expected, yaml.safe_load(result.output))
self.assertEqual({"collections": [run, tag]}, yaml.safe_load(result.output))

# Verify that, with a glob argument, only collections whose
# name matches the specified pattern are returned.
result = runner.invoke(cli, ["query-collections", "here", "t*"])
self.assertEqual({"collections": [tag]}, yaml.safe_load(result.output))


if __name__ == "__main__":
Expand Down
21 changes: 20 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from collections import Counter, namedtuple
import fnmatch
from glob import glob
import os
import re
import unittest

from lsst.daf.butler.core.utils import findFileResources, iterable, getFullTypeName, Singleton
from lsst.daf.butler.core.utils import findFileResources, getFullTypeName, globToRegex, iterable, Singleton
from lsst.daf.butler import Formatter, Registry
from lsst.daf.butler import NamedKeyDict, StorageClass

Expand Down Expand Up @@ -202,5 +204,22 @@ def test_multipleInputs(self):
self.assertEqual(expected, files)


class GlobToRegexTestCase(unittest.TestCase):
    """Tests for `globToRegex`."""

    def testGlobList(self):
        """Check that a list of globs is converted entry by entry.

        The expected values are built here with the same
        ``re.compile(fnmatch.translate(...))`` conversion, so this test
        verifies the `globToRegex` API (one output per input, Ellipsis
        passed through, order kept) rather than the translation itself.
        """
        globs = ["foo*", ..., "bar"]
        expected = [
            re.compile(fnmatch.translate(globs[0])),
            ...,
            re.compile(fnmatch.translate(globs[2])),
        ]
        self.assertEqual(globToRegex(globs), expected)


if __name__ == "__main__":
unittest.main()

0 comments on commit 544148e

Please sign in to comment.