Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-5348 #9

Merged
merged 5 commits into from
Mar 9, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
166 changes: 118 additions & 48 deletions python/lsst/pipe/base/argumentParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#
import abc
import argparse
import collections
import fnmatch
Expand All @@ -36,7 +37,8 @@
import lsst.pex.logging as pexLog
import lsst.daf.persistence as dafPersist

__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer", "DatasetArgument"]
__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
"DatasetArgument", "ConfigDatasetType"]

DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
Expand Down Expand Up @@ -109,7 +111,7 @@ def castDataIds(self, butler):
def makeDataRefList(self, namespace):
"""!Compute refList based on idList

Not called if add_id_argument called with doMakeDataRef=False
Not called if add_id_argument called with doMakeDataRefList=False

@param[in] namespace results of parsing command-line (with 'butler' and 'log' elements)
"""
Expand All @@ -127,16 +129,14 @@ def makeDataRefList(self, namespace):
continue
self.refList += refList


class DataIdArgument(object):
"""!Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument"""
def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
"""!Constructor

@param[in] name name of identifier (argument name without dashes)
@param[in] datasetType type of dataset; specify a string for a fixed dataset type
or a DatasetArgument for a dynamic dataset type (one specified on the command line),
in which case an argument is added by name --\<name>_dstype
or a DatasetArgument for a dynamic dataset type (e.g. one specified by a command-line argument)
@param[in] level level of dataset, for butler
@param[in] doMakeDataRefList construct data references?
@param[in] ContainerClass class to contain data IDs and data references;
Expand All @@ -151,48 +151,123 @@ def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerCl
self.doMakeDataRefList = bool(doMakeDataRefList)
self.ContainerClass = ContainerClass
self.argName = name.lstrip("-")
if self.isDynamicDatasetType():
self.datasetTypeName = datasetType.name if datasetType.name else self.name + "_dstype"
else:
self.datasetTypeName = None

@property
def isDynamicDatasetType(self):
"""!Is the dataset type dynamic (specified on the command line)?"""
return isinstance(self.datasetType, DatasetArgument)
return isinstance(self.datasetType, DynamicDatasetType)

def getDatasetType(self, namespace):
"""!Get the dataset type
"""!Return the dataset type as a string

@param[in] namespace parsed command created by argparse parse_args;
if the dataset type is dynamic then it is read from namespace.\<name>_dstype
else namespace is ignored
@param[in] namespace parsed command
"""
return getattr(namespace, self.datasetTypeName) if self.isDynamicDatasetType() else self.datasetType
if self.isDynamicDatasetType:
return self.datasetType.getDatasetType(namespace)
else:
return self.datasetType


class DatasetArgument(object):
"""!Specify that the dataset type should be a command-line option.
class DynamicDatasetType(object):
"""!Abstract base class for a dataset type determined from parsed command-line arguments
"""
__metaclass__ = abc.ABCMeta

def addArgument(self, parser, idName):
"""!Add a command-line argument to specify dataset type name, if wanted

@param[in] parser argument parser to which to add argument
@param[in] idName name of data ID argument, without the leading "--", e.g. "id"

Somewhat more heavyweight than just using, e.g., None as a signal, but
provides the ability to have more informative help and a default. Also
more extensible in the future.
The default implementation does nothing
"""
pass

@param[in] name name of command-line argument (including leading "--", if wanted);
if omitted a suitable default is chosen
@param[in] help help string for the command-line option
@param[in] default default value; if None, then the option is required
@abc.abstractmethod
def getDatasetType(self, namespace):
"""Return the dataset type as a string, based on parsed command-line arguments

@param[in] namespace parsed command
"""
raise NotImplementedError("Subclasses must override")


class DatasetArgument(DynamicDatasetType):
"""!A dataset type specified by a command-line argument.
"""
def __init__(self,
name = None,
name=None,
help="dataset type to process from input data repository",
default=None,
):
"""!Construct a DatasetArgument

@param[in] name name of command-line argument (including leading "--", if appropriate)
whose value is the dataset type; if None, uses --idName_dstype
where idName is the name of the data ID argument (e.g. "id")
@param[in] help help string for the command-line argument
@param[in] default default value; if None, then the command-line option is required;
ignored if the argument is positional (name does not start with "-")
because positional arguments do not support default values
"""
DynamicDatasetType.__init__(self)
self.name = name
self.help = help
self.default = default

@property
def required(self):
return self.default is None
def getDatasetType(self, namespace):
"""Return the dataset type as a string, from the appropriate command-line argument

@param[in] namespace parsed command
"""
argName = self.name.lstrip("-")
return getattr(namespace, argName)

def addArgument(self, parser, idName):
"""!Add a command-line argument to specify dataset type name

Also set self.name if it is None
"""
help = self.help if self.help else "dataset type for %s" % (idName,)
if self.name is None:
self.name = "--%s_dstype" % (idName,)
requiredDict = dict()
if self.name.startswith("-"):
requiredDict = dict(required = self.default is None)
parser.add_argument(
self.name,
default = self.default,
help = help,
**requiredDict) # cannot specify required=None for positional arguments


class ConfigDatasetType(DynamicDatasetType):
    """!A dataset type specified by a config parameter
    """
    def __init__(self, name):
        """!Construct a ConfigDatasetType

        @param[in] name  name of config option whose value is the dataset type;
            a dotted name (e.g. "a.b") is resolved one attribute at a time,
            starting from the parsed command's config
        """
        DynamicDatasetType.__init__(self)
        self.name = name

    def getDatasetType(self, namespace):
        """!Return the dataset type as a string, from the appropriate config field

        @param[in] namespace  parsed command; its "config" attribute is read
        @return the value found by following self.name from namespace.config
        @throw RuntimeError if any component of self.name cannot be found
        """
        # getattr does not work reliably if the config field name is dotted,
        # so step through one level at a time
        value = namespace.config
        for key in self.name.split("."):
            try:
                value = getattr(value, key)
            except (AttributeError, KeyError):
                # getattr signals a missing attribute with AttributeError;
                # KeyError is still caught so that any lookup that reports
                # a missing name that way keeps producing the same error
                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
        return value


class ArgumentParser(argparse.ArgumentParser):
"""!An argument parser for pipeline tasks that is based on argparse.ArgumentParser
Expand Down Expand Up @@ -272,9 +347,9 @@ def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList
Add an argument to specify data IDs. If datasetType is an instance of DatasetArgument,
then add a second argument to specify the dataset type.

@param[in] name name of name (including leading dashes, if wanted)
@param[in] name data ID argument (including leading dashes, if wanted)
@param[in] datasetType type of dataset; supply a string for a fixed dataset type,
or a DatasetArgument for a dynamically determined dataset type
or a DynamicDatasetType, such as DatasetArgument, for a dynamically determined dataset type
@param[in] help help string for the argument
@param[in] level level of dataset, for butler
@param[in] doMakeDataRefList construct data references?
Expand Down Expand Up @@ -304,15 +379,9 @@ def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList
ContainerClass = ContainerClass,
)

if dataIdArgument.isDynamicDatasetType():
datasetType = dataIdArgument.datasetType
help = datasetType.help if datasetType.help else "dataset type for %s" % (name,)
self.add_argument(
"--" + dataIdArgument.datasetTypeName,
default = datasetType.default,
required = datasetType.required,
help = help,
)
if dataIdArgument.isDynamicDatasetType:
datasetType.addArgument(parser=self, idName=argName)

self._dataIdArgDict[argName] = dataIdArgument

def parse_args(self, config, args=None, log=None, override=None):
Expand Down Expand Up @@ -447,7 +516,7 @@ def _parseDirectories(self, namespace):
if namespace.rawRerun is None:
guessedRerun = True

# This section processes the rerun argument, if rerun is specified as a colon separated
# This section processes the rerun argument, if rerun is specified as a colon separated
# value, it will be parsed as an input and output. The input value will be overridden if
# previously specified (but a check is made to make sure both inputs use the same mapper)
if namespace.rawRerun:
Expand Down Expand Up @@ -494,21 +563,22 @@ def _processDataIds(self, namespace):
reads these attributes:
- butler
- log
- \<name_dstype> for each data ID argument with a dynamic dataset type registered using
add_id_argument
- config, if any dynamic dataset types are set by a config parameter
- dataset type arguments (e.g. id_dstype), if any dynamic dataset types are specified by such
and modifies these attributes:
- \<name> for each data ID argument registered using add_id_argument
"""
for dataIdArgument in self._dataIdArgDict.itervalues():
dataIdContainer = getattr(namespace, dataIdArgument.name)
dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
try:
dataIdContainer.castDataIds(butler = namespace.butler)
except (KeyError, TypeError) as e:
# failure of castDataIds indicates invalid command args
self.error(e)
# failure of makeDataRefList indicates a bug that wants a traceback
if dataIdArgument.doMakeDataRefList:
try:
dataIdContainer.castDataIds(butler = namespace.butler)
except (KeyError, TypeError) as e:
# failure of castDataIds indicates invalid command args
self.error(e)

# failure of makeDataRefList indicates a bug that wants a traceback
dataIdContainer.makeDataRefList(namespace)

def _applyInitialOverrides(self, namespace):
Expand Down
110 changes: 110 additions & 0 deletions tests/testArgumentParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ class SampleConfig(pexConfig.Config):
subItem = pexConfig.ConfigField(doc="sample subfield", dtype=SubConfig)
multiDocItem = pexConfig.Field(doc="1. sample... \n#2...multiline \n##3...#\n###4...docstring",
dtype=str, default="multiLineDoc")
dsType = pexConfig.Field(doc="dataset type for --id argument", dtype=str, default="calexp")
dsTypeNoDefault = pexConfig.Field(doc="dataset type for --id argument; no default", dtype=str,
optional=True)

class ArgumentParserTestCase(unittest.TestCase):
"""A test case for ArgumentParser."""
Expand Down Expand Up @@ -371,6 +374,113 @@ def testBareHelp(self):
except SystemExit, e:
self.assertEqual(e.code, 0)

def testDatasetArgumentBasics(self):
    """Test DatasetArgument basics: named and default-named options, with and without a default"""
    helpText = "help text for dataset argument"
    for name in (None, "--foo"):
        # with no explicit name the option is derived from the data ID argument name ("id")
        dsTypeOption = "--id_dstype" if name is None else name
        for default in (None, "raw"):
            parser = pipeBase.ArgumentParser(name="argumentParser")
            datasetArg = pipeBase.DatasetArgument(name=name, help=helpText, default=default)
            self.assertEqual(datasetArg.help, helpText)
            parser.add_id_argument("--id", datasetArg, "help text")

            # dataset type given explicitly on the command line
            namespace = parser.parse_args(
                config=self.config,
                args=[DataPath, dsTypeOption, "calexp", "--id", "visit=2"],
            )
            self.assertEqual(namespace.id.datasetType, "calexp")
            self.assertEqual(len(namespace.id.idList), 1)

            del namespace

            if default is not None:
                # dataset type omitted: the default must be used
                namespace = parser.parse_args(
                    config=self.config,
                    args=[DataPath, "--id", "visit=2"],
                )
                self.assertEqual(namespace.id.datasetType, default)
                self.assertEqual(len(namespace.id.idList), 1)
            else:
                # make sure dataset type argument is required
                with self.assertRaises(SystemExit):
                    parser.parse_args(
                        config=self.config,
                        args=[DataPath, "--id", "visit=2"],
                    )

def testDatasetArgumentPositional(self):
    """Test DatasetArgument with a positional argument"""
    expectedHelp = "dataset type to process from input data repository"
    parser = pipeBase.ArgumentParser(name="argumentParser")
    # a name without leading dashes makes the dataset type a positional argument
    datasetArg = pipeBase.DatasetArgument(name="foo")
    # no help was supplied, so the constructor's default help text applies
    self.assertEqual(datasetArg.help, expectedHelp)
    parser.add_id_argument("--id", datasetArg, "help text")

    parsed = parser.parse_args(
        config=self.config,
        args=[DataPath, "calexp", "--id", "visit=2"],
    )
    self.assertEqual(parsed.id.datasetType, "calexp")
    self.assertEqual(len(parsed.id.idList), 1)

    # make sure dataset type argument is required
    with self.assertRaises(SystemExit):
        parser.parse_args(
            config=self.config,
            args=[DataPath, "--id", "visit=2"],
        )

def testConfigDatasetTypeFieldDefault(self):
    """Test ConfigDatasetType with a config field that has a default value"""
    parser = pipeBase.ArgumentParser(name="argumentParser")
    # the dataset type is read from config field "dsType", whose default is "calexp"
    configDsType = pipeBase.ConfigDatasetType(name="dsType")
    parser.add_id_argument("--id", configDsType, "help text")

    parsed = parser.parse_args(
        config=self.config,
        args=[DataPath, "--id", "visit=2"],
    )
    self.assertEqual(parsed.id.datasetType, "calexp")  # default of config field dsType
    self.assertEqual(len(parsed.id.idList), 1)

def testConfigDatasetTypeNoFieldDefault(self):
    """Test ConfigDatasetType with a config field that has no default value"""
    parser = pipeBase.ArgumentParser(name="argumentParser")
    configDsType = pipeBase.ConfigDatasetType(name="dsTypeNoDefault")
    parser.add_id_argument("--id", configDsType, "help text")

    # config field "dsTypeNoDefault" has no default and nothing here sets it,
    # so parsing is expected to fail with RuntimeError; a user would have to set
    # the config field (or specify doMakeDataRefList=False and post-process the ID list)
    with self.assertRaises(RuntimeError):
        parser.parse_args(
            config=self.config,
            args=[DataPath, "--id", "visit=2"],
        )

def suite():
utilsTests.init()

Expand Down