lsst · r-owen · Mar 9, 2016 · Mar 4, 2016 · Mar 4, 2016 · Mar 5, 2016
diff --git a/python/lsst/pipe/base/argumentParser.py b/python/lsst/pipe/base/argumentParser.py
@@ -20,6 +20,7 @@
 # the GNU General Public License along with this program.  If not,
 # see <https://www.lsstcorp.org/LegalNotices/>.
 #
+import abc
 import argparse
 import collections
 import fnmatch
@@ -36,7 +37,8 @@
 import lsst.pex.logging as pexLog
 import lsst.daf.persistence as dafPersist
 
-__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer", "DatasetArgument"]
+__all__ = ["ArgumentParser", "ConfigFileAction", "ConfigValueAction", "DataIdContainer",
+    "DatasetArgument", "ConfigDatasetType"]
 
 DEFAULT_INPUT_NAME = "PIPE_INPUT_ROOT"
 DEFAULT_CALIB_NAME = "PIPE_CALIB_ROOT"
@@ -109,7 +111,7 @@ def castDataIds(self, butler):
     def makeDataRefList(self, namespace):
         """!Compute refList based on idList
 
-        Not called if add_id_argument called with doMakeDataRef=False
+        Not called if add_id_argument called with doMakeDataRefList=False
 
         @param[in] namespace    results of parsing command-line (with 'butler' and 'log' elements)
         """
@@ -127,16 +129,14 @@ def makeDataRefList(self, namespace):
                 continue
             self.refList += refList
 
-
 class DataIdArgument(object):
     """!Glorified struct for data about id arguments, used by ArgumentParser.add_id_argument"""
     def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerClass=DataIdContainer):
         """!Constructor
 
         @param[in] name         name of identifier (argument name without dashes)
         @param[in] datasetType  type of dataset; specify a string for a fixed dataset type
-            or a DatasetArgument for a dynamic dataset type (one specified on the command line),
-            in which case an argument is added by name --\<name>_dstype
+            or a DatasetArgument for a dynamic dataset type (e.g. one specified by a command-line argument)
         @param[in] level        level of dataset, for butler
         @param[in] doMakeDataRefList    construct data references?
         @param[in] ContainerClass   class to contain data IDs and data references;
@@ -151,48 +151,123 @@ def __init__(self, name, datasetType, level, doMakeDataRefList=True, ContainerCl
         self.doMakeDataRefList = bool(doMakeDataRefList)
         self.ContainerClass = ContainerClass
         self.argName = name.lstrip("-")
-        if self.isDynamicDatasetType():
-            self.datasetTypeName = datasetType.name if datasetType.name else self.name + "_dstype"
-        else:
-            self.datasetTypeName = None
 
+    @property
     def isDynamicDatasetType(self):
         """!Is the dataset type dynamic (specified on the command line)?"""
-        return isinstance(self.datasetType, DatasetArgument)
+        return isinstance(self.datasetType, DynamicDatasetType)
 
     def getDatasetType(self, namespace):
-        """!Get the dataset type
+        """!Return the dataset type as a string
 
-        @param[in] namespace    parsed command created by argparse parse_args;
-            if the dataset type is dynamic then it is read from namespace.\<name>_dstype
-            else namespace is ignored
+        @param[in] namespace  parsed command
         """
-        return getattr(namespace, self.datasetTypeName) if self.isDynamicDatasetType() else self.datasetType
+        if self.isDynamicDatasetType:
+            return self.datasetType.getDatasetType(namespace)
+        else:
+            return self.datasetType
+
 
-class DatasetArgument(object):
-    """!Specify that the dataset type should be a command-line option.
+class DynamicDatasetType(object):
+    """!Abstract base class for a dataset type determined from parsed command-line arguments
+
+    Subclasses must override getDatasetType. They may also override addArgument, which
+    ArgumentParser.add_id_argument calls so the subclass can register a command-line argument.
+    """
+    __metaclass__ = abc.ABCMeta
+
+    def addArgument(self, parser, idName):
+        """!Add a command-line argument to specify dataset type name, if wanted
+
+        @param[in] parser  argument parser to which to add argument
+        @param[in] idName  name of data ID argument, without the leading "--", e.g. "id"
 
-    Somewhat more heavyweight than just using, e.g., None as a signal, but
-    provides the ability to have more informative help and a default.  Also
-    more extensible in the future.
+        The default implementation does nothing
+        """
+        pass
 
-    @param[in] name     name of command-line argument (including leading "--", if wanted);
-        if omitted a suitable default is chosen
-    @param[in] help     help string for the command-line option
-    @param[in] default  default value; if None, then the option is required
+    @abc.abstractmethod
+    def getDatasetType(self, namespace):
+        """!Return the dataset type as a string, based on parsed command-line arguments
+
+        @param[in] namespace  parsed command
+
+        This base implementation always raises NotImplementedError
+        """
+        raise NotImplementedError("Subclasses must override")
+
+
+class DatasetArgument(DynamicDatasetType):
+    """!A dataset type specified by a command-line argument.
     """
     def __init__(self,
-        name = None,
+        name=None,
         help="dataset type to process from input data repository",
         default=None,
     ):
+        """!Construct a DatasetArgument
+
+        @param[in] name  name of command-line argument (including leading "--", if appropriate)
+            whose value is the dataset type; if None, uses --idName_dstype
+            where idName is the name of the data ID argument (e.g. "id")
+        @param[in] help  help string for the command-line argument
+        @param[in] default  default value; if None, then the command-line option is required;
+            ignored if the argument is positional (name does not start with "-")
+            because such a positional argument must always be supplied
+        """
+        DynamicDatasetType.__init__(self)
         self.name = name
         self.help = help
         self.default = default
 
-    @property
-    def required(self):
-        return self.default is None
+    def getDatasetType(self, namespace):
+        """!Return the dataset type as a string, from the appropriate command-line argument
+
+        @param[in] namespace  parsed command
+
+        The attribute is looked up under the name argparse stores it as: for an option
+        (name starts with "-") the leading dashes are stripped and interior dashes become
+        underscores; a positional name is used unchanged.
+        """
+        if self.name.startswith("-"):
+            # mirror argparse's derivation of "dest" for options, e.g. "--ds-type" -> "ds_type"
+            argName = self.name.lstrip("-").replace("-", "_")
+        else:
+            argName = self.name
+        return getattr(namespace, argName)
+
+    def addArgument(self, parser, idName):
+        """!Add a command-line argument to specify dataset type name
+
+        @param[in] parser  argument parser to which to add argument
+        @param[in] idName  name of data ID argument, without the leading "--", e.g. "id"
+
+        Also set self.name to --idName_dstype if it is None
+        """
+        helpStr = self.help if self.help else "dataset type for %s" % (idName,)
+        if self.name is None:
+            self.name = "--%s_dstype" % (idName,)
+        requiredDict = dict()
+        if self.name.startswith("-"):
+            # an option is required only if it has no default
+            requiredDict = dict(required = self.default is None)
+        parser.add_argument(
+            self.name,
+            default = self.default,
+            help = helpStr,
+            **requiredDict) # argparse rejects the "required" keyword for positional arguments
+
+
+class ConfigDatasetType(DynamicDatasetType):
+    """!A dataset type specified by a config parameter
+    """
+    def __init__(self, name):
+        """!Construct a ConfigDatasetType
+
+        @param[in] name  name of config option whose value is the dataset type;
+            may be dotted (e.g. "a.b") to reach a field of a nested config
+        """
+        DynamicDatasetType.__init__(self)
+        self.name = name
+
+    def getDatasetType(self, namespace):
+        """!Return the dataset type as a string, from the appropriate config field
+
+        @param[in] namespace  parsed command; the field is looked up in namespace.config
+
+        @throw RuntimeError if any component of the config field name cannot be found
+        """
+        # getattr does not work reliably if the config field name is dotted,
+        # so step through one level at a time
+        keyList = self.name.split(".")
+        value = namespace.config
+        for key in keyList:
+            try:
+                value = getattr(value, key)
+            except (AttributeError, KeyError):
+                # getattr reports a missing attribute as AttributeError; KeyError is still
+                # caught in case some config object reports a missing entry that way
+                raise RuntimeError("Cannot find config parameter %r" % (self.name,))
+        return value
+
 
 class ArgumentParser(argparse.ArgumentParser):
     """!An argument parser for pipeline tasks that is based on argparse.ArgumentParser
@@ -272,9 +347,9 @@ def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList
         Add an argument to specify data IDs. If datasetType is an instance of DatasetArgument,
         then add a second argument to specify the dataset type.
 
-        @param[in] name                 name of name (including leading dashes, if wanted)
+        @param[in] name                 data ID argument (including leading dashes, if wanted)
         @param[in] datasetType          type of dataset; supply a string for a fixed dataset type,
-            or a DatasetArgument for a dynamically determined dataset type
+            or a DynamicDatasetType, such as DatasetArgument, for a dynamically determined dataset type
         @param[in] help                 help string for the argument
         @param[in] level                level of dataset, for butler
         @param[in] doMakeDataRefList    construct data references?
@@ -304,15 +379,9 @@ def add_id_argument(self, name, datasetType, help, level=None, doMakeDataRefList
             ContainerClass = ContainerClass,
         )
 
-        if dataIdArgument.isDynamicDatasetType():
-            datasetType = dataIdArgument.datasetType
-            help = datasetType.help if datasetType.help else "dataset type for %s" % (name,)
-            self.add_argument(
-                "--" + dataIdArgument.datasetTypeName,
-                default = datasetType.default,
-                required = datasetType.required,
-                help = help,
-            )
+        if dataIdArgument.isDynamicDatasetType:
+            datasetType.addArgument(parser=self, idName=argName)
+
         self._dataIdArgDict[argName] = dataIdArgument
 
     def parse_args(self, config, args=None, log=None, override=None):
@@ -447,7 +516,7 @@ def _parseDirectories(self, namespace):
             if namespace.rawRerun is None:
                 guessedRerun = True
 
-        # This section processes the rerun argument, if rerun is specified as a colon separated 
+        # This section processes the rerun argument, if rerun is specified as a colon separated
         # value, it will be parsed as an input and output. The input value will be overridden if
         # previously specified (but a check is made to make sure both inputs use the same mapper)
         if namespace.rawRerun:
@@ -494,21 +563,22 @@ def _processDataIds(self, namespace):
             reads these attributes:
             - butler
             - log
-            - \<name_dstype> for each data ID argument with a dynamic dataset type registered using
-                add_id_argument
+            - config, if any dynamic dataset types are set by a config parameter
+            - dataset type arguments (e.g. id_dstype), if any dynamic dataset types are specified by such
             and modifies these attributes:
             - \<name> for each data ID argument registered using add_id_argument
         """
         for dataIdArgument in self._dataIdArgDict.itervalues():
             dataIdContainer = getattr(namespace, dataIdArgument.name)
             dataIdContainer.setDatasetType(dataIdArgument.getDatasetType(namespace))
-            try:
-                dataIdContainer.castDataIds(butler = namespace.butler)
-            except (KeyError, TypeError) as e:
-                # failure of castDataIds indicates invalid command args
-                self.error(e)
-            # failure of makeDataRefList indicates a bug that wants a traceback
             if dataIdArgument.doMakeDataRefList:
+                try:
+                    dataIdContainer.castDataIds(butler = namespace.butler)
+                except (KeyError, TypeError) as e:
+                    # failure of castDataIds indicates invalid command args
+                    self.error(e)
+
+                # failure of makeDataRefList indicates a bug that wants a traceback
                 dataIdContainer.makeDataRefList(namespace)
 
     def _applyInitialOverrides(self, namespace):

diff --git a/tests/testArgumentParser.py b/tests/testArgumentParser.py
@@ -66,6 +66,9 @@ class SampleConfig(pexConfig.Config):
     subItem = pexConfig.ConfigField(doc="sample subfield", dtype=SubConfig)
     multiDocItem = pexConfig.Field(doc="1. sample... \n#2...multiline \n##3...#\n###4...docstring",
                                    dtype=str, default="multiLineDoc")
+    dsType = pexConfig.Field(doc="dataset type for --id argument", dtype=str, default="calexp")
+    dsTypeNoDefault = pexConfig.Field(doc="dataset type for --id argument; no default", dtype=str,
+        optional=True)
 
 class ArgumentParserTestCase(unittest.TestCase):
     """A test case for ArgumentParser."""
@@ -371,6 +374,113 @@ def testBareHelp(self):
             except SystemExit, e:
                 self.assertEqual(e.code, 0)
 
+    def testDatasetArgumentBasics(self):
+        """Test DatasetArgument basics"""
+        dsTypeHelp = "help text for dataset argument"
+        # try both the automatically chosen option name (--id_dstype) and an explicit one,
+        # each with and without a default dataset type
+        for name in (None, "--foo"):
+            for default in (None, "raw"):
+                argName = name if name is not None else "--id_dstype"
+                ap = pipeBase.ArgumentParser(name="argumentParser")
+                dsType = pipeBase.DatasetArgument(name=name, help=dsTypeHelp, default=default)
+                self.assertEqual(dsType.help, dsTypeHelp)
+
+                ap.add_id_argument("--id", dsType, "help text")
+                # a dataset type given on the command line is used, whether or not a default exists
+                namespace = ap.parse_args(
+                    config = self.config,
+                    args = [DataPath,
+                        argName, "calexp",
+                        "--id", "visit=2",
+                    ],
+                )
+                self.assertEqual(namespace.id.datasetType, "calexp")
+                self.assertEqual(len(namespace.id.idList), 1)
+
+                del namespace
+
+                if default is None:
+                    # make sure dataset type argument is required
+                    with self.assertRaises(SystemExit):
+                        ap.parse_args(
+                            config = self.config,
+                            args = [DataPath,
+                                "--id", "visit=2",
+                            ],
+                        )
+                else:
+                    # omitting the dataset type argument falls back to the default
+                    namespace = ap.parse_args(
+                        config = self.config,
+                        args = [DataPath,
+                            "--id", "visit=2",
+                        ],
+                    )
+                    self.assertEqual(namespace.id.datasetType, default)
+                    self.assertEqual(len(namespace.id.idList), 1)
+
+    def testDatasetArgumentPositional(self):
+        """Test DatasetArgument with a positional argument"""
+        # no leading dashes, so the dataset type is a positional argument
+        name = "foo"
+        defaultDsTypeHelp = "dataset type to process from input data repository"
+        ap = pipeBase.ArgumentParser(name="argumentParser")
+        dsType = pipeBase.DatasetArgument(name=name)
+        # no help string was supplied, so the constructor's default help is expected
+        self.assertEqual(dsType.help, defaultDsTypeHelp)
+
+        ap.add_id_argument("--id", dsType, "help text")
+        # the dataset type ("calexp") is given positionally, after the data path
+        namespace = ap.parse_args(
+            config = self.config,
+            args = [DataPath,
+                "calexp",
+                "--id", "visit=2",
+            ],
+        )
+        self.assertEqual(namespace.id.datasetType, "calexp")
+        self.assertEqual(len(namespace.id.idList), 1)
+
+        # make sure dataset type argument is required
+        with self.assertRaises(SystemExit):
+            ap.parse_args(
+                config = self.config,
+                args = [DataPath,
+                    "--id", "visit=2",
+                ],
+            )
+
+    def testConfigDatasetTypeFieldDefault(self):
+        """Test ConfigDatasetType with a config field that has a default value"""
+        # the default value for config field "dsType" is "calexp"; the command line below
+        # does not override it, so that default should become the dataset type
+        # (assumes self.config leaves dsType at its default -- confirm against setUp)
+        name = "dsType"
+        ap = pipeBase.ArgumentParser(name="argumentParser")
+        dsType = pipeBase.ConfigDatasetType(name=name)
+
+        ap.add_id_argument("--id", dsType, "help text")
+        namespace = ap.parse_args(
+            config = self.config,
+            args = [DataPath,
+                "--id", "visit=2",
+            ],
+        )
+        self.assertEqual(namespace.id.datasetType, "calexp") # default of config field dsType
+        self.assertEqual(len(namespace.id.idList), 1)
+
+    def testConfigDatasetTypeNoFieldDefault(self):
+        """Test ConfigDatasetType with a config field that has no default value"""
+        name = "dsTypeNoDefault"
+        ap = pipeBase.ArgumentParser(name="argumentParser")
+        dsType = pipeBase.ConfigDatasetType(name=name)
+
+        ap.add_id_argument("--id", dsType, "help text")
+        # the config field has no default and is not set on the command line below,
+        # so no dataset type is available and parsing is expected to raise RuntimeError
+        # (presumably a user would set the config field, or specify doMakeDataRefList=False
+        # and post-process the ID list -- confirm)
+        with self.assertRaises(RuntimeError):
+            ap.parse_args(
+                config = self.config,
+                args = [DataPath,
+                    "--id", "visit=2",
+                ],
+            )
+
 def suite():
     utilsTests.init()