Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-25030 make an import butler subcommand #293

Merged
merged 14 commits into from
May 30, 2020
28 changes: 18 additions & 10 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
Mapping,
MutableMapping,
Optional,
TextIO,
Tuple,
Union,
)
Expand Down Expand Up @@ -1291,7 +1292,7 @@ def export(self, *, directory: Optional[str] = None,
helper._finish()

def import_(self, *, directory: Optional[str] = None,
filename: Optional[str] = None,
filename: Union[str, TextIO, None] = None,
format: Optional[str] = None,
transfer: Optional[str] = None):
"""Import datasets exported from a different butler repository.
Expand All @@ -1301,12 +1302,12 @@ def import_(self, *, directory: Optional[str] = None,
directory : `str`, optional
Directory containing dataset files. If `None`, all file paths
must be absolute.
filename : `str`, optional
Name for the file that containing database information associated
with the exported datasets. If this is not an absolute path, does
not exist in the current working directory, and ``directory`` is
not `None`, it is assumed to be in ``directory``. Defaults to
"export.{format}".
filename : `str` or `TextIO`, optional
A stream or name of file that contains database information
associated with the exported datasets. If this is a string (name) and
is not an absolute path, does not exist in the current working
directory, and ``directory`` is not `None`, it is assumed to be in
``directory``. Defaults to "export.{format}".
format : `str`, optional
File format for the database information file. If `None`, the
extension of ``filename`` will be used.
Expand All @@ -1328,15 +1329,22 @@ def import_(self, *, directory: Optional[str] = None,
_, format = os.path.splitext(filename)
elif filename is None:
filename = f"export.{format}"
if directory is not None and not os.path.exists(filename):
if isinstance(filename, str) and directory is not None and not os.path.exists(filename):
filename = os.path.join(directory, filename)
BackendClass = getClassOf(self._config["repo_transfer_formats"][format]["import"])
with open(filename, 'r') as stream:
backend = BackendClass(stream, self.registry)

def doImport(importStream):
backend = BackendClass(importStream, self.registry)
backend.register()
with self.transaction():
backend.load(self.datastore, directory=directory, transfer=transfer)

if isinstance(filename, str):
with open(filename, "r") as stream:
doImport(stream)
else:
doImport(filename)

def validateConfiguration(self, logFailures: bool = False,
datasetTypeNames: Optional[Iterable[str]] = None,
ignore: Iterable[str] = None):
Expand Down
48 changes: 32 additions & 16 deletions python/lsst/daf/butler/cli/butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,38 @@ def _initLogging(logLevel):


def funcNameToCmdName(functionName):
"""Change underscores, used in functions, to dashes, used in commands."""
"""Convert function name to the butler command name: change underscores,
(used in functions) to dashes (used in commands), and change the legal
function names of local-package commands that conflict with python keywords
back to the command name.
"""
# The "import" command name and "butler_import" function name are defined
# in cli/cmd/commands.py, and if those names are changed they must be
# changed here as well.
# It is expected that there will be very few butler command names that need
# to be changed because of e.g. conflicts with python keywords (as is done
# here and in cmdNameToFuncName for the 'import' command). If this becomes
# a common need then some way of doing this should be invented that is
# better than hard coding the function names into these conversion
# functions. An extension of the 'cli/resources.yaml' file (as is currently
# used in obs_base) might be a good way to do it.
if functionName == "butler_import":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One other option that may be less prone to scaling problems could be to define a dict that maps from "import" to "butler_import" and then automatically generate the inverse dict. Then in these two functions you do the equivalent of:

functionName = funcToCommand.get(functionName, functionName)

(I understand that my dict name there is non-optimal)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about making some kind of data structure like that, but if I'm going to do even that much work I wanted to take a moment to figure out the best way to solve the problem. I think it might involve the plugin system, but I didn't want to spend that much time on it or add complexity, yet.

functionName = "import"
return functionName.replace("_", "-")


def cmdNameToFuncName(commandName):
    """Convert a butler command name to the name of its implementing function.

    Dashes (used in commands) are changed to underscores (used in functions).
    Local-package commands whose name conflicts with a python keyword are
    mapped to their legal function name first; currently this only applies to
    the "import" command, which is implemented by "butler_import".

    Parameters
    ----------
    commandName : `str`
        The command name as typed on the command line, e.g. "config-dump".

    Returns
    -------
    functionName : `str`
        The name of the function that implements the command, e.g.
        "config_dump".
    """
    # "import" is a python keyword and cannot be used as a function name, so
    # the command is special-cased here.
    if commandName == "import":
        commandName = "butler_import"
    return commandName.replace("-", "_")


class LoaderCLI(click.MultiCommand):

def __init__(self, *args, **kwargs):
self.commands = None
super().__init__(*args, **kwargs)

@staticmethod
Expand Down Expand Up @@ -164,8 +183,7 @@ def _getCommands(cls):
The key is the command name. The value is a list of package(s) that
contains the command.
"""
commands = cls._mergeCommandLists(cls._getLocalCommands(), cls._getPluginCommands())
return commands
return cls._mergeCommandLists(cls._getLocalCommands(), cls._getPluginCommands())

@staticmethod
def _raiseIfDuplicateCommands(commands):
Expand Down Expand Up @@ -206,11 +224,10 @@ def list_commands(self, ctx):
commands : `list` [`str`]
The names of the commands that can be called by the butler command.
"""
if self.commands is None:
self.commands = self._getCommands()
self._raiseIfDuplicateCommands(self.commands)
log.debug(self.commands.keys())
return self.commands.keys()
commands = self._getCommands()
self._raiseIfDuplicateCommands(commands)
log.debug(commands.keys())
return commands.keys()

def get_command(self, context, name):
"""Used by Click to get a single command for execution.
Expand All @@ -227,14 +244,13 @@ def get_command(self, context, name):
command : click.Command
A Command that wraps a callable command function.
"""
if self.commands is None:
self.commands = self._getCommands()
if name not in self.commands:
commands = self._getCommands()
if name not in commands:
return None
self._raiseIfDuplicateCommands(self.commands)
if self.commands[name][0] == localCmdPkg:
self._raiseIfDuplicateCommands(commands)
if commands[name][0] == localCmdPkg:
return getattr(butlerCommands, cmdNameToFuncName(name))
return doImport(self.commands[name][0] + "." + cmdNameToFuncName(name))
return doImport(commands[name][0] + "." + cmdNameToFuncName(name))


@click.command(cls=LoaderCLI)
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/cli/cmd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ["create", "config_dump", "config_validate"]
__all__ = ["butler_import", "create", "config_dump", "config_validate"]


from .commands import create, config_dump, config_validate
from .commands import butler_import, create, config_dump, config_validate
32 changes: 28 additions & 4 deletions python/lsst/daf/butler/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,31 @@

import click

from ..opt import config_file_option, dataset_type_option, repo_argument
from ..utils import split_commas, cli_handle_exception
from ...script import createRepo, configDump, configValidate
from ..opt import (config_file_option, dataset_type_option, directory_argument, repo_argument, run_option,
transfer_option)
from ..utils import split_commas, cli_handle_exception, typeStrAcceptsMultiple
from ...script import butlerImport, createRepo, configDump, configValidate


# The conversion from the import command name to the butler_import function
# name for subcommand lookup is implemented in the cli/butler.py, in
# funcNameToCmdName and cmdNameToFuncName. If name changes are made here they
# must be reflected in that location. If this becomes a common pattern a better
# mechanism should be implemented.
@click.command("import")
@repo_argument(required=True, help=repo_argument.will_create_repo)
@directory_argument(required=True)
@transfer_option()
@run_option(required=True)
# NOTE(review): the help text below refers to "--dir", but in this command the
# directory is supplied through directory_argument (a positional "directory"
# argument) -- confirm the wording.
# NOTE(review): with type=click.File('r') click is expected to open the file
# itself and hand over a stream, so the path-resolution behaviour described in
# the help text may not apply to the command line -- confirm.
@click.option("--export-file",
              help="Name for the file that contains database information associated with the exported "
                   "datasets. If this is not an absolute path, does not exist in the current working "
                   "directory, and --dir is provided, it is assumed to be in that directory. Defaults "
                   "to \"export.yaml\".",
              type=click.File('r'))
def butler_import(*args, **kwargs):
    """Import data into a butler repository."""
    # All parsed arguments and options are forwarded unchanged to the
    # butlerImport script function through cli_handle_exception.
    cli_handle_exception(butlerImport, *args, **kwargs)


@click.command()
Expand All @@ -45,7 +67,8 @@ def create(*args, **kwargs):
@click.option("--subset", "-s", type=str,
help="Subset of a configuration to report. This can be any key in the hierarchy such as "
"'.datastore.root' where the leading '.' specified the delimiter for the hierarchy.")
@click.option("--searchpath", "-p", type=str, multiple=True,
@click.option("--searchpath", "-p", type=str, multiple=True, callback=split_commas,
metavar=typeStrAcceptsMultiple,
help="Additional search paths to use for configuration overrides")
@click.option("--file", "outfile", type=click.File("w"), default="-",
help="Print the (possibly-expanded) configuration for a repository to a file, or to stdout "
Expand All @@ -60,6 +83,7 @@ def config_dump(*args, **kwargs):
@click.option("--quiet", "-q", is_flag=True, help="Do not report individual failures.")
@dataset_type_option(help="Specific DatasetType(s) to validate.")
@click.option("--ignore", "-i", type=str, multiple=True, callback=split_commas,
metavar=typeStrAcceptsMultiple,
help="DatasetType(s) to ignore for validation.")
def config_validate(*args, **kwargs):
"""Validate the configuration files for a Gen3 Butler repository."""
Expand Down
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/cli/opt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from .config import config_option
from .config_file import config_file_option
from .dataset_type import dataset_type_option
from .directory import directory_argument
from .repo import repo_argument
from .run import run_option
from .transfer import transfer_option

51 changes: 51 additions & 0 deletions python/lsst/daf/butler/cli/opt/directory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


import click

from ..utils import addArgumentHelp


class directory_argument:  # noqa: N801
    """Decorator to add a directory argument to a click command.

    Parameters
    ----------
    required : `bool`, optional
        Indicates if the caller must pass this argument to the command, by
        default False.
    help : `str`, optional
        The help text for this argument to append to the command's help text.
        If None or '' then nothing will be appended to the help text (in which
        case the command should document this argument directly in its help
        text). By default, the value of directory_argument.default_help
    """

    default_help = "DIRECTORY is the folder containing dataset files."

    def __init__(self, required=False, help=default_help):
        self.required = required
        self.helpText = help

    def __call__(self, f):
        """Attach the positional ``directory`` argument to the function ``f``.

        The docstring of ``f`` is replaced with the result of
        ``addArgumentHelp(f.__doc__, self.helpText)`` before the click
        argument is applied, and the decorated function is returned.
        """
        f.__doc__ = addArgumentHelp(f.__doc__, self.helpText)
        return click.argument("directory", required=self.required)(f)
8 changes: 3 additions & 5 deletions python/lsst/daf/butler/cli/opt/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

import click

from ..utils import addArgumentHelp


class repo_argument: # noqa: N801
"""Decorator to add a repo argument to a click command.
Expand All @@ -48,9 +50,5 @@ def __init__(self, required=False, help=existing_repo):
self.helpText = help

def __call__(self, f):
if self.helpText:
# Modify the passed-in fucntions's doc string, which is used to
# generate the Click Command help, to include the argument help
# text:
f.__doc__ = f"{'' if f.__doc__ is None else f.__doc__}\n\n {self.helpText}"
f.__doc__ = addArgumentHelp(f.__doc__, self.helpText)
return click.argument("repo", required=self.required)(f)
38 changes: 38 additions & 0 deletions python/lsst/daf/butler/cli/opt/transfer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


import click

# The transfer modes accepted by the --transfer option.
allowed_types = ["auto", "link", "symlink", "hardlink", "copy", "move", "relsymlink"]


class transfer_option:  # noqa: N801
    """Decorator to add a ``-t``/``--transfer`` option to a click command.

    The option value is restricted to the entries of ``allowed_types`` and
    defaults to "auto".

    Parameters
    ----------
    required : `bool`, optional
        Passed through as the ``required`` flag of the click option, by
        default False.
    help : `str`, optional
        The help text for the option. If `None`, a generic description of the
        transfer mode is used. An empty string is kept as given.
    """

    def __init__(self, required=False, help=None):
        self.required = required
        # Only None selects the default text; any other value, including '',
        # is used verbatim.
        self.help = "The external data transfer mode." if help is None else help

    def __call__(self, f):
        """Apply the click option to the function ``f`` and return the result."""
        return click.option("-t", "--transfer",
                            default="auto",
                            type=click.Choice(allowed_types),
                            required=self.required,
                            help=self.help)(f)