From 61607e574445b00f0a2244f361e5b0a8ce07a82b Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 20 Jul 2023 02:44:11 +0300 Subject: [PATCH] ls/import/get: introduce --config Adds support for `--config` to all three commands. For `ls` and `get`, it is a purely runtime option that will merge the config you provide with target repo's configs (similar to how .dvc/config.local works). E.g. ``` $ cat myconfig [remote "myremote"] access_key_id = 12345 secret_access_key = 98765 $ dvc get https://github.com/efiop/mydataregistry recent-grads.csv --config myconfig ``` In case of `dvc import`, the semantics are the same, but the value is also recorded in the resulting dvcfile. E.g. ``` $ dvc import https://github.com/efiop/mydataregistry recent-grads.csv --config myconfig ... $ cat recent-grads.csv.dvc md5: 85c86eeb3afa6d1f7d11beedc644d28e frozen: true deps: - path: recent-grads.csv repo: url: https://github.com/efiop/mydataregistry config: myconfig rev_lock: af22e06bbfe34d62a140437abf32f48b535944a7 outs: - md5: f447e23a170a9f25a7799f069a1ba934 size: 26872 hash: md5 path: recent-grads.csv ``` This is the most general and powerful mechanism that we can give, that works for everything from reconfiguring your default remote to completely changing your remote layout and beyond (will be handy in the future). Fixes #2466 --- dvc/commands/get.py | 9 +++++++++ dvc/commands/imp.py | 9 +++++++++ dvc/commands/ls/__init__.py | 9 +++++++++ dvc/config.py | 33 ++++++++++++++++++++------------ dvc/dependency/repo.py | 30 +++++++++++++++++++++++++++-- dvc/repo/get.py | 7 ++++++- dvc/repo/imp.py | 5 ++++- dvc/repo/ls.py | 13 ++++++++++++- tests/unit/command/ls/test_ls.py | 28 ++++++++++++++++++++++----- tests/unit/command/test_get.py | 10 +++++++++- tests/unit/command/test_imp.py | 5 +++++ 11 files changed, 135 insertions(+), 23 deletions(-) diff --git a/dvc/commands/get.py b/dvc/commands/get.py index 9ede8bfd9a..b0e61351b3 100644 --- a/dvc/commands/get.py +++ b/dvc/commands/get.py @@ -37,6 +37,7 @@ def _get_file_from_repo(self): rev=self.args.rev, jobs=self.args.jobs, force=self.args.force, + config=self.args.config, ) return 0 except CloneError: @@ -102,4 +103,12 @@ def add_parser(subparsers, parent_parser): default=False, help="Override local file or folder if exists.", ) + get_parser.add_argument( + "--config", + type=str, + help=( + "Path to a config file that will be merged with the config " + "in the target repository.", + ), + ) get_parser.set_defaults(func=CmdGet) diff --git a/dvc/commands/imp.py b/dvc/commands/imp.py index 34021a5521..1b3bfdcd2b 100644 --- a/dvc/commands/imp.py +++ b/dvc/commands/imp.py @@ -22,6 +22,7 @@ def run(self): no_exec=self.args.no_exec, no_download=self.args.no_download, jobs=self.args.jobs, + config=self.args.config, ) except CloneError: logger.exception("failed to import '%s'", self.args.path) @@ -94,4 +95,12 @@ def add_parser(subparsers, parent_parser): ), metavar="", ) + import_parser.add_argument( + "--config", + type=str, + help=( + "Path to a config file that will be merged with the config " + "in the target repository.", + ), + ) import_parser.set_defaults(func=CmdImport) diff --git a/dvc/commands/ls/__init__.py b/dvc/commands/ls/__init__.py index e293dbe554..35b95deaa5 100644 --- a/dvc/commands/ls/__init__.py +++ b/dvc/commands/ls/__init__.py @@ -34,6 +34,7 @@ def run(self): rev=self.args.rev, recursive=self.args.recursive, dvc_only=self.args.dvc_only, + config=self.args.config, ) if self.args.json: ui.write_json(entries) @@ -80,6 +81,14 @@ def add_parser(subparsers, parent_parser): help="Git revision (e.g. SHA, branch, tag)", metavar="", ) + list_parser.add_argument( + "--config", + type=str, + help=( + "Path to a config file that will be merged with the config " + "in the target repository.", + ), + ) list_parser.add_argument( "path", nargs="?", diff --git a/dvc/config.py b/dvc/config.py index 0a8288da32..e74ad52bfe 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -176,23 +176,32 @@ def _get_fs(self, level): # the repo. return self.fs if level == "repo" else self.wfs - def _load_config(self, level): + @staticmethod + def load_file(path, fs=None) -> dict: from configobj import ConfigObj, ConfigObjError + from dvc.fs import localfs + + fs = fs or localfs + + with fs.open(path) as fobj: + try: + conf_obj = ConfigObj(fobj) + except UnicodeDecodeError as exc: + raise ConfigError(str(exc)) from exc + except ConfigObjError as exc: + raise ConfigError(str(exc)) from exc + + return _parse_named(_lower_keys(conf_obj.dict())) + + def _load_config(self, level): filename = self.files[level] fs = self._get_fs(level) - if fs.exists(filename): - with fs.open(filename) as fobj: - try: - conf_obj = ConfigObj(fobj) - except UnicodeDecodeError as exc: - raise ConfigError(str(exc)) from exc - except ConfigObjError as exc: - raise ConfigError(str(exc)) from exc - else: - conf_obj = ConfigObj() - return _parse_named(_lower_keys(conf_obj.dict())) + try: + return self.load_file(filename, fs=fs) + except FileNotFoundError: + return {} def _save_config(self, level, conf_dict): from configobj import ConfigObj diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index 8e59560054..7c6fbb43b9 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -16,12 +16,14 @@ class RepoDependency(Dependency): PARAM_URL = "url" PARAM_REV = "rev" PARAM_REV_LOCK = "rev_lock" + PARAM_CONFIG = "config" REPO_SCHEMA = { PARAM_REPO: { Required(PARAM_URL): str, PARAM_REV: str, PARAM_REV_LOCK: str, + PARAM_CONFIG: str, } } @@ -60,7 +62,24 @@ def save(self): self.def_repo[self.PARAM_REV_LOCK] = rev def dumpd(self, **kwargs) -> Dict[str, Union[str, Dict[str, str]]]: - return {self.PARAM_PATH: self.def_path, self.PARAM_REPO: self.def_repo} + repo = {self.PARAM_URL: self.def_repo[self.PARAM_URL]} + + rev = self.def_repo.get(self.PARAM_REV) + if rev: + repo[self.PARAM_REV] = self.def_repo[self.PARAM_REV] + + rev_lock = self.def_repo.get(self.PARAM_REV_LOCK) + if rev_lock: + repo[self.PARAM_REV_LOCK] = rev_lock + + config = self.def_repo.get(self.PARAM_CONFIG) + if config: + repo[self.PARAM_CONFIG] = config + + return { + self.PARAM_PATH: self.def_path, + self.PARAM_REPO: repo, + } def update(self, rev: Optional[str] = None): if rev: @@ -77,9 +96,16 @@ def changed_checksum(self) -> bool: def _make_fs( self, rev: Optional[str] = None, locked: bool = True ) -> "DVCFileSystem": + from dvc.config import Config from dvc.fs import DVCFileSystem - config = {"cache": self.repo.config["cache"]} + conf = self.def_repo.get("config") + if conf: + config = Config.load_file(conf) + else: + config = {} + + config["cache"] = self.repo.config["cache"] config["cache"]["dir"] = self.repo.cache.local_cache_dir return DVCFileSystem( diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 6f31064ab0..178a46f59d 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -19,7 +19,8 @@ def __init__(self): ) -def get(url, path, out=None, rev=None, jobs=None, force=False): +def get(url, path, out=None, rev=None, jobs=None, force=False, config=None): + from dvc.config import Config from dvc.dvcfile import is_valid_filename from dvc.fs.callbacks import Callback from dvc.repo import Repo @@ -29,11 +30,15 @@ def get(url, path, out=None, rev=None, jobs=None, force=False): if is_valid_filename(out): raise GetDVCFileError + if config and not isinstance(config, dict): + config = Config.load_file(config) + with Repo.open( url=url, rev=rev, subrepos=True, uninitialized=True, + config=config, ) as repo: from dvc.fs.data import DataFileSystem diff --git a/dvc/repo/imp.py b/dvc/repo/imp.py index 0bdf9d8eb5..558fa56dae 100644 --- a/dvc/repo/imp.py +++ b/dvc/repo/imp.py @@ -1,6 +1,9 @@ -def imp(self, url, path, out=None, rev=None, **kwargs): +def imp(self, url, path, out=None, rev=None, config=None, **kwargs): erepo = {"url": url} if rev is not None: erepo["rev"] = rev + if config is not None: + erepo["config"] = config + return self.imp_url(path, out=out, erepo=erepo, frozen=True, **kwargs) diff --git a/dvc/repo/ls.py b/dvc/repo/ls.py index 65b9b497a8..ce408bbb5e 100644 --- a/dvc/repo/ls.py +++ b/dvc/repo/ls.py @@ -13,6 +13,7 @@ def ls( rev: Optional[str] = None, recursive: Optional[bool] = None, dvc_only: bool = False, + config: Optional[str] = None, ): """Methods for getting files and outputs for the repo. @@ -22,6 +23,7 @@ def ls( rev (str, optional): SHA commit, branch or tag name recursive (bool, optional): recursively walk the repo dvc_only (bool, optional): show only DVC-artifacts + config (bool, optional): path to config file Returns: list of `entry` @@ -35,9 +37,18 @@ def ls( "isexec": bool, } """ + from dvc.config import Config + from . import Repo - with Repo.open(url, rev=rev, subrepos=True, uninitialized=True) as repo: + if config and not isinstance(config, dict): + config_dict = Config.load_file(config) + else: + config_dict = None + + with Repo.open( + url, rev=rev, subrepos=True, uninitialized=True, config=config_dict + ) as repo: path = path or "" ret = _ls(repo, path, recursive, dvc_only) diff --git a/tests/unit/command/ls/test_ls.py b/tests/unit/command/ls/test_ls.py index 480d517df8..1d6702624d 100644 --- a/tests/unit/command/ls/test_ls.py +++ b/tests/unit/command/ls/test_ls.py @@ -18,32 +18,50 @@ def _test_cli(mocker, *args): def test_list(mocker): url = "local_dir" m = _test_cli(mocker, url) - m.assert_called_once_with(url, None, recursive=False, rev=None, dvc_only=False) + m.assert_called_once_with( + url, None, recursive=False, rev=None, dvc_only=False, config=None + ) def test_list_recursive(mocker): url = "local_dir" m = _test_cli(mocker, url, "-R") - m.assert_called_once_with(url, None, recursive=True, rev=None, dvc_only=False) + m.assert_called_once_with( + url, None, recursive=True, rev=None, dvc_only=False, config=None + ) def test_list_git_ssh_rev(mocker): url = "git@github.com:repo" m = _test_cli(mocker, url, "--rev", "123") - m.assert_called_once_with(url, None, recursive=False, rev="123", dvc_only=False) + m.assert_called_once_with( + url, None, recursive=False, rev="123", dvc_only=False, config=None + ) def test_list_targets(mocker): url = "local_dir" target = "subdir" m = _test_cli(mocker, url, target) - m.assert_called_once_with(url, target, recursive=False, rev=None, dvc_only=False) + m.assert_called_once_with( + url, target, recursive=False, rev=None, dvc_only=False, config=None + ) def test_list_outputs_only(mocker): url = "local_dir" m = _test_cli(mocker, url, None, "--dvc-only") - m.assert_called_once_with(url, None, recursive=False, rev=None, dvc_only=True) + m.assert_called_once_with( + url, None, recursive=False, rev=None, dvc_only=True, config=None + ) + + +def test_list_config(mocker): + url = "local_dir" + m = _test_cli(mocker, url, None, "--config", "myconfig") + m.assert_called_once_with( + url, None, recursive=False, rev=None, dvc_only=False, config="myconfig" + ) def test_show_json(mocker, capsys): diff --git a/tests/unit/command/test_get.py b/tests/unit/command/test_get.py index 6a8a0a705a..3e2cfad507 100644 --- a/tests/unit/command/test_get.py +++ b/tests/unit/command/test_get.py @@ -14,6 +14,8 @@ def test_get(mocker): "version", "--jobs", "4", + "--config", + "myconfig", ] ) assert cli_args.func == CmdGet @@ -24,7 +26,13 @@ def test_get(mocker): assert cmd.run() == 0 m.assert_called_once_with( - "repo_url", path="src", out="out", rev="version", jobs=4, force=False + "repo_url", + path="src", + out="out", + rev="version", + jobs=4, + config="myconfig", + force=False, ) diff --git a/tests/unit/command/test_imp.py b/tests/unit/command/test_imp.py index f2a95b55b2..83b49d8bd8 100644 --- a/tests/unit/command/test_imp.py +++ b/tests/unit/command/test_imp.py @@ -14,6 +14,8 @@ def test_import(mocker, dvc): "version", "--jobs", "3", + "--config", + "myconfig", ] ) assert cli_args.func == CmdImport @@ -31,6 +33,7 @@ def test_import(mocker, dvc): no_exec=False, no_download=False, jobs=3, + config="myconfig", ) @@ -61,6 +64,7 @@ def test_import_no_exec(mocker, dvc): no_exec=True, no_download=False, jobs=None, + config=None, ) @@ -91,4 +95,5 @@ def test_import_no_download(mocker, dvc): no_exec=False, no_download=True, jobs=None, + config=None, )