From afc8886ffc6a9dd8906569d2a4dde65b1ac0a692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Thu, 9 Jan 2020 18:15:25 +0000 Subject: [PATCH 01/23] get: handle non-DVC repositories Allows us to `dvc get` from non-DVC source repositories. Fixes #3089 --- dvc/exceptions.py | 5 ----- dvc/external_repo.py | 4 ++-- dvc/repo/get.py | 39 ++++++++++++++++++++++----------------- tests/func/test_get.py | 10 +++------- 4 files changed, 27 insertions(+), 31 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index e48014e95a..a551e35db1 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -240,11 +240,6 @@ def __init__(self, ignore_dirname): ) -class UrlNotDvcRepoError(DvcException): - def __init__(self, url): - super().__init__("URL '{}' is not a dvc repository.".format(url)) - - class GitHookAlreadyExistsError(DvcException): def __init__(self, hook_name): super().__init__( diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 9ff2f2a413..8f7cfaef55 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -33,7 +33,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None): repo.close() -def cached_clone(url, rev=None, **_ignored_kwargs): +def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs): """Clone an external git repo to a temporary directory. Returns the path to a local temporary directory with the specified @@ -44,7 +44,7 @@ def cached_clone(url, rev=None, **_ignored_kwargs): """ - new_path = tempfile.mkdtemp("dvc-erepo") + new_path = clone_path or tempfile.mkdtemp("dvc-erepo") # Copy and adjust existing clean clone if (url, None, None) in REPO_CACHE: diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 17607bbfae..58eddfcbe9 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -7,10 +7,9 @@ DvcException, NotDvcRepoError, OutputNotFoundError, - UrlNotDvcRepoError, PathMissingError, ) -from dvc.external_repo import external_repo +from dvc.external_repo import cached_clone from dvc.path_info import PathInfo from dvc.stage import Stage from dvc.utils import resolve_output @@ -28,8 +27,15 @@ def __init__(self): ) +# Dummy exception raised to signal a plain file copy is needed +class _DoPlainCopy(DvcException): + pass + + @staticmethod def get(url, path, out=None, rev=None): + from dvc.repo import Repo + out = resolve_output(path, out) if Stage.is_valid_filename(out): @@ -43,7 +49,8 @@ def get(url, path, out=None, rev=None): dpath = os.path.dirname(os.path.abspath(out)) tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid())) try: - with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo: + cached_clone(url, rev=rev, clone_path=tmp_dir) + try: # Try any links possible to avoid data duplication. # # Not using symlink, because we need to remove cache after we are @@ -53,26 +60,24 @@ def get(url, path, out=None, rev=None): # # Also, we can't use theoretical "move" link type here, because # the same cache file might be used a few times in a directory. + repo = Repo(tmp_dir) repo.cache.local.cache_types = ["reflink", "hardlink", "copy"] + output = repo.find_out_by_relpath(path) + if not output.use_cache: + # Catch this below and go for a plain old fs_copy + raise _DoPlainCopy + _get_cached(repo, output, out) - try: - output = repo.find_out_by_relpath(path) - except OutputNotFoundError: - output = None - - if output and output.use_cache: - _get_cached(repo, output, out) - else: - # Either an uncached out with absolute path or a user error - if os.path.isabs(path): - raise FileNotFoundError + except (NotDvcRepoError, OutputNotFoundError, _DoPlainCopy): + # It's an uncached out with absolute path, a non-DVC repo, or a + # user error + if os.path.isabs(path): + raise FileNotFoundError - fs_copy(os.path.join(repo.root_dir, path), out) + fs_copy(os.path.join(tmp_dir, path), out) except (OutputNotFoundError, FileNotFoundError): raise PathMissingError(path, url) - except NotDvcRepoError: - raise UrlNotDvcRepoError(url) finally: remove(tmp_dir) diff --git a/tests/func/test_get.py b/tests/func/test_get.py index 99fd23840b..f988a5c286 100644 --- a/tests/func/test_get.py +++ b/tests/func/test_get.py @@ -5,7 +5,6 @@ from dvc.cache import Cache from dvc.config import Config -from dvc.exceptions import UrlNotDvcRepoError from dvc.repo.get import GetDVCFileError, PathMissingError from dvc.repo import Repo from dvc.system import System @@ -87,9 +86,10 @@ def test_get_repo_rev(tmp_dir, erepo_dir): def test_get_from_non_dvc_repo(tmp_dir, erepo_dir): erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True) erepo_dir.scm.commit("remove dvc") + erepo_dir.scm_gen({"some_file": "contents"}, commit="create file") - with pytest.raises(UrlNotDvcRepoError): - Repo.get(fspath(erepo_dir), "some_file.zip") + Repo.get(fspath(erepo_dir), "some_file", "file_imported") + assert (tmp_dir / "file_imported").read_text() == "contents" def test_get_a_dvc_file(tmp_dir, erepo_dir): @@ -164,10 +164,6 @@ def test_get_from_non_dvc_master(tmp_dir, erepo_dir, caplog): erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True) erepo_dir.dvc.scm.commit("remove .dvc") - # sanity check - with pytest.raises(UrlNotDvcRepoError): - Repo.get(fspath(erepo_dir), "some_file") - caplog.clear() dst = "file_imported" with caplog.at_level(logging.INFO, logger="dvc"): From d09285418ee2d8c4e8a0b51147ab607e6229d179 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 14:00:26 +0000 Subject: [PATCH 02/23] style: improve code flow and move comments --- dvc/repo/get.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 58eddfcbe9..785c388a4a 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -51,6 +51,8 @@ def get(url, path, out=None, rev=None): try: cached_clone(url, rev=rev, clone_path=tmp_dir) try: + repo = Repo(tmp_dir) + # Try any links possible to avoid data duplication. # # Not using symlink, because we need to remove cache after we are @@ -60,21 +62,23 @@ def get(url, path, out=None, rev=None): # # Also, we can't use theoretical "move" link type here, because # the same cache file might be used a few times in a directory. - repo = Repo(tmp_dir) repo.cache.local.cache_types = ["reflink", "hardlink", "copy"] + output = repo.find_out_by_relpath(path) - if not output.use_cache: + if output.use_cache: # Catch this below and go for a plain old fs_copy - raise _DoPlainCopy - _get_cached(repo, output, out) + _get_cached(repo, output, out) + return + + except (NotDvcRepoError, OutputNotFoundError): + pass - except (NotDvcRepoError, OutputNotFoundError, _DoPlainCopy): - # It's an uncached out with absolute path, a non-DVC repo, or a - # user error - if os.path.isabs(path): - raise FileNotFoundError + # It's an uncached out with absolute path, a non-DVC repo, or a + # user error + if os.path.isabs(path): + raise FileNotFoundError - fs_copy(os.path.join(tmp_dir, path), out) + fs_copy(os.path.join(tmp_dir, path), out) except (OutputNotFoundError, FileNotFoundError): raise PathMissingError(path, url) From 09bdc7325ed90dd15e8c00dd25ecab2d398ea3b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 14:10:35 +0000 Subject: [PATCH 03/23] doc: change get documentation so that it doesn't imply the target must be DVC enabled --- dvc/command/get.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index 9aba35c0d8..492d7efba4 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -31,7 +31,7 @@ def run(self): def add_parser(subparsers, parent_parser): - GET_HELP = "Download/copy files or directories from DVC repository." + GET_HELP = "Download/copy files or directories from git repository." get_parser = subparsers.add_parser( "get", parents=[parent_parser], @@ -40,10 +40,10 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) get_parser.add_argument( - "url", help="URL of Git repository with DVC project to download from." + "url", help="URL of Git repository to download from." ) get_parser.add_argument( - "path", help="Path to a file or directory within a DVC repository." + "path", help="Path to a file or directory within the repository." ) get_parser.add_argument( "-o", @@ -52,6 +52,6 @@ def add_parser(subparsers, parent_parser): help="Destination path to copy/download files to.", ) get_parser.add_argument( - "--rev", nargs="?", help="DVC repository git revision." + "--rev", nargs="?", help="Repository git revision." ) get_parser.set_defaults(func=CmdGet) From 8e4922800f6ce47210976764af1c05e31e90deac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 16:48:09 +0000 Subject: [PATCH 04/23] get: recoup cache optimal location --- dvc/external_repo.py | 4 ++-- dvc/repo/get.py | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 8f7cfaef55..9ff2f2a413 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -33,7 +33,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None): repo.close() -def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs): +def cached_clone(url, rev=None, **_ignored_kwargs): """Clone an external git repo to a temporary directory. Returns the path to a local temporary directory with the specified @@ -44,7 +44,7 @@ def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs): """ - new_path = clone_path or tempfile.mkdtemp("dvc-erepo") + new_path = tempfile.mkdtemp("dvc-erepo") # Copy and adjust existing clean clone if (url, None, None) in REPO_CACHE: diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 785c388a4a..49bb4e9013 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -3,6 +3,8 @@ import shortuuid +from dvc.cache import CacheConfig +from dvc.config import Config from dvc.exceptions import ( DvcException, NotDvcRepoError, @@ -49,9 +51,11 @@ def get(url, path, out=None, rev=None): dpath = os.path.dirname(os.path.abspath(out)) tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid())) try: - cached_clone(url, rev=rev, clone_path=tmp_dir) + clone_dir = cached_clone(url, rev=rev) try: - repo = Repo(tmp_dir) + repo = Repo(clone_dir) + cache_config = CacheConfig(repo.config) + cache_config.set_dir(tmp_dir, level=Config.LEVEL_LOCAL) # Try any links possible to avoid data duplication. # @@ -78,12 +82,13 @@ def get(url, path, out=None, rev=None): if os.path.isabs(path): raise FileNotFoundError - fs_copy(os.path.join(tmp_dir, path), out) + fs_copy(os.path.join(clone_dir, path), out) except (OutputNotFoundError, FileNotFoundError): raise PathMissingError(path, url) finally: remove(tmp_dir) + remove(clone_dir) def _get_cached(repo, output, out): From 3e386d93ba8da28f7d16513187fe46623cd03e3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 16:58:53 +0000 Subject: [PATCH 05/23] Update dvc/repo/get.py Co-Authored-By: Ruslan Kuprieiev --- dvc/repo/get.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 49bb4e9013..4c1f554237 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -70,7 +70,6 @@ def get(url, path, out=None, rev=None): output = repo.find_out_by_relpath(path) if output.use_cache: - # Catch this below and go for a plain old fs_copy _get_cached(repo, output, out) return From bfe068809427a5b2313e0b7b5b1aaed8259d3fed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 16:59:40 +0000 Subject: [PATCH 06/23] Revert "get: recoup cache optimal location" This reverts commit 8e4922800f6ce47210976764af1c05e31e90deac. --- dvc/external_repo.py | 4 ++-- dvc/repo/get.py | 11 +++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 9ff2f2a413..8f7cfaef55 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -33,7 +33,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None): repo.close() -def cached_clone(url, rev=None, **_ignored_kwargs): +def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs): """Clone an external git repo to a temporary directory. Returns the path to a local temporary directory with the specified @@ -44,7 +44,7 @@ def cached_clone(url, rev=None, **_ignored_kwargs): """ - new_path = tempfile.mkdtemp("dvc-erepo") + new_path = clone_path or tempfile.mkdtemp("dvc-erepo") # Copy and adjust existing clean clone if (url, None, None) in REPO_CACHE: diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 4c1f554237..19028861c8 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -3,8 +3,6 @@ import shortuuid -from dvc.cache import CacheConfig -from dvc.config import Config from dvc.exceptions import ( DvcException, NotDvcRepoError, @@ -51,11 +49,9 @@ def get(url, path, out=None, rev=None): dpath = os.path.dirname(os.path.abspath(out)) tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid())) try: - clone_dir = cached_clone(url, rev=rev) + cached_clone(url, rev=rev, clone_path=tmp_dir) try: - repo = Repo(clone_dir) - cache_config = CacheConfig(repo.config) - cache_config.set_dir(tmp_dir, level=Config.LEVEL_LOCAL) + repo = Repo(tmp_dir) # Try any links possible to avoid data duplication. # @@ -81,13 +77,12 @@ def get(url, path, out=None, rev=None): if os.path.isabs(path): raise FileNotFoundError - fs_copy(os.path.join(clone_dir, path), out) + fs_copy(os.path.join(tmp_dir, path), out) except (OutputNotFoundError, FileNotFoundError): raise PathMissingError(path, url) finally: remove(tmp_dir) - remove(clone_dir) def _get_cached(repo, output, out): From d8621152852904d6caf9db2bf4487d7b1f521223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 17:00:04 +0000 Subject: [PATCH 07/23] remove unused exception class --- dvc/repo/get.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 19028861c8..0585cbfc97 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -27,11 +27,6 @@ def __init__(self): ) -# Dummy exception raised to signal a plain file copy is needed -class _DoPlainCopy(DvcException): - pass - - @staticmethod def get(url, path, out=None, rev=None): from dvc.repo import Repo From 0f7dad7b934f4414a6269d364a6e8ad97b429881 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 17:29:21 +0000 Subject: [PATCH 08/23] change command doc string --- dvc/command/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index 492d7efba4..1b1e1bffab 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -31,7 +31,7 @@ def run(self): def add_parser(subparsers, parent_parser): - GET_HELP = "Download/copy files or directories from git repository." + GET_HELP = "Download/copy files or directories from Git repository." get_parser = subparsers.add_parser( "get", parents=[parent_parser], From cabbb32bb4435ecb5f01ae999d0f0247d1dbc620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 18:07:53 +0000 Subject: [PATCH 09/23] Update dvc/command/get.py Co-Authored-By: Jorge Orpinel --- dvc/command/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index 1b1e1bffab..90cac549c7 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -52,6 +52,6 @@ def add_parser(subparsers, parent_parser): help="Destination path to copy/download files to.", ) get_parser.add_argument( - "--rev", nargs="?", help="Repository git revision." + "--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)" ) get_parser.set_defaults(func=CmdGet) From 36bd848cb6b283e1c94e47f19b0f4fecfc40802a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 18:08:07 +0000 Subject: [PATCH 10/23] Update dvc/command/get.py Co-Authored-By: Jorge Orpinel --- dvc/command/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index 90cac549c7..3774bd9cb9 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -43,7 +43,7 @@ def add_parser(subparsers, parent_parser): "url", help="URL of Git repository to download from." ) get_parser.add_argument( - "path", help="Path to a file or directory within the repository." + "path", help="Path to a file or directory within the project or repository" ) get_parser.add_argument( "-o", From c3314b5aa2314885530e68f193ac20d37f477bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 18:08:33 +0000 Subject: [PATCH 11/23] Update dvc/command/get.py Co-Authored-By: Jorge Orpinel --- dvc/command/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index 3774bd9cb9..daae7fa266 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -40,7 +40,7 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) get_parser.add_argument( - "url", help="URL of Git repository to download from." + "url", help="Location of DVC project or Git repository to download from" ) get_parser.add_argument( "path", help="Path to a file or directory within the project or repository" From 724283fa382843e6dc11c16045e966cacce6cd30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 18:08:46 +0000 Subject: [PATCH 12/23] Update dvc/command/get.py Co-Authored-By: Jorge Orpinel --- dvc/command/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index daae7fa266..7d5af18c79 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -31,7 +31,7 @@ def run(self): def add_parser(subparsers, parent_parser): - GET_HELP = "Download/copy files or directories from Git repository." + GET_HELP = "Download a file or directory from any DVC project or Git repository" get_parser = subparsers.add_parser( "get", parents=[parent_parser], From f5cd4247d6970fceb7821b411896abc6f8a51fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 18:27:04 +0000 Subject: [PATCH 13/23] import: update documentation Fixes #3077 --- dvc/command/get.py | 10 +++++++--- dvc/command/imp.py | 12 ++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index 7d5af18c79..38ad490003 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -31,7 +31,9 @@ def run(self): def add_parser(subparsers, parent_parser): - GET_HELP = "Download a file or directory from any DVC project or Git repository" + GET_HELP = ( + "Download a file or directory from any DVC project or Git repository" + ) get_parser = subparsers.add_parser( "get", parents=[parent_parser], @@ -40,10 +42,12 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) get_parser.add_argument( - "url", help="Location of DVC project or Git repository to download from" + "url", + help="Location of DVC project or Git repository to download from.", ) get_parser.add_argument( - "path", help="Path to a file or directory within the project or repository" + "path", + help="Path to a file or directory within the project or repository", ) get_parser.add_argument( "-o", diff --git a/dvc/command/imp.py b/dvc/command/imp.py index 9315f03744..b3fe61a1b8 100644 --- a/dvc/command/imp.py +++ b/dvc/command/imp.py @@ -30,7 +30,8 @@ def run(self): def add_parser(subparsers, parent_parser): IMPORT_HELP = ( - "Download data from DVC repository and take it under DVC control." + "Download data from DVC project or Git repository and take it under " + "DVC control." ) import_parser = subparsers.add_parser( @@ -41,13 +42,12 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawTextHelpFormatter, ) import_parser.add_argument( - "url", help="URL of Git repository with DVC project to download from." + "url", + help="Location of Git repository with DVC project to download from.", ) + import_parser.add_argument("path", help="Path to data within DVC project.") import_parser.add_argument( - "path", help="Path to data within DVC repository." - ) - import_parser.add_argument( - "-o", "--out", nargs="?", help="Destination path to put data to." + "-o", "--out", nargs="?", help="Destination path to put data in." ) import_parser.add_argument( "--rev", nargs="?", help="DVC repository git revision." From 8db43b96da9e7cc4ea30ddb1be1ad22639ec2cff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 22:22:28 +0000 Subject: [PATCH 14/23] get: leverage external_repo() context manager for DVC repositories --- dvc/external_repo.py | 4 +-- dvc/repo/get.py | 63 ++++++++++++++++++++++++-------------------- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 8f7cfaef55..9ff2f2a413 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -33,7 +33,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None): repo.close() -def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs): +def cached_clone(url, rev=None, **_ignored_kwargs): """Clone an external git repo to a temporary directory. Returns the path to a local temporary directory with the specified @@ -44,7 +44,7 @@ def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs): """ - new_path = clone_path or tempfile.mkdtemp("dvc-erepo") + new_path = tempfile.mkdtemp("dvc-erepo") # Copy and adjust existing clean clone if (url, None, None) in REPO_CACHE: diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 0585cbfc97..745e24fe1c 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -9,7 +9,7 @@ OutputNotFoundError, PathMissingError, ) -from dvc.external_repo import cached_clone +from dvc.external_repo import external_repo, cached_clone from dvc.path_info import PathInfo from dvc.stage import Stage from dvc.utils import resolve_output @@ -29,8 +29,6 @@ def __init__(self): @staticmethod def get(url, path, out=None, rev=None): - from dvc.repo import Repo - out = resolve_output(path, out) if Stage.is_valid_filename(out): @@ -43,41 +41,50 @@ def get(url, path, out=None, rev=None): # and won't work with reflink/hardlink. dpath = os.path.dirname(os.path.abspath(out)) tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid())) + raw_git_dir = None try: - cached_clone(url, rev=rev, clone_path=tmp_dir) try: - repo = Repo(tmp_dir) - - # Try any links possible to avoid data duplication. - # - # Not using symlink, because we need to remove cache after we are - # done, and to make that work we would have to copy data over - # anyway before removing the cache, so we might just copy it - # right away. - # - # Also, we can't use theoretical "move" link type here, because - # the same cache file might be used a few times in a directory. - repo.cache.local.cache_types = ["reflink", "hardlink", "copy"] - - output = repo.find_out_by_relpath(path) - if output.use_cache: - _get_cached(repo, output, out) + with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo: + # Try any links possible to avoid data duplication. + # + # Not using symlink, because we need to remove cache after we + # are done, and to make that work we would have to copy data + # over anyway before removing the cache, so we might just copy + # it right away. + # + # Also, we can't use theoretical "move" link type here, because + # the same cache file might be used a few times in a directory. + repo.cache.local.cache_types = ["reflink", "hardlink", "copy"] + + try: + output = repo.find_out_by_relpath(path) + except OutputNotFoundError: + output = None + + if output and output.use_cache: + _get_cached(repo, output, out) + return + + # Either an uncached out with absolute path or a user error + + if os.path.isabs(path): + raise FileNotFoundError + + fs_copy(os.path.join(repo.root_dir, path), out) return - except (NotDvcRepoError, OutputNotFoundError): + except NotDvcRepoError: + # Not a DVC repository, continue below and copy from git pass - # It's an uncached out with absolute path, a non-DVC repo, or a - # user error - if os.path.isabs(path): - raise FileNotFoundError - - fs_copy(os.path.join(tmp_dir, path), out) - + raw_git_dir = cached_clone(url, rev=rev) + fs_copy(os.path.join(raw_git_dir, path), out) except (OutputNotFoundError, FileNotFoundError): raise PathMissingError(path, url) finally: remove(tmp_dir) + if raw_git_dir: + remove(raw_git_dir) def _get_cached(repo, output, out): From 9140e269a7f2ba31ef9a7b04a0e40cb96896365a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 22:25:16 +0000 Subject: [PATCH 15/23] Update dvc/command/imp.py Co-Authored-By: Jorge Orpinel --- dvc/command/imp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/imp.py b/dvc/command/imp.py index b3fe61a1b8..f6df5357f7 100644 --- a/dvc/command/imp.py +++ b/dvc/command/imp.py @@ -30,7 +30,7 @@ def run(self): def add_parser(subparsers, parent_parser): IMPORT_HELP = ( - "Download data from DVC project or Git repository and take it under " + "Download a file or directory from any DVC project or Git repository and take it under " "DVC control." ) From e0c533f498f8d16a735e1ff4feb0db59767a044b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 22:25:26 +0000 Subject: [PATCH 16/23] Update dvc/command/get.py Co-Authored-By: Jorge Orpinel --- dvc/command/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index 38ad490003..1cbb64be2d 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -32,7 +32,7 @@ def run(self): def add_parser(subparsers, parent_parser): GET_HELP = ( - "Download a file or directory from any DVC project or Git repository" + "Download a file or directory from any DVC project or Git repository." ) get_parser = subparsers.add_parser( "get", From 97d092573e239804c84a55749a89f1c559d5a297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 22:25:42 +0000 Subject: [PATCH 17/23] Update dvc/command/imp.py Co-Authored-By: Jorge Orpinel --- dvc/command/imp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/imp.py b/dvc/command/imp.py index f6df5357f7..d80915dc47 100644 --- a/dvc/command/imp.py +++ b/dvc/command/imp.py @@ -43,7 +43,7 @@ def add_parser(subparsers, parent_parser): ) import_parser.add_argument( "url", - help="Location of Git repository with DVC project to download from.", + help="Location of DVC project or Git repository to download from", ) import_parser.add_argument("path", help="Path to data within DVC project.") import_parser.add_argument( From 5d0fc1a9ca7c18b1becf4030eb2c3ea25754f3fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 22:26:18 +0000 Subject: [PATCH 18/23] Update dvc/command/imp.py Co-Authored-By: Jorge Orpinel --- dvc/command/imp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/imp.py b/dvc/command/imp.py index d80915dc47..e30dc714d2 100644 --- a/dvc/command/imp.py +++ b/dvc/command/imp.py @@ -50,6 +50,6 @@ def add_parser(subparsers, parent_parser): "-o", "--out", nargs="?", help="Destination path to put data in." ) import_parser.add_argument( - "--rev", nargs="?", help="DVC repository git revision." + "--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)" ) import_parser.set_defaults(func=CmdImport) From ff2713222ab29ee773583498eb778f0a6a9989d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 22:26:34 +0000 Subject: [PATCH 19/23] Update dvc/command/imp.py Co-Authored-By: Jorge Orpinel --- dvc/command/imp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/imp.py b/dvc/command/imp.py index e30dc714d2..d664d99f70 100644 --- a/dvc/command/imp.py +++ b/dvc/command/imp.py @@ -47,7 +47,7 @@ def add_parser(subparsers, parent_parser): ) import_parser.add_argument("path", help="Path to data within DVC project.") import_parser.add_argument( - "-o", "--out", nargs="?", help="Destination path to put data in." + "-o", "--out", nargs="?", help="Destination path to download files to" ) import_parser.add_argument( "--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)" From 260ab15c3ae10fc9c6a6ce24769c45b83958c8e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 22:26:58 +0000 Subject: [PATCH 20/23] Update dvc/command/get.py Co-Authored-By: Jorge Orpinel --- dvc/command/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index 1cbb64be2d..a09a97852b 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -53,7 +53,7 @@ def add_parser(subparsers, parent_parser): "-o", "--out", nargs="?", - help="Destination path to copy/download files to.", + help="Destination path to download files to", ) get_parser.add_argument( "--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)" From f1e198bc99050272497b72ee28fe314132407028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 22:27:20 +0000 Subject: [PATCH 21/23] Update dvc/command/imp.py Co-Authored-By: Jorge Orpinel --- dvc/command/imp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/imp.py b/dvc/command/imp.py index d664d99f70..8797d3c05c 100644 --- a/dvc/command/imp.py +++ b/dvc/command/imp.py @@ -45,7 +45,7 @@ def add_parser(subparsers, parent_parser): "url", help="Location of DVC project or Git repository to download from", ) - import_parser.add_argument("path", help="Path to data within DVC project.") + import_parser.add_argument("path", help="Path to a file or directory within the project or repository") import_parser.add_argument( "-o", "--out", nargs="?", help="Destination path to download files to" ) From 802d8ab72efaac7aad9569b3aa5bb7f7e4203463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Santos?= Date: Fri, 10 Jan 2020 22:27:31 +0000 Subject: [PATCH 22/23] Update dvc/command/get.py Co-Authored-By: Jorge Orpinel --- dvc/command/get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index a09a97852b..97d9317d71 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -43,7 +43,7 @@ def add_parser(subparsers, parent_parser): ) get_parser.add_argument( "url", - help="Location of DVC project or Git repository to download from.", + help="Location of DVC project or Git repository to download from", ) get_parser.add_argument( "path", From 97b3c4a88a256df4177f935561fd2b18dd19c7aa Mon Sep 17 00:00:00 2001 From: "Restyled.io" Date: Fri, 10 Jan 2020 22:27:45 +0000 Subject: [PATCH 23/23] Restyled by black --- dvc/command/get.py | 5 +---- dvc/command/imp.py | 5 ++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dvc/command/get.py b/dvc/command/get.py index 97d9317d71..22856f1c07 100644 --- a/dvc/command/get.py +++ b/dvc/command/get.py @@ -50,10 +50,7 @@ def add_parser(subparsers, parent_parser): help="Path to a file or directory within the project or repository", ) get_parser.add_argument( - "-o", - "--out", - nargs="?", - help="Destination path to download files to", + "-o", "--out", nargs="?", help="Destination path to download files to" ) get_parser.add_argument( "--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)" diff --git a/dvc/command/imp.py b/dvc/command/imp.py index 8797d3c05c..743671894f 100644 --- a/dvc/command/imp.py +++ b/dvc/command/imp.py @@ -45,7 +45,10 @@ def add_parser(subparsers, parent_parser): "url", help="Location of DVC project or Git repository to download from", ) - import_parser.add_argument("path", help="Path to a file or directory within the project or repository") + import_parser.add_argument( + "path", + help="Path to a file or directory within the project or repository", + ) import_parser.add_argument( "-o", "--out", nargs="?", help="Destination path to download files to" )