From c04f5a0f9da1b5e6b5857c30b916bbaebe41e43a Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Tue, 3 Dec 2019 19:07:39 +0200 Subject: [PATCH 01/13] import: allow downloading regular files/dirs tracked by git Fixes #2862 --- dvc/dependency/repo.py | 42 +++++++++++++++++++++++-- dvc/exceptions.py | 9 ++++++ dvc/repo/get.py | 21 ++----------- dvc/utils/fs.py | 7 +++++ tests/func/test_import.py | 65 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 121 insertions(+), 23 deletions(-) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index f0ff9df065..b2932f6176 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import copy +import os from contextlib import contextmanager from funcy import merge @@ -8,6 +9,9 @@ from .local import DependencyLOCAL from dvc.external_repo import external_repo from dvc.utils.compat import str +from dvc.exceptions import OutputNotFoundError +from dvc.exceptions import PathMissingError +from dvc.utils.fs import fs_copy class DependencyREPO(DependencyLOCAL): @@ -72,10 +76,42 @@ def fetch(self): return out + @staticmethod + def _is_git_file(repo, path): + if not os.path.isabs(path): + try: + output = repo.find_out_by_relpath(path) + if not output.use_cache: + return True + except OutputNotFoundError: + return True + return False + + def _copy_if_git_file(self, to_path): + src_path = self.def_path + with self._make_repo( + cache_dir=self.repo.cache.local.cache_dir + ) as repo: + if not self._is_git_file(repo, src_path): + return False + + src_full_path = os.path.join(repo.root_dir, src_path) + dst_full_path = os.path.abspath(to_path) + fs_copy(src_full_path, dst_full_path) + return True + def download(self, to): - out = self.fetch() - to.info = copy.copy(out.info) - to.checkout() + try: + if self._copy_if_git_file(to.fspath): + return + + out = self.fetch() + to.info = copy.copy(out.info) + to.checkout() + except (FileNotFoundError): + raise PathMissingError( + self.def_path, self.def_repo[self.PARAM_URL] + ) def update(self): with self._make_repo(rev_lock=None) as repo: diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 1beb8d6e1c..7151e2e2fa 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -344,3 +344,12 @@ def __init__(self, path, external_repo_path, external_repo_url): class HTTPError(DvcException): def __init__(self, code, reason): super(HTTPError, self).__init__("'{} {}'".format(code, reason)) + + +class PathMissingError(DvcException): + def __init__(self, path, repo): + msg = ( + "The path '{}' does not exist in the target repository '{}'" + " neighther as an output nor a git-handled file." + ) + super(PathMissingError, self).__init__(msg.format(path, repo)) diff --git a/dvc/repo/get.py b/dvc/repo/get.py index 60c4c2e4ac..3c7e1e68b5 100644 --- a/dvc/repo/get.py +++ b/dvc/repo/get.py @@ -1,6 +1,5 @@ import logging import os -import shutil from dvc.utils.compat import FileNotFoundError import shortuuid @@ -10,12 +9,14 @@ NotDvcRepoError, OutputNotFoundError, UrlNotDvcRepoError, + PathMissingError, ) from dvc.external_repo import external_repo from dvc.path_info import PathInfo from dvc.stage import Stage from dvc.utils import resolve_output from dvc.utils.fs import remove +from dvc.utils.fs import fs_copy logger = logging.getLogger(__name__) @@ -28,15 +29,6 @@ def __init__(self): ) -class PathMissingError(DvcException): - def __init__(self, path, repo): - msg = ( - "The path '{}' does not exist in the target repository '{}'" - " neighther as an output nor a git-handled file." - ) - super(PathMissingError, self).__init__(msg.format(path, repo)) - - @staticmethod def get(url, path, out=None, rev=None): out = resolve_output(path, out) @@ -76,7 +68,7 @@ def get(url, path, out=None, rev=None): if os.path.isabs(path): raise FileNotFoundError - _copy(os.path.join(repo.root_dir, path), out) + fs_copy(os.path.join(repo.root_dir, path), out) except (OutputNotFoundError, FileNotFoundError): raise PathMissingError(path, url) @@ -94,10 +86,3 @@ def _get_cached(repo, output, out): # This might happen when pull haven't really pulled all the files if failed: raise FileNotFoundError - - -def _copy(src, dst): - if os.path.isdir(src): - shutil.copytree(src, dst) - else: - shutil.copy2(src, dst) diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 1b51576164..26ee05506e 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -22,6 +22,13 @@ logger = logging.getLogger(__name__) +def fs_copy(src, dst): + if os.path.isdir(src): + shutil.copytree(src, dst) + else: + shutil.copy2(src, dst) + + def get_inode(path): inode = System.inode(path) logger.debug("Path {} inode {}".format(path, inode)) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 20e40fde77..9e604ce665 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -8,7 +8,9 @@ from mock import patch from dvc.config import Config -from dvc.exceptions import DownloadError, NoOutputInExternalRepoError +from dvc.exceptions import DownloadError +from dvc.exceptions import PathMissingError +from dvc.exceptions import NoOutputInExternalRepoError from dvc.stage import Stage from dvc.system import System from dvc.utils import makedirs @@ -27,6 +29,40 @@ def test_import(git, dvc_repo, erepo): assert git.git.check_ignore(dst) +def test_import_git_file(git, dvc_repo, erepo): + src = "some_file" + dst = "some_file_imported" + + src_path = os.path.join(erepo.root_dir, src) + erepo.create(src_path, "hello") + erepo.dvc.scm.add([src_path]) + erepo.dvc.scm.commit("add a regular file") + + dvc_repo.imp(erepo.root_dir, src, dst) + + assert os.path.exists(dst) + assert os.path.isfile(dst) + assert filecmp.cmp(src_path, dst, shallow=False) + assert git.git.check_ignore(dst) + + +def test_import_git_dir(git, dvc_repo, erepo): + src = "some_directory" + dst = "some_directory_imported" + + src_file_path = os.path.join(erepo.root_dir, src, "file.txt") + erepo.create(src_file_path, "hello") + erepo.dvc.scm.add([src_file_path]) + erepo.dvc.scm.commit("add a regular dir") + + dvc_repo.imp(erepo.root_dir, src, dst) + + assert os.path.exists(dst) + assert os.path.isdir(dst) + trees_equal(os.path.join(erepo.root_dir, src), dst) + assert git.git.check_ignore(dst) + + def test_import_dir(git, dvc_repo, erepo): src = erepo.DATA_DIR dst = erepo.DATA_DIR + "_imported" @@ -39,6 +75,28 @@ def test_import_dir(git, dvc_repo, erepo): assert git.git.check_ignore(dst) +def test_import_non_cached(git, dvc_repo, erepo): + src = "non_cached_output" + dst = src + "_imported" + + erepo.dvc.run( + cmd="echo hello > {}".format(src), + outs_no_cache=[src], + cwd=erepo.root_dir, + ) + + src_path = os.path.join(erepo.root_dir, src) + erepo.dvc.scm.add([src_path]) + erepo.dvc.scm.commit("add a non-cached output") + + dvc_repo.imp(erepo.root_dir, src, dst) + + assert os.path.exists(dst) + assert os.path.isfile(dst) + assert filecmp.cmp(src_path, dst, shallow=False) + assert git.git.check_ignore(dst) + + def test_import_rev(git, dvc_repo, erepo): src = "version" dst = src @@ -157,5 +215,8 @@ def test_pull_non_workspace(git, dvc_repo, erepo): def test_import_non_existing(dvc_repo, erepo): - with pytest.raises(NoOutputInExternalRepoError): + with pytest.raises(PathMissingError): dvc_repo.imp(erepo.root_dir, "invalid_output") + # https://github.com/iterative/dvc/pull/2837#discussion_r352123053 + with pytest.raises(NoOutputInExternalRepoError): + dvc_repo.imp(erepo.root_dir, "/root/", "root") From 0ee6998a29d0b93bc8e3ffaa7f36e12422b4a706 Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Fri, 13 Dec 2019 15:20:41 +0200 Subject: [PATCH 02/13] New tests use tmp_dir --- tests/func/test_import.py | 61 +++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 9e604ce665..f8fc34f8f4 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -29,38 +29,32 @@ def test_import(git, dvc_repo, erepo): assert git.git.check_ignore(dst) -def test_import_git_file(git, dvc_repo, erepo): +def test_import_git_file(erepo_dir, tmp_dir, dvc, scm): src = "some_file" dst = "some_file_imported" - src_path = os.path.join(erepo.root_dir, src) - erepo.create(src_path, "hello") - erepo.dvc.scm.add([src_path]) - erepo.dvc.scm.commit("add a regular file") + erepo_dir.scm_gen({src: "hello"}, commit="add a regular file") - dvc_repo.imp(erepo.root_dir, src, dst) + tmp_dir.dvc.imp(str(erepo_dir), src, dst) - assert os.path.exists(dst) - assert os.path.isfile(dst) - assert filecmp.cmp(src_path, dst, shallow=False) - assert git.git.check_ignore(dst) + assert (tmp_dir / dst).exists() + assert os.path.isfile(tmp_dir / dst) + assert filecmp.cmp(erepo_dir / src, tmp_dir / dst, shallow=False) + assert tmp_dir.scm.repo.git.check_ignore(tmp_dir / dst) -def test_import_git_dir(git, dvc_repo, erepo): +def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm): src = "some_directory" dst = "some_directory_imported" - src_file_path = os.path.join(erepo.root_dir, src, "file.txt") - erepo.create(src_file_path, "hello") - erepo.dvc.scm.add([src_file_path]) - erepo.dvc.scm.commit("add a regular dir") + erepo_dir.scm_gen({src: {"file.txt": "hello"}}, commit="add a dir") - dvc_repo.imp(erepo.root_dir, src, dst) + tmp_dir.dvc.imp(str(erepo_dir), src, dst) - assert os.path.exists(dst) - assert os.path.isdir(dst) - trees_equal(os.path.join(erepo.root_dir, src), dst) - assert git.git.check_ignore(dst) + assert (tmp_dir / dst).exists() + assert os.path.isdir(tmp_dir / dst) + trees_equal(erepo_dir / src, tmp_dir / dst) + assert tmp_dir.scm.repo.git.check_ignore(tmp_dir / dst) def test_import_dir(git, dvc_repo, erepo): @@ -75,26 +69,25 @@ def test_import_dir(git, dvc_repo, erepo): assert git.git.check_ignore(dst) -def test_import_non_cached(git, dvc_repo, erepo): +def test_import_non_cached(erepo_dir, tmp_dir, dvc, scm): src = "non_cached_output" dst = src + "_imported" - erepo.dvc.run( + erepo_dir.dvc.run( cmd="echo hello > {}".format(src), outs_no_cache=[src], - cwd=erepo.root_dir, + cwd=str(erepo_dir), ) - src_path = os.path.join(erepo.root_dir, src) - erepo.dvc.scm.add([src_path]) - erepo.dvc.scm.commit("add a non-cached output") + erepo_dir.scm.add([str(erepo_dir / src)]) + erepo_dir.scm.commit("add a non-cached output") - dvc_repo.imp(erepo.root_dir, src, dst) + tmp_dir.dvc.imp(str(erepo_dir), src, dst) - assert os.path.exists(dst) - assert os.path.isfile(dst) - assert filecmp.cmp(src_path, dst, shallow=False) - assert git.git.check_ignore(dst) + assert (tmp_dir / dst).exists() + assert os.path.isfile(tmp_dir / dst) + assert filecmp.cmp(erepo_dir / src, tmp_dir / dst, shallow=False) + assert tmp_dir.scm.repo.git.check_ignore(dst) def test_import_rev(git, dvc_repo, erepo): @@ -214,9 +207,9 @@ def test_pull_non_workspace(git, dvc_repo, erepo): assert os.path.exists(stage.outs[0].cache_path) -def test_import_non_existing(dvc_repo, erepo): +def test_import_non_existing(erepo_dir, tmp_dir, dvc): with pytest.raises(PathMissingError): - dvc_repo.imp(erepo.root_dir, "invalid_output") + tmp_dir.dvc.imp(str(erepo_dir), "invalid_output") # https://github.com/iterative/dvc/pull/2837#discussion_r352123053 with pytest.raises(NoOutputInExternalRepoError): - dvc_repo.imp(erepo.root_dir, "/root/", "root") + tmp_dir.dvc.imp(str(erepo_dir), "/root/", "root") From a36ac0c4ea78f40d8986346e8cfc2a4c533f974c Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Fri, 13 Dec 2019 15:35:02 +0200 Subject: [PATCH 03/13] dependency/repo: fix py2 support --- dvc/dependency/repo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index b2932f6176..df35e45235 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -3,6 +3,7 @@ import copy import os from contextlib import contextmanager +from dvc.utils.compat import FileNotFoundError from funcy import merge From 5ceaf5a6f0c6438fdcaaca32002027acf04e1dc9 Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Fri, 13 Dec 2019 16:27:19 +0200 Subject: [PATCH 04/13] os.path.isfile(str(...)) --- tests/func/test_import.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index f8fc34f8f4..299348baaa 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -38,7 +38,7 @@ def test_import_git_file(erepo_dir, tmp_dir, dvc, scm): tmp_dir.dvc.imp(str(erepo_dir), src, dst) assert (tmp_dir / dst).exists() - assert os.path.isfile(tmp_dir / dst) + assert os.path.isfile(str(tmp_dir / dst)) assert filecmp.cmp(erepo_dir / src, tmp_dir / dst, shallow=False) assert tmp_dir.scm.repo.git.check_ignore(tmp_dir / dst) @@ -85,7 +85,7 @@ def test_import_non_cached(erepo_dir, tmp_dir, dvc, scm): tmp_dir.dvc.imp(str(erepo_dir), src, dst) assert (tmp_dir / dst).exists() - assert os.path.isfile(tmp_dir / dst) + assert os.path.isfile(str(tmp_dir / dst)) assert filecmp.cmp(erepo_dir / src, tmp_dir / dst, shallow=False) assert tmp_dir.scm.repo.git.check_ignore(dst) From c8c285536f996db5f719c85cc2ec8088fae9148c Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Fri, 13 Dec 2019 16:28:08 +0200 Subject: [PATCH 05/13] os.path.isdir(str(...)) --- tests/func/test_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 299348baaa..dacaca0f00 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -52,7 +52,7 @@ def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm): tmp_dir.dvc.imp(str(erepo_dir), src, dst) assert (tmp_dir / dst).exists() - assert os.path.isdir(tmp_dir / dst) + assert os.path.isdir(str(tmp_dir / dst)) trees_equal(erepo_dir / src, tmp_dir / dst) assert tmp_dir.scm.repo.git.check_ignore(tmp_dir / dst) From 11d890a4fa85c77f8cebbbdab42eccb9a4b5e51b Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Fri, 13 Dec 2019 19:15:36 +0200 Subject: [PATCH 06/13] Wrap tmp_dir in str() --- tests/func/test_import.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index dacaca0f00..cce712f8a8 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -39,8 +39,8 @@ def test_import_git_file(erepo_dir, tmp_dir, dvc, scm): assert (tmp_dir / dst).exists() assert os.path.isfile(str(tmp_dir / dst)) - assert filecmp.cmp(erepo_dir / src, tmp_dir / dst, shallow=False) - assert tmp_dir.scm.repo.git.check_ignore(tmp_dir / dst) + assert filecmp.cmp(str(erepo_dir / src), str(tmp_dir / dst), shallow=False) + assert tmp_dir.scm.repo.git.check_ignore(str(tmp_dir / dst)) def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm): @@ -53,8 +53,8 @@ def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm): assert (tmp_dir / dst).exists() assert os.path.isdir(str(tmp_dir / dst)) - trees_equal(erepo_dir / src, tmp_dir / dst) - assert tmp_dir.scm.repo.git.check_ignore(tmp_dir / dst) + trees_equal(str(erepo_dir / src), str(tmp_dir / dst)) + assert tmp_dir.scm.repo.git.check_ignore(str(tmp_dir / dst)) def test_import_dir(git, dvc_repo, erepo): @@ -86,7 +86,7 @@ def test_import_non_cached(erepo_dir, tmp_dir, dvc, scm): assert (tmp_dir / dst).exists() assert os.path.isfile(str(tmp_dir / dst)) - assert filecmp.cmp(erepo_dir / src, tmp_dir / dst, shallow=False) + assert filecmp.cmp(str(erepo_dir / src), str(tmp_dir / dst), shallow=False) assert tmp_dir.scm.repo.git.check_ignore(dst) From 48bf4490d59474fc890ec0ed658b74ac25f016c7 Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Sat, 14 Dec 2019 14:07:31 +0200 Subject: [PATCH 07/13] DependencyREPO.status: handle git-tracked files --- dvc/dependency/repo.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index df35e45235..a038c780d5 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -2,6 +2,7 @@ import copy import os +import filecmp from contextlib import contextmanager from dvc.utils.compat import FileNotFoundError @@ -12,6 +13,7 @@ from dvc.utils.compat import str from dvc.exceptions import OutputNotFoundError from dvc.exceptions import PathMissingError +from dvc.exceptions import NoOutputInExternalRepoError from dvc.utils.fs import fs_copy @@ -47,17 +49,31 @@ def _make_repo(self, **overrides): with external_repo(**merge(self.def_repo, overrides)) as repo: yield repo + def _git_status(self): + with self._make_repo() as old_repo: + with self._make_repo(rev_lock=None) as new_repo: + old_path = os.path.join(old_repo.root_dir, self.def_path) + new_path = os.path.join(new_repo.root_dir, self.def_path) + file_unchanged = filecmp.cmp(old_path, new_path, shallow=False) + if file_unchanged: + return {} + else: + return {str(self): "update available"} + def status(self): - with self._make_repo() as repo: - current = repo.find_out_by_relpath(self.def_path).info + try: + with self._make_repo() as repo: + current = repo.find_out_by_relpath(self.def_path).info - with self._make_repo(rev_lock=None) as repo: - updated = repo.find_out_by_relpath(self.def_path).info + with self._make_repo(rev_lock=None) as repo: + updated = repo.find_out_by_relpath(self.def_path).info - if current != updated: - return {str(self): "update available"} + if current != updated: + return {str(self): "update available"} - return {} + return {} + except NoOutputInExternalRepoError: + return self._git_status() def save(self): pass From 8985759d6670b9bd7831285388b30a79d0c8c44f Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Sat, 14 Dec 2019 14:42:51 +0200 Subject: [PATCH 08/13] _git_status(): use different cache dirs --- dvc/dependency/repo.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index a038c780d5..28a9c9a2da 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -50,8 +50,13 @@ def _make_repo(self, **overrides): yield repo def _git_status(self): - with self._make_repo() as old_repo: - with self._make_repo(rev_lock=None) as new_repo: + cache_dir = self.repo.cache.local.cache_dir + with self._make_repo( + cache_dir=os.path.join(cache_dir, "old") + ) as old_repo: + with self._make_repo( + cache_dir=os.path.join(cache_dir, "new"), rev_lock=None + ) as new_repo: old_path = os.path.join(old_repo.root_dir, self.def_path) new_path = os.path.join(new_repo.root_dir, self.def_path) file_unchanged = filecmp.cmp(old_path, new_path, shallow=False) From 4d59bcddfdde0fb6ad2e2850359155b8e4d6de11 Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Sat, 14 Dec 2019 19:38:57 +0200 Subject: [PATCH 09/13] test_status_imported_git_file --- tests/func/test_import.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index cce712f8a8..12b6904c42 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -43,6 +43,16 @@ def test_import_git_file(erepo_dir, tmp_dir, dvc, scm): assert tmp_dir.scm.repo.git.check_ignore(str(tmp_dir / dst)) +def test_status_imported_git_file(erepo_dir, tmp_dir, dvc, scm): + src = "some_file" + dst = "some_file_imported" + + erepo_dir.scm_gen({src: "hello"}, commit="add a regular file") + + tmp_dir.dvc.imp(str(erepo_dir), src, dst) + tmp_dir.dvc.status([dst + ".dvc"]) + + def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm): src = "some_directory" dst = "some_directory_imported" From 59156021de5c70008e59c8f32a8e4342ac478ab8 Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Sat, 14 Dec 2019 19:39:05 +0200 Subject: [PATCH 10/13] Revert "test_status_imported_git_file" This reverts commit 4d59bcddfdde0fb6ad2e2850359155b8e4d6de11. --- tests/func/test_import.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index 12b6904c42..cce712f8a8 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -43,16 +43,6 @@ def test_import_git_file(erepo_dir, tmp_dir, dvc, scm): assert tmp_dir.scm.repo.git.check_ignore(str(tmp_dir / dst)) -def test_status_imported_git_file(erepo_dir, tmp_dir, dvc, scm): - src = "some_file" - dst = "some_file_imported" - - erepo_dir.scm_gen({src: "hello"}, commit="add a regular file") - - tmp_dir.dvc.imp(str(erepo_dir), src, dst) - tmp_dir.dvc.status([dst + ".dvc"]) - - def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm): src = "some_directory" dst = "some_directory_imported" From 7461f1484533707fbcfcf87f994fc952989a57f2 Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Sun, 15 Dec 2019 18:14:38 +0200 Subject: [PATCH 11/13] Revert "_git_status(): use different cache dirs" This reverts commit 8985759d6670b9bd7831285388b30a79d0c8c44f. --- dvc/dependency/repo.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index 28a9c9a2da..a038c780d5 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -50,13 +50,8 @@ def _make_repo(self, **overrides): yield repo def _git_status(self): - cache_dir = self.repo.cache.local.cache_dir - with self._make_repo( - cache_dir=os.path.join(cache_dir, "old") - ) as old_repo: - with self._make_repo( - cache_dir=os.path.join(cache_dir, "new"), rev_lock=None - ) as new_repo: + with self._make_repo() as old_repo: + with self._make_repo(rev_lock=None) as new_repo: old_path = os.path.join(old_repo.root_dir, self.def_path) new_path = os.path.join(new_repo.root_dir, self.def_path) file_unchanged = filecmp.cmp(old_path, new_path, shallow=False) From c02c0169cf849064c59d672029e176af6cfc79cf Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Sun, 15 Dec 2019 18:14:47 +0200 Subject: [PATCH 12/13] Revert "DependencyREPO.status: handle git-tracked files" This reverts commit 48bf4490d59474fc890ec0ed658b74ac25f016c7. --- dvc/dependency/repo.py | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index a038c780d5..df35e45235 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -2,7 +2,6 @@ import copy import os -import filecmp from contextlib import contextmanager from dvc.utils.compat import FileNotFoundError @@ -13,7 +12,6 @@ from dvc.utils.compat import str from dvc.exceptions import OutputNotFoundError from dvc.exceptions import PathMissingError -from dvc.exceptions import NoOutputInExternalRepoError from dvc.utils.fs import fs_copy @@ -49,31 +47,17 @@ def _make_repo(self, **overrides): with external_repo(**merge(self.def_repo, overrides)) as repo: yield repo - def _git_status(self): - with self._make_repo() as old_repo: - with self._make_repo(rev_lock=None) as new_repo: - old_path = os.path.join(old_repo.root_dir, self.def_path) - new_path = os.path.join(new_repo.root_dir, self.def_path) - file_unchanged = filecmp.cmp(old_path, new_path, shallow=False) - if file_unchanged: - return {} - else: - return {str(self): "update available"} - def status(self): - try: - with self._make_repo() as repo: - current = repo.find_out_by_relpath(self.def_path).info + with self._make_repo() as repo: + current = repo.find_out_by_relpath(self.def_path).info - with self._make_repo(rev_lock=None) as repo: - updated = repo.find_out_by_relpath(self.def_path).info + with self._make_repo(rev_lock=None) as repo: + updated = repo.find_out_by_relpath(self.def_path).info - if current != updated: - return {str(self): "update available"} + if current != updated: + return {str(self): "update available"} - return {} - except NoOutputInExternalRepoError: - return self._git_status() + return {} def save(self): pass From ad98f0fb50a3bf6811daa8a2c748d32c9da02409 Mon Sep 17 00:00:00 2001 From: Wojciech Baranowski Date: Sun, 15 Dec 2019 18:19:58 +0200 Subject: [PATCH 13/13] Use fspath() in tests --- tests/func/test_import.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/tests/func/test_import.py b/tests/func/test_import.py index cce712f8a8..9a54b726f1 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -14,6 +14,7 @@ from dvc.stage import Stage from dvc.system import System from dvc.utils import makedirs +from dvc.utils.compat import fspath from tests.utils import trees_equal @@ -35,12 +36,13 @@ def test_import_git_file(erepo_dir, tmp_dir, dvc, scm): erepo_dir.scm_gen({src: "hello"}, commit="add a regular file") - tmp_dir.dvc.imp(str(erepo_dir), src, dst) + tmp_dir.dvc.imp(fspath(erepo_dir), src, dst) - assert (tmp_dir / dst).exists() - assert os.path.isfile(str(tmp_dir / dst)) - assert filecmp.cmp(str(erepo_dir / src), str(tmp_dir / dst), shallow=False) - assert tmp_dir.scm.repo.git.check_ignore(str(tmp_dir / dst)) + assert (tmp_dir / dst).is_file() + assert filecmp.cmp( + fspath(erepo_dir / src), fspath(tmp_dir / dst), shallow=False + ) + assert tmp_dir.scm.repo.git.check_ignore(fspath(tmp_dir / dst)) def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm): @@ -49,12 +51,11 @@ def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm): erepo_dir.scm_gen({src: {"file.txt": "hello"}}, commit="add a dir") - tmp_dir.dvc.imp(str(erepo_dir), src, dst) + tmp_dir.dvc.imp(fspath(erepo_dir), src, dst) - assert (tmp_dir / dst).exists() - assert os.path.isdir(str(tmp_dir / dst)) - trees_equal(str(erepo_dir / src), str(tmp_dir / dst)) - assert tmp_dir.scm.repo.git.check_ignore(str(tmp_dir / dst)) + assert (tmp_dir / dst).is_dir() + trees_equal(fspath(erepo_dir / src), fspath(tmp_dir / dst)) + assert tmp_dir.scm.repo.git.check_ignore(fspath(tmp_dir / dst)) def test_import_dir(git, dvc_repo, erepo): @@ -76,17 +77,18 @@ def test_import_non_cached(erepo_dir, tmp_dir, dvc, scm): erepo_dir.dvc.run( cmd="echo hello > {}".format(src), outs_no_cache=[src], - cwd=str(erepo_dir), + cwd=fspath(erepo_dir), ) - erepo_dir.scm.add([str(erepo_dir / src)]) + erepo_dir.scm.add([fspath(erepo_dir / src)]) erepo_dir.scm.commit("add a non-cached output") - tmp_dir.dvc.imp(str(erepo_dir), src, dst) + tmp_dir.dvc.imp(fspath(erepo_dir), src, dst) - assert (tmp_dir / dst).exists() - assert os.path.isfile(str(tmp_dir / dst)) - assert filecmp.cmp(str(erepo_dir / src), str(tmp_dir / dst), shallow=False) + assert (tmp_dir / dst).is_file() + assert filecmp.cmp( + fspath(erepo_dir / src), fspath(tmp_dir / dst), shallow=False + ) assert tmp_dir.scm.repo.git.check_ignore(dst) @@ -209,7 +211,7 @@ def test_pull_non_workspace(git, dvc_repo, erepo): def test_import_non_existing(erepo_dir, tmp_dir, dvc): with pytest.raises(PathMissingError): - tmp_dir.dvc.imp(str(erepo_dir), "invalid_output") + tmp_dir.dvc.imp(fspath(erepo_dir), "invalid_output") # https://github.com/iterative/dvc/pull/2837#discussion_r352123053 with pytest.raises(NoOutputInExternalRepoError): - tmp_dir.dvc.imp(str(erepo_dir), "/root/", "root") + tmp_dir.dvc.imp(fspath(erepo_dir), "/root/", "root")