From a2cc0858cc0e6bcd6f2462746b413817590f5b08 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 2 Jul 2020 21:45:03 +0300 Subject: [PATCH 1/2] [RFC] tests: make read_text() work with directories This PR is just a showcase to see how we like it. I've converted a few tests to it, but there will be much more to come. ``` assert (tmp_dir / "file") == "contents" assert (tmp_dir / "dir").read_text() == {"subdir": {"file": "hello"}} ``` This approach is nicer because it allows us to use a declaratory asserts instead of relying on fs files. The problem becomes much more apparent once you get into remote workspaces, where we `trees_equal` doesn't work. --- tests/dir_helpers.py | 11 +++++++++++ tests/func/test_get.py | 8 ++------ tests/func/test_import.py | 10 +++------- tests/utils/__init__.py | 11 ----------- 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/tests/dir_helpers.py b/tests/dir_helpers.py index 4395a2a022..02ae154fd6 100644 --- a/tests/dir_helpers.py +++ b/tests/dir_helpers.py @@ -221,6 +221,17 @@ def branch(self, name, new=False): finally: self.scm.checkout(old) + def read_text(self, *args, **kwargs): # pylint: disable=signature-differs + # NOTE: on windows we'll get PermissionError instead of + # IsADirectoryError when we try to `open` a directory, so we can't + # rely on exception flow control + if self.is_dir(): + return { + path.name: path.read_text(*args, **kwargs) + for path in self.iterdir() + } + return super().read_text(*args, **kwargs) + def _coerce_filenames(filenames): if isinstance(filenames, (str, bytes, pathlib.PurePath)): diff --git a/tests/func/test_get.py b/tests/func/test_get.py index ccb8160bfc..72736f8077 100644 --- a/tests/func/test_get.py +++ b/tests/func/test_get.py @@ -10,7 +10,6 @@ from dvc.repo.get import GetDVCFileError from dvc.system import System from dvc.utils.fs import makedirs -from tests.utils import trees_equal def test_get_repo_file(tmp_dir, erepo_dir): @@ -29,8 +28,7 @@ def test_get_repo_dir(tmp_dir, erepo_dir): Repo.get(os.fspath(erepo_dir), "dir", "dir_imported") - assert os.path.isdir("dir_imported") - trees_equal(erepo_dir / "dir", "dir_imported") + assert (tmp_dir / "dir_imported").read_text() == {"file": "contents"} @pytest.mark.parametrize( @@ -44,7 +42,6 @@ def test_get_git_file(tmp_dir, erepo): Repo.get(os.fspath(erepo), src, dst) - assert (tmp_dir / dst).is_file() assert (tmp_dir / dst).read_text() == "hello" @@ -61,8 +58,7 @@ def test_get_git_dir(tmp_dir, erepo): Repo.get(os.fspath(erepo), src, dst) - assert (tmp_dir / dst).is_dir() - trees_equal(erepo / src, tmp_dir / dst) + assert (tmp_dir / dst).read_text() == {"dir": {"file.txt": "hello"}} def test_cache_type_is_properly_overridden(tmp_dir, erepo_dir): diff --git a/tests/func/test_import.py b/tests/func/test_import.py index b97d9ca102..d1f6b5e59c 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -11,7 +11,6 @@ from dvc.exceptions import DownloadError, PathMissingError from dvc.system import System from dvc.utils.fs import makedirs, remove -from tests.utils import trees_equal def test_import(tmp_dir, scm, dvc, erepo_dir): @@ -73,8 +72,7 @@ def test_import_git_dir(tmp_dir, scm, dvc, git_dir, src_is_dvc): stage = dvc.imp(os.fspath(git_dir), "src", "dst") - assert (tmp_dir / "dst").is_dir() - trees_equal(git_dir / "src", tmp_dir / "dst") + assert (tmp_dir / "dst").read_text() == {"file.txt": "hello"} assert tmp_dir.scm.repo.git.check_ignore(os.fspath(tmp_dir / "dst")) assert stage.deps[0].def_repo == { "url": os.fspath(git_dir), @@ -88,8 +86,7 @@ def test_import_dir(tmp_dir, scm, dvc, erepo_dir): stage = dvc.imp(os.fspath(erepo_dir), "dir", "dir_imported") - assert os.path.isdir("dir_imported") - trees_equal(erepo_dir / "dir", "dir_imported") + assert (tmp_dir / "dir_imported").read_text() == {"foo": "foo content"} assert scm.repo.git.check_ignore("dir_imported") assert stage.deps[0].def_repo == { "url": os.fspath(erepo_dir), @@ -222,8 +219,7 @@ def test_pull_imported_directory_stage(tmp_dir, dvc, erepo_dir): dvc.pull(["dir_imported.dvc"]) - assert os.path.isdir("dir_imported") - trees_equal(erepo_dir / "dir", "dir_imported") + assert (tmp_dir / "dir_imported").read_text() == {"foo": "foo content"} def test_download_error_pulling_imported_stage(tmp_dir, dvc, erepo_dir): diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py index 060315e956..91f63c6fb4 100644 --- a/tests/utils/__init__.py +++ b/tests/utils/__init__.py @@ -1,6 +1,5 @@ import os from contextlib import contextmanager -from filecmp import dircmp from dvc.scm import Git @@ -20,15 +19,5 @@ def cd(newdir): os.chdir(prevdir) -def trees_equal(dir_path_1, dir_path_2): - - comparison = dircmp(dir_path_1, dir_path_2) - - assert set(comparison.left_only) == set(comparison.right_only) == set() - - for d in comparison.common_dirs: - trees_equal(os.path.join(dir_path_1, d), os.path.join(dir_path_2, d)) - - def to_posixpath(path): return path.replace("\\", "/") From 6f5fd242b890664371813d92ccd88b1398df68d0 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 2 Jul 2020 22:23:24 +0300 Subject: [PATCH 2/2] status: pass tree as a kwarg Fixes #4150 Depends on https://github.com/iterative/dvc/pull/4155 --- dvc/dependency/repo.py | 2 +- tests/func/test_update.py | 51 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index 8566c080b9..3ae2a77dfb 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -64,7 +64,7 @@ def _get_checksum(self, locked=True): # We are polluting our repo cache with some dir listing here if tree.isdir(path): - return self.repo.cache.local.get_hash(path, tree) + return self.repo.cache.local.get_hash(path, tree=tree) return tree.get_file_hash(path) def status(self): diff --git a/tests/func/test_update.py b/tests/func/test_update.py index b5bc9d14ed..ec49eaf641 100644 --- a/tests/func/test_update.py +++ b/tests/func/test_update.py @@ -10,26 +10,69 @@ def test_update_import(tmp_dir, dvc, erepo_dir, cached): gen = erepo_dir.dvc_gen if cached else erepo_dir.scm_gen with erepo_dir.branch("branch", new=True), erepo_dir.chdir(): - gen("version", "branch", "add version file") + gen( + { + "version": "branch", + "dir": {"version": "branch", "subdir": {"file": "file"}}, + }, + commit="add version file", + ) old_rev = erepo_dir.scm.get_rev() stage = dvc.imp(os.fspath(erepo_dir), "version", "version", rev="branch") + dir_stage = dvc.imp(os.fspath(erepo_dir), "dir", "dir", rev="branch") + assert dvc.status() == {} assert (tmp_dir / "version").read_text() == "branch" + assert (tmp_dir / "dir").read_text() == { + "version": "branch", + "subdir": {"file": "file"}, + } assert stage.deps[0].def_repo["rev_lock"] == old_rev + assert dir_stage.deps[0].def_repo["rev_lock"] == old_rev # Update version file with erepo_dir.branch("branch", new=False), erepo_dir.chdir(): - gen("version", "updated", "update version content") + gen( + { + "version": "updated", + "dir": {"version": "updated", "subdir": {"file": "file"}}, + }, + commit="update version content", + ) new_rev = erepo_dir.scm.get_rev() assert old_rev != new_rev - dvc.update([stage.path]) + assert dvc.status() == { + "dir.dvc": [ + { + "changed deps": { + f"dir ({os.fspath(erepo_dir)})": "update available" + } + } + ], + "version.dvc": [ + { + "changed deps": { + f"version ({os.fspath(erepo_dir)})": "update available" + } + } + ], + } + + (stage,) = dvc.update(stage.path) + (dir_stage,) = dvc.update(dir_stage.path) + assert dvc.status() == {} + assert (tmp_dir / "version").read_text() == "updated" + assert (tmp_dir / "dir").read_text() == { + "version": "updated", + "subdir": {"file": "file"}, + } - stage = Dvcfile(dvc, stage.path).stage assert stage.deps[0].def_repo["rev_lock"] == new_rev + assert dir_stage.deps[0].def_repo["rev_lock"] == new_rev def test_update_import_after_remote_updates_to_dvc(tmp_dir, dvc, erepo_dir):