From 72ac35e66f83571ff1614df83f1a875e45ef58be Mon Sep 17 00:00:00 2001 From: Sandeep Mistry Date: Mon, 23 Nov 2020 01:38:02 +0000 Subject: [PATCH 1/9] wip: add target support (back) to dvc diff --- dvc/command/diff.py | 13 ++++++++++++- dvc/repo/diff.py | 13 ++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/dvc/command/diff.py b/dvc/command/diff.py index 4842018a4a..ae565ddd40 100644 --- a/dvc/command/diff.py +++ b/dvc/command/diff.py @@ -127,7 +127,9 @@ def _format(diff, hide_missing=False): def run(self): try: - diff = self.repo.diff(self.args.a_rev, self.args.b_rev) + diff = self.repo.diff( + self.args.a_rev, self.args.b_rev, self.args.target + ) show_hash = self.args.show_hash hide_missing = self.args.b_rev or self.args.hide_missing if hide_missing: @@ -167,6 +169,15 @@ def add_parser(subparsers, parent_parser): help=DIFF_DESCRIPTION, formatter_class=argparse.RawDescriptionHelpFormatter, ) + diff_parser.add_argument( + "-t", + "--target", + help=( + "Source path to a data file or directory. Default None. " + "If not specified, compares all files and directories " + "that are under DVC control in the current working space." + ), + ) diff_parser.add_argument( "a_rev", help="Old Git commit to compare (defaults to HEAD)", diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py index ba7a4c44ca..f263ec73dc 100644 --- a/dvc/repo/diff.py +++ b/dvc/repo/diff.py @@ -9,7 +9,7 @@ @locked -def diff(self, a_rev="HEAD", b_rev=None): +def diff(self, a_rev="HEAD", b_rev=None, target=None): """ By default, it compares the workspace with the last commit's tree. @@ -28,7 +28,7 @@ def diff(self, a_rev="HEAD", b_rev=None): # brancher always returns workspace, but we only need to compute # workspace paths/checksums if b_rev was None continue - results[rev] = _paths_checksums(self) + results[rev] = _paths_checksums(self, target) old = results[a_rev] new = results[b_rev] @@ -62,7 +62,7 @@ def diff(self, a_rev="HEAD", b_rev=None): return ret if any(ret.values()) else {} -def _paths_checksums(repo): +def _paths_checksums(repo, target): """ A dictionary of checksums addressed by relpaths collected from the current tree outputs. @@ -74,10 +74,10 @@ def _paths_checksums(repo): file: "data" """ - return dict(_output_paths(repo)) + return dict(_output_paths(repo, target)) -def _output_paths(repo): +def _output_paths(repo, target): repo_tree = RepoTree(repo, stream=True) on_working_tree = isinstance(repo.tree, LocalTree) @@ -101,6 +101,9 @@ def _to_checksum(output): for stage in repo.stages: for output in stage.outs: if _exists(output): + # if a target was supplied, filter out non-target files + if target is not None and not str(output).startswith(target): + continue yield _to_path(output), _to_checksum(output) if output.is_dir_checksum: yield from _dir_output_paths(repo_tree, output) From 5bda56d34efce40b76b1753166ad76c9a6f2bcfd Mon Sep 17 00:00:00 2001 From: Sandeep Mistry Date: Sun, 29 Nov 2020 21:26:24 +0000 Subject: [PATCH 2/9] Change diff param to targets and update repo.diff accordingly --- dvc/command/diff.py | 13 ++++++------ dvc/repo/diff.py | 48 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/dvc/command/diff.py b/dvc/command/diff.py index ae565ddd40..3b47ef4619 100644 --- a/dvc/command/diff.py +++ b/dvc/command/diff.py @@ -5,6 +5,7 @@ import colorama +from dvc.command import completion from dvc.command.base import CmdBase, append_doc_link from dvc.exceptions import DvcException @@ -128,7 +129,7 @@ def _format(diff, hide_missing=False): def run(self): try: diff = self.repo.diff( - self.args.a_rev, self.args.b_rev, self.args.target + self.args.a_rev, self.args.b_rev, self.args.targets ) show_hash = self.args.show_hash hide_missing = self.args.b_rev or self.args.hide_missing @@ -170,14 +171,14 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) diff_parser.add_argument( - "-t", - "--target", + "--targets", + nargs="*", help=( - "Source path to a data file or directory. Default None. " - "If not specified, compares all files and directories " + "Source paths to a data files or directories. Default None. " + "If not specified, compares all files and directories `). " "that are under DVC control in the current working space." ), - ) + ).complete = completion.FILE diff_parser.add_argument( "a_rev", help="Old Git commit to compare (defaults to HEAD)", diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py index f263ec73dc..0e8775bf81 100644 --- a/dvc/repo/diff.py +++ b/dvc/repo/diff.py @@ -1,6 +1,8 @@ import logging import os +from dvc.exceptions import PathMissingError +from dvc.path_info import PathInfo from dvc.repo import locked from dvc.tree.local import LocalTree from dvc.tree.repo import RepoTree @@ -9,7 +11,7 @@ @locked -def diff(self, a_rev="HEAD", b_rev=None, target=None): +def diff(self, a_rev="HEAD", b_rev=None, targets=None): """ By default, it compares the workspace with the last commit's tree. @@ -28,11 +30,14 @@ def diff(self, a_rev="HEAD", b_rev=None, target=None): # brancher always returns workspace, but we only need to compute # workspace paths/checksums if b_rev was None continue - results[rev] = _paths_checksums(self, target) + results[rev] = _paths_checksums(self) old = results[a_rev] new = results[b_rev] + if targets is not None and len(targets): + old, new = _filter_diff_targets(self, targets, old, new) + # Compare paths between the old and new tree. # set() efficiently converts dict keys to a set added = sorted(set(new) - set(old)) @@ -62,7 +67,7 @@ def diff(self, a_rev="HEAD", b_rev=None, target=None): return ret if any(ret.values()) else {} -def _paths_checksums(repo, target): +def _paths_checksums(repo): """ A dictionary of checksums addressed by relpaths collected from the current tree outputs. @@ -74,10 +79,10 @@ def _paths_checksums(repo, target): file: "data" """ - return dict(_output_paths(repo, target)) + return dict(_output_paths(repo)) -def _output_paths(repo, target): +def _output_paths(repo): repo_tree = RepoTree(repo, stream=True) on_working_tree = isinstance(repo.tree, LocalTree) @@ -101,9 +106,6 @@ def _to_checksum(output): for stage in repo.stages: for output in stage.outs: if _exists(output): - # if a target was supplied, filter out non-target files - if target is not None and not str(output).startswith(target): - continue yield _to_path(output), _to_checksum(output) if output.is_dir_checksum: yield from _dir_output_paths(repo_tree, output) @@ -127,3 +129,33 @@ def _filter_missing(repo, paths): out = metadata.outs[0] if out.status().get(str(out)) == "not in cache": yield path + + +def _filter_diff_targets(repo, targets, old, new): + filtered_old = {} + filtered_new = {} + + for target_path in targets: + target_path_info = PathInfo(target_path) + target_not_path_found = True + + # filter out targets in old + for old_path, old_hash in old.items(): + old_path_info = PathInfo(old_path) + + if old_path_info.isin_or_eq(target_path_info): + filtered_old[old_path] = old_hash + target_not_path_found = False + + # filter out targets in new + for new_path, new_hash in new.items(): + new_path_info = PathInfo(new_path) + + if new_path_info.isin_or_eq(target_path_info): + filtered_new[new_path] = new_hash + target_not_path_found = False + + if target_not_path_found: + raise PathMissingError(target_path, repo) + + return filtered_old, filtered_new From 7e2e75a3235a623b4fbdccaea2ce957d5bbf6154 Mon Sep 17 00:00:00 2001 From: Sandeep Mistry Date: Wed, 2 Dec 2020 01:22:11 +0000 Subject: [PATCH 3/9] Update dvc diff --targets with PR review feedback --- dvc/path_info.py | 16 +++++++++ dvc/repo/diff.py | 86 +++++++++++++++++++++++++++--------------------- 2 files changed, 64 insertions(+), 38 deletions(-) diff --git a/dvc/path_info.py b/dvc/path_info.py index 6f6aa57d80..87a5256944 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -19,8 +19,24 @@ def overlaps(self, other): return self.isin_or_eq(other) or other.isin(self) def isin_or_eq(self, other): + if isinstance(other, list): + others = other + + for other in others: + if self.isin_or_eq(other): + return True + + return False + return self == other or self.isin(other) # pylint: disable=no-member + def isinside(self, others): + for other in others: + if other.isin(self): + return True + + return False + class PathInfo(pathlib.PurePath, _BasePath): # Use __slots__ in PathInfo objects following PurePath implementation. diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py index 0e8775bf81..0ebc6cad7b 100644 --- a/dvc/repo/diff.py +++ b/dvc/repo/diff.py @@ -2,7 +2,6 @@ import os from dvc.exceptions import PathMissingError -from dvc.path_info import PathInfo from dvc.repo import locked from dvc.tree.local import LocalTree from dvc.tree.repo import RepoTree @@ -25,19 +24,36 @@ def diff(self, a_rev="HEAD", b_rev=None, targets=None): b_rev = b_rev if b_rev else "workspace" results = {} + missing_targets = {} for rev in self.brancher(revs=[a_rev, b_rev]): if rev == "workspace" and rev != b_rev: # brancher always returns workspace, but we only need to compute # workspace paths/checksums if b_rev was None continue - results[rev] = _paths_checksums(self) + + targets_path_infos = None + if targets is not None: + # convert targets to path_infos, and capture any missing targets + targets_path_infos, missing_targets[rev] = _targets_to_path_infos( + self, targets + ) + + results[rev] = _paths_checksums(self, targets_path_infos) + + if targets is not None: + # check for overlapping missing targets between a_rev and b_rev + missing_targets = list( + set(missing_targets[a_rev]) & set(missing_targets[b_rev]) + ) + + if len(missing_targets): + # invalid targets supplied, raise error + for missing_target in missing_targets: + raise PathMissingError(missing_target, self) old = results[a_rev] new = results[b_rev] - if targets is not None and len(targets): - old, new = _filter_diff_targets(self, targets, old, new) - # Compare paths between the old and new tree. # set() efficiently converts dict keys to a set added = sorted(set(new) - set(old)) @@ -67,7 +83,7 @@ def diff(self, a_rev="HEAD", b_rev=None, targets=None): return ret if any(ret.values()) else {} -def _paths_checksums(repo): +def _paths_checksums(repo, targets): """ A dictionary of checksums addressed by relpaths collected from the current tree outputs. @@ -79,12 +95,13 @@ def _paths_checksums(repo): file: "data" """ - return dict(_output_paths(repo)) + return dict(_output_paths(repo, targets)) -def _output_paths(repo): +def _output_paths(repo, targets): repo_tree = RepoTree(repo, stream=True) on_working_tree = isinstance(repo.tree, LocalTree) + no_targets = targets is None def _exists(output): if on_working_tree: @@ -106,17 +123,25 @@ def _to_checksum(output): for stage in repo.stages: for output in stage.outs: if _exists(output): - yield _to_path(output), _to_checksum(output) - if output.is_dir_checksum: - yield from _dir_output_paths(repo_tree, output) + yield_output = no_targets or output.path_info.isin_or_eq( + targets + ) + if yield_output: + yield _to_path(output), _to_checksum(output) + if output.is_dir_checksum and ( + yield_output or output.path_info.isinside(targets) + ): + yield from _dir_output_paths(repo_tree, output, targets) -def _dir_output_paths(repo_tree, output): + +def _dir_output_paths(repo_tree, output, targets=None): from dvc.config import NoRemoteError try: for fname in repo_tree.walk_files(output.path_info): - yield str(fname), repo_tree.get_file_hash(fname).value + if targets is None or fname.isin_or_eq(targets): + yield str(fname), repo_tree.get_file_hash(fname).value except NoRemoteError: logger.warning("dir cache entry for '%s' is missing", output) @@ -131,31 +156,16 @@ def _filter_missing(repo, paths): yield path -def _filter_diff_targets(repo, targets, old, new): - filtered_old = {} - filtered_new = {} - - for target_path in targets: - target_path_info = PathInfo(target_path) - target_not_path_found = True - - # filter out targets in old - for old_path, old_hash in old.items(): - old_path_info = PathInfo(old_path) +def _targets_to_path_infos(repo, targets): + path_infos = [] + missing = [] - if old_path_info.isin_or_eq(target_path_info): - filtered_old[old_path] = old_hash - target_not_path_found = False - - # filter out targets in new - for new_path, new_hash in new.items(): - new_path_info = PathInfo(new_path) - - if new_path_info.isin_or_eq(target_path_info): - filtered_new[new_path] = new_hash - target_not_path_found = False + repo_tree = RepoTree(repo, stream=True) - if target_not_path_found: - raise PathMissingError(target_path, repo) + for target in targets: + if repo_tree.exists(target): + path_infos.append(repo_tree.metadata(target).path_info) + else: + missing.append(target) - return filtered_old, filtered_new + return path_infos, missing From 63db93a82d990dcff1a03a17b8407bae7cba0ade Mon Sep 17 00:00:00 2001 From: Sandeep Mistry Date: Wed, 2 Dec 2020 01:23:37 +0000 Subject: [PATCH 4/9] Tests for dvc diff --targets --- tests/func/test_diff.py | 84 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/tests/func/test_diff.py b/tests/func/test_diff.py index 6f296aa35e..e3a3018aca 100644 --- a/tests/func/test_diff.py +++ b/tests/func/test_diff.py @@ -264,3 +264,87 @@ def test_no_commits(tmp_dir): assert Git().no_commits assert Repo.init().diff() == {} + + +def test_targets(tmp_dir, scm, dvc): + from dvc.exceptions import PathMissingError + + tmp_dir.dvc_gen("file", "first", commit="add a file") + + tmp_dir.dvc_gen({"dir": {"1": "1", "2": "2"}}) + tmp_dir.dvc_gen("file", "second") + + remove(tmp_dir / ".dvc" / "cache") + (tmp_dir / ".dvc" / "tmp" / "state").unlink() + + dir_checksum = "5fb6b29836c388e093ca0715c872fe2a.dir" + + with pytest.raises(PathMissingError): + dvc.diff(targets=["missing"]) + + assert dvc.diff(targets=["file"]) == { + "added": [], + "deleted": [], + "modified": [ + { + "path": "file", + "hash": {"old": digest("first"), "new": digest("second")}, + } + ], + "not in cache": [], + } + + assert dvc.diff(targets=["dir"]) == { + "added": [ + {"path": os.path.join("dir", ""), "hash": dir_checksum}, + {"path": os.path.join("dir", "1"), "hash": digest("1")}, + {"path": os.path.join("dir", "2"), "hash": digest("2")}, + ], + "deleted": [], + "modified": [], + "not in cache": [], + } + + assert dvc.diff(targets=["dir/"]) == { + "added": [ + {"path": os.path.join("dir", ""), "hash": dir_checksum}, + {"path": os.path.join("dir", "1"), "hash": digest("1")}, + {"path": os.path.join("dir", "2"), "hash": digest("2")}, + ], + "deleted": [], + "modified": [], + "not in cache": [], + } + + assert dvc.diff(targets=["dir/1"]) == { + "added": [{"path": os.path.join("dir", "1"), "hash": digest("1")}], + "deleted": [], + "modified": [], + "not in cache": [], + } + + assert dvc.diff(targets=["dir/1", "dir/2"]) == { + "added": [ + {"path": os.path.join("dir", "1"), "hash": digest("1")}, + {"path": os.path.join("dir", "2"), "hash": digest("2")}, + ], + "deleted": [], + "modified": [], + "not in cache": [], + } + + assert dvc.diff(targets=["file", "dir"]) == { + "added": [ + {"path": os.path.join("dir", ""), "hash": dir_checksum}, + {"path": os.path.join("dir", "1"), "hash": digest("1")}, + {"path": os.path.join("dir", "2"), "hash": digest("2")}, + ], + "deleted": [], + "modified": [ + { + "path": "file", + "hash": {"old": digest("first"), "new": digest("second")}, + } + ], + "not in cache": [], + } From a1613ce284af7852dddbed047c1c331ee25f10d9 Mon Sep 17 00:00:00 2001 From: Sandeep Mistry Date: Fri, 4 Dec 2020 01:50:29 +0000 Subject: [PATCH 5/9] Update dvc diff --targets with PR review feedback --- dvc/path_info.py | 16 ---------------- dvc/repo/diff.py | 31 +++++++++++++++---------------- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/dvc/path_info.py b/dvc/path_info.py index 87a5256944..6f6aa57d80 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -19,24 +19,8 @@ def overlaps(self, other): return self.isin_or_eq(other) or other.isin(self) def isin_or_eq(self, other): - if isinstance(other, list): - others = other - - for other in others: - if self.isin_or_eq(other): - return True - - return False - return self == other or self.isin(other) # pylint: disable=no-member - def isinside(self, others): - for other in others: - if other.isin(self): - return True - - return False - class PathInfo(pathlib.PurePath, _BasePath): # Use __slots__ in PathInfo objects following PurePath implementation. diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py index 0ebc6cad7b..663ef2e426 100644 --- a/dvc/repo/diff.py +++ b/dvc/repo/diff.py @@ -42,14 +42,10 @@ def diff(self, a_rev="HEAD", b_rev=None, targets=None): if targets is not None: # check for overlapping missing targets between a_rev and b_rev - missing_targets = list( - set(missing_targets[a_rev]) & set(missing_targets[b_rev]) - ) - - if len(missing_targets): - # invalid targets supplied, raise error - for missing_target in missing_targets: - raise PathMissingError(missing_target, self) + for target in set(missing_targets[a_rev]) & set( + missing_targets[b_rev] + ): + raise PathMissingError(target, self) old = results[a_rev] new = results[b_rev] @@ -101,7 +97,6 @@ def _paths_checksums(repo, targets): def _output_paths(repo, targets): repo_tree = RepoTree(repo, stream=True) on_working_tree = isinstance(repo.tree, LocalTree) - no_targets = targets is None def _exists(output): if on_working_tree: @@ -123,14 +118,16 @@ def _to_checksum(output): for stage in repo.stages: for output in stage.outs: if _exists(output): - yield_output = no_targets or output.path_info.isin_or_eq( - targets - ) - if yield_output: + if targets is None or any( + output.path_info == target for target in targets + ): yield _to_path(output), _to_checksum(output) - if output.is_dir_checksum and ( - yield_output or output.path_info.isinside(targets) + if output.is_dir_checksum: + yield from _dir_output_paths(repo_tree, output) + + elif output.is_dir_checksum and any( + target.isin(output.path_info) for target in targets ): yield from _dir_output_paths(repo_tree, output, targets) @@ -140,7 +137,9 @@ def _dir_output_paths(repo_tree, output, targets=None): try: for fname in repo_tree.walk_files(output.path_info): - if targets is None or fname.isin_or_eq(targets): + if targets is None or any( + fname.isin_or_eq(target) for target in targets + ): yield str(fname), repo_tree.get_file_hash(fname).value except NoRemoteError: logger.warning("dir cache entry for '%s' is missing", output) From f325fdb50ec7b5c919c42941919092831fa0e7dd Mon Sep 17 00:00:00 2001 From: Sandeep Mistry Date: Fri, 4 Dec 2020 12:10:49 +0000 Subject: [PATCH 6/9] Update dvc diff --targets with PR review feedback and additional tests --- dvc/repo/diff.py | 16 +++++++------- tests/func/test_diff.py | 46 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py index 663ef2e426..097d30d4ae 100644 --- a/dvc/repo/diff.py +++ b/dvc/repo/diff.py @@ -118,16 +118,16 @@ def _to_checksum(output): for stage in repo.stages: for output in stage.outs: if _exists(output): - if targets is None or any( - output.path_info == target for target in targets - ): - yield _to_path(output), _to_checksum(output) + yield_output = targets is None or any( + output.path_info.isin_or_eq(target) for target in targets + ) - if output.is_dir_checksum: - yield from _dir_output_paths(repo_tree, output) + if yield_output: + yield _to_path(output), _to_checksum(output) - elif output.is_dir_checksum and any( - target.isin(output.path_info) for target in targets + if output.is_dir_checksum and ( + yield_output + or any(target.isin(output.path_info) for target in targets) ): yield from _dir_output_paths(repo_tree, output, targets) diff --git a/tests/func/test_diff.py b/tests/func/test_diff.py index e3a3018aca..62fc880e57 100644 --- a/tests/func/test_diff.py +++ b/tests/func/test_diff.py @@ -274,6 +274,8 @@ def test_targets(tmp_dir, scm, dvc): tmp_dir.dvc_gen({"dir": {"1": "1", "2": "2"}}) tmp_dir.dvc_gen("file", "second") + tmp_dir.dvc_gen(os.path.join("dir_with", "file.txt"), "first") + remove(tmp_dir / ".dvc" / "cache") (tmp_dir / ".dvc" / "tmp" / "state").unlink() @@ -305,7 +307,7 @@ def test_targets(tmp_dir, scm, dvc): "not in cache": [], } - assert dvc.diff(targets=["dir/"]) == { + assert dvc.diff(targets=["dir" + os.path.sep]) == { "added": [ {"path": os.path.join("dir", ""), "hash": dir_checksum}, {"path": os.path.join("dir", "1"), "hash": digest("1")}, @@ -316,14 +318,16 @@ def test_targets(tmp_dir, scm, dvc): "not in cache": [], } - assert dvc.diff(targets=["dir/1"]) == { + assert dvc.diff(targets=[os.path.join("dir", "1")]) == { "added": [{"path": os.path.join("dir", "1"), "hash": digest("1")}], "deleted": [], "modified": [], "not in cache": [], } - assert dvc.diff(targets=["dir/1", "dir/2"]) == { + assert dvc.diff( + targets=[os.path.join("dir", "1"), os.path.join("dir", "2")] + ) == { "added": [ {"path": os.path.join("dir", "1"), "hash": digest("1")}, {"path": os.path.join("dir", "2"), "hash": digest("2")}, @@ -348,3 +352,39 @@ def test_targets(tmp_dir, scm, dvc): ], "not in cache": [], } + + assert dvc.diff(targets=["dir_with"]) == { + "added": [ + { + "path": os.path.join("dir_with", "file.txt"), + "hash": digest("first"), + }, + ], + "deleted": [], + "modified": [], + "not in cache": [], + } + + assert dvc.diff(targets=["dir_with" + os.path.sep]) == { + "added": [ + { + "path": os.path.join("dir_with", "file.txt"), + "hash": digest("first"), + }, + ], + "deleted": [], + "modified": [], + "not in cache": [], + } + + assert dvc.diff(targets=[os.path.join("dir_with", "file.txt")]) == { + "added": [ + { + "path": os.path.join("dir_with", "file.txt"), + "hash": digest("first"), + }, + ], + "deleted": [], + "modified": [], + "not in cache": [], + } From 8e912e1949f0a8b23c1c7935adce4566ba1751cc Mon Sep 17 00:00:00 2001 From: Sandeep Mistry Date: Fri, 11 Dec 2020 01:22:44 +0000 Subject: [PATCH 7/9] split out new diff target tests as per PR comment --- tests/func/test_diff.py | 75 ++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/tests/func/test_diff.py b/tests/func/test_diff.py index 62fc880e57..d248817e1e 100644 --- a/tests/func/test_diff.py +++ b/tests/func/test_diff.py @@ -266,9 +266,7 @@ def test_no_commits(tmp_dir): assert Repo.init().diff() == {} -def test_targets(tmp_dir, scm, dvc): - from dvc.exceptions import PathMissingError - +def setup_targets_test(tmp_dir): tmp_dir.dvc_gen("file", "first", commit="add a file") tmp_dir.dvc_gen({"dir": {"1": "1", "2": "2"}}) @@ -279,11 +277,19 @@ def test_targets(tmp_dir, scm, dvc): remove(tmp_dir / ".dvc" / "cache") (tmp_dir / ".dvc" / "tmp" / "state").unlink() - dir_checksum = "5fb6b29836c388e093ca0715c872fe2a.dir" + +def test_targets_missing_path(tmp_dir, scm, dvc): + from dvc.exceptions import PathMissingError + + setup_targets_test(tmp_dir) with pytest.raises(PathMissingError): dvc.diff(targets=["missing"]) + +def test_targets_single_file(tmp_dir, scm, dvc): + setup_targets_test(tmp_dir) + assert dvc.diff(targets=["file"]) == { "added": [], "deleted": [], @@ -296,18 +302,13 @@ def test_targets(tmp_dir, scm, dvc): "not in cache": [], } - assert dvc.diff(targets=["dir"]) == { - "added": [ - {"path": os.path.join("dir", ""), "hash": dir_checksum}, - {"path": os.path.join("dir", "1"), "hash": digest("1")}, - {"path": os.path.join("dir", "2"), "hash": digest("2")}, - ], - "deleted": [], - "modified": [], - "not in cache": [], - } - assert dvc.diff(targets=["dir" + os.path.sep]) == { +def test_targets_single_dir(tmp_dir, scm, dvc): + setup_targets_test(tmp_dir) + + dir_checksum = "5fb6b29836c388e093ca0715c872fe2a.dir" + + expected_result = { "added": [ {"path": os.path.join("dir", ""), "hash": dir_checksum}, {"path": os.path.join("dir", "1"), "hash": digest("1")}, @@ -318,6 +319,15 @@ def test_targets(tmp_dir, scm, dvc): "not in cache": [], } + assert dvc.diff(targets=["dir"]) == expected_result + assert dvc.diff(targets=["dir" + os.path.sep]) == expected_result + + +def test_targets_single_file_in_dir(tmp_dir, scm, dvc): + setup_targets_test(tmp_dir) + + dvc.diff(targets=["dir" + os.path.sep]) + assert dvc.diff(targets=[os.path.join("dir", "1")]) == { "added": [{"path": os.path.join("dir", "1"), "hash": digest("1")}], "deleted": [], @@ -325,6 +335,12 @@ def test_targets(tmp_dir, scm, dvc): "not in cache": [], } + +def test_targets_two_files_in_dir(tmp_dir, scm, dvc): + setup_targets_test(tmp_dir) + + dvc.diff(targets=["dir" + os.path.sep]) + assert dvc.diff( targets=[os.path.join("dir", "1"), os.path.join("dir", "2")] ) == { @@ -337,6 +353,12 @@ def test_targets(tmp_dir, scm, dvc): "not in cache": [], } + +def test_targets_file_and_dir(tmp_dir, scm, dvc): + setup_targets_test(tmp_dir) + + dir_checksum = "5fb6b29836c388e093ca0715c872fe2a.dir" + assert dvc.diff(targets=["file", "dir"]) == { "added": [ {"path": os.path.join("dir", ""), "hash": dir_checksum}, @@ -353,19 +375,11 @@ def test_targets(tmp_dir, scm, dvc): "not in cache": [], } - assert dvc.diff(targets=["dir_with"]) == { - "added": [ - { - "path": os.path.join("dir_with", "file.txt"), - "hash": digest("first"), - }, - ], - "deleted": [], - "modified": [], - "not in cache": [], - } - assert dvc.diff(targets=["dir_with" + os.path.sep]) == { +def test_targets_single_dir_with_file(tmp_dir, scm, dvc): + setup_targets_test(tmp_dir) + + expected_result = { "added": [ { "path": os.path.join("dir_with", "file.txt"), @@ -377,6 +391,13 @@ def test_targets(tmp_dir, scm, dvc): "not in cache": [], } + assert dvc.diff(targets=["dir_with"]) == expected_result + assert dvc.diff(targets=["dir_with" + os.path.sep]) == expected_result + + +def test_targets_single_file_in_dir_with_file(tmp_dir, scm, dvc): + setup_targets_test(tmp_dir) + assert dvc.diff(targets=[os.path.join("dir_with", "file.txt")]) == { "added": [ { From 318465edeee445b715832ac4c9209ea1549d7156 Mon Sep 17 00:00:00 2001 From: Sandeep Mistry Date: Fri, 11 Dec 2020 02:52:24 +0000 Subject: [PATCH 8/9] remove unnecessary cache dir removal from setup_targets_test --- tests/func/test_diff.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/func/test_diff.py b/tests/func/test_diff.py index d248817e1e..63cd02c38f 100644 --- a/tests/func/test_diff.py +++ b/tests/func/test_diff.py @@ -274,9 +274,6 @@ def setup_targets_test(tmp_dir): tmp_dir.dvc_gen(os.path.join("dir_with", "file.txt"), "first") - remove(tmp_dir / ".dvc" / "cache") - (tmp_dir / ".dvc" / "tmp" / "state").unlink() - def test_targets_missing_path(tmp_dir, scm, dvc): from dvc.exceptions import PathMissingError @@ -326,8 +323,6 @@ def test_targets_single_dir(tmp_dir, scm, dvc): def test_targets_single_file_in_dir(tmp_dir, scm, dvc): setup_targets_test(tmp_dir) - dvc.diff(targets=["dir" + os.path.sep]) - assert dvc.diff(targets=[os.path.join("dir", "1")]) == { "added": [{"path": os.path.join("dir", "1"), "hash": digest("1")}], "deleted": [], @@ -339,8 +334,6 @@ def test_targets_single_file_in_dir(tmp_dir, scm, dvc): def test_targets_two_files_in_dir(tmp_dir, scm, dvc): setup_targets_test(tmp_dir) - dvc.diff(targets=["dir" + os.path.sep]) - assert dvc.diff( targets=[os.path.join("dir", "1"), os.path.join("dir", "2")] ) == { From bd5c6f4ead6f0593b4ab912a1dfb16e4d057b8ce Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Tue, 15 Dec 2020 13:55:13 +0200 Subject: [PATCH 9/9] Update dvc/command/diff.py --- dvc/command/diff.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dvc/command/diff.py b/dvc/command/diff.py index 3b47ef4619..6fd64e67da 100644 --- a/dvc/command/diff.py +++ b/dvc/command/diff.py @@ -174,9 +174,8 @@ def add_parser(subparsers, parent_parser): "--targets", nargs="*", help=( - "Source paths to a data files or directories. Default None. " - "If not specified, compares all files and directories `). " - "that are under DVC control in the current working space." + "Limit command scope to these tracked files or directories. " + "Accepts one or more file paths." ), ).complete = completion.FILE diff_parser.add_argument(