From 3a377ceed60e0ed2a144c4eceed451412deae53f Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Sat, 25 Jul 2020 11:31:32 +0800 Subject: [PATCH 01/15] Update some tests first fix #3736 --- tests/func/test_check_ignore.py | 61 +++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/func/test_check_ignore.py diff --git a/tests/func/test_check_ignore.py b/tests/func/test_check_ignore.py new file mode 100644 index 0000000000..cbf98cd2cb --- /dev/null +++ b/tests/func/test_check_ignore.py @@ -0,0 +1,61 @@ +import os + +import pytest + +from dvc.ignore import DvcIgnore +from dvc.main import main + + +@pytest.mark.parametrize( + "file,ret,output", [("ignored", 0, "ignored\n"), ("not_ignored", 1, "")] +) +def test_check_ignore(tmp_dir, dvc, file, ret, output, capsys): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored") + + assert main(["check-ignore", file]) == ret + captured = capsys.readouterr() + run_output = captured.out + assert run_output == output + + +@pytest.mark.parametrize( + "file,ret,output", + [ + ("file", 0, "{}:1:f*\tfile\n".format(DvcIgnore.DVCIGNORE_FILE)), + ("foo", 1, "{}:2:!foo\tfoo\n".format(DvcIgnore.DVCIGNORE_FILE)), + ( + os.path.join("dir", "foobar"), + 0, + "{}:1:foobar\t{}\n".format( + os.path.join("dir", DvcIgnore.DVCIGNORE_FILE), + os.path.join("dir", "foobar"), + ), + ), + ], +) +def test_check_ignore_verbose(tmp_dir, dvc, file, ret, output, capsys): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "f*\n!foo") + tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "foobar"}}) + + assert main(["check-ignore", "-v", file]) == ret + captured = capsys.readouterr() + run_output = captured.out + assert run_output == output + + +@pytest.mark.parametrize( + "non_matching,output", [(["-n"], "::\tfile\n"), ([], "")] +) +def test_check_ignore_non_matching(tmp_dir, dvc, non_matching, output, capsys): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "other") + + assert main(["check-ignore", "-v"] + non_matching + ["file"]) == 1 + captured = capsys.readouterr() + run_output = captured.out + assert run_output == output + + +def test_check_ignore_non_matching_without_verbose(tmp_dir, dvc): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "other") + + assert main(["check-ignore", "-n", "file"]) == 255 From 32154f717bc0b8bbe6c6e27ece2f83ccf99ff8a0 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Sun, 26 Jul 2020 09:53:07 +0800 Subject: [PATCH 02/15] first edition --- dvc/cli.py | 2 + dvc/command/check_ignore.py | 69 +++++++++++++++++++++ dvc/ignore.py | 102 ++++++++++++++++++++++++++----- dvc/main.py | 2 +- dvc/pathspec_math.py | 8 ++- tests/func/test_check_ignore.py | 70 ++++++++++++++++----- tests/func/test_ignore.py | 25 +++++--- tests/unit/test_pathspec_math.py | 6 +- 8 files changed, 243 insertions(+), 41 deletions(-) create mode 100644 dvc/command/check_ignore.py diff --git a/dvc/cli.py b/dvc/cli.py index 1392332040..2b020d4263 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -6,6 +6,7 @@ from .command import ( add, cache, + check_ignore, checkout, commit, completion, @@ -79,6 +80,7 @@ git_hook, plots, experiments, + check_ignore, ] diff --git a/dvc/command/check_ignore.py b/dvc/command/check_ignore.py new file mode 100644 index 0000000000..fab29e8605 --- /dev/null +++ b/dvc/command/check_ignore.py @@ -0,0 +1,69 @@ +import argparse +import logging + +from dvc.command import completion +from dvc.command.base import CmdBase, append_doc_link +from dvc.exceptions import DvcException + +logger = logging.getLogger(__name__) + + +class CmdCheckIgnore(CmdBase): + def __init__(self, args): + super().__init__(args) + self.ignore_filter = self.repo.tree.dvcignore + + def _show_results(self, results): + for result in results: + if result.matches or self.args.non_matching: + # + if self.args.details: + logger.info( + "{}\t{}".format(result.patterns[-1], result.file) + ) + else: + logger.info(result.file) + + def run(self): + if self.args.non_matching and not self.args.details: + raise DvcException("--non-matching is only valid with --details") + + if self.args.quiet and self.args.details: + raise DvcException("cannot both --details and --quiet") + + results = self.ignore_filter.check_ignore(self.args.targets) + self._show_results(results) + if any(result.matches for result in results): + return 0 + return 1 + + +def add_parser(subparsers, parent_parser): + ADD_HELP = "Debug DVC ignore/exclude files" + + parser = subparsers.add_parser( + "check-ignore", + parents=[parent_parser], + description=append_doc_link(ADD_HELP, "check-ignore"), + help=ADD_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "-d", + "--details", + action="store_true", + default=False, + help="Show the exclude pattern together with the path.", + ) + parser.add_argument( + "-n", + "--non-matching", + action="store_true", + default=False, + help="Show given paths which don’t match any pattern. " + "Only used when --verbose is enabled.", + ) + parser.add_argument( + "targets", nargs="+", help="Input files/directories to add.", + ).complete = completion.FILE + parser.set_defaults(func=CmdCheckIgnore) diff --git a/dvc/ignore.py b/dvc/ignore.py index 05463403cf..63620d2c69 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -1,6 +1,7 @@ import logging import os import re +from collections import namedtuple from itertools import groupby from pathspec.patterns import GitWildMatchPattern @@ -8,7 +9,7 @@ from pygtrie import StringTrie from dvc.path_info import PathInfo -from dvc.pathspec_math import merge_patterns +from dvc.pathspec_math import PatternInfo, merge_patterns from dvc.system import System from dvc.utils import relpath @@ -24,18 +25,26 @@ def __call__(self, root, dirs, files): class DvcIgnorePatterns(DvcIgnore): def __init__(self, pattern_list, dirname): + if pattern_list: + if isinstance(pattern_list[0], str): + pattern_list = [ + PatternInfo(pattern, "") for pattern in pattern_list + ] self.pattern_list = pattern_list self.dirname = dirname self.prefix = self.dirname + os.sep - regex_pattern_list = map( - GitWildMatchPattern.pattern_to_regex, pattern_list - ) + self.regex_pattern_list = [ + GitWildMatchPattern.pattern_to_regex(pattern_info.patterns) + for pattern_info in pattern_list + ] self.ignore_spec = [ (ignore, re.compile("|".join(item[0] for item in group))) - for ignore, group in groupby(regex_pattern_list, lambda x: x[1]) + for ignore, group in groupby( + self.regex_pattern_list, lambda x: x[1] + ) if ignore is not None ] @@ -43,9 +52,19 @@ def __init__(self, pattern_list, dirname): def from_files(cls, ignore_file_path, tree): assert os.path.isabs(ignore_file_path) dirname = os.path.normpath(os.path.dirname(ignore_file_path)) + ignore_file_rel_path = os.path.relpath( + ignore_file_path, tree.tree_root + ) with tree.open(ignore_file_path, encoding="utf-8") as fobj: path_spec_lines = [ - line for line in map(str.strip, fobj.readlines()) if line + PatternInfo( + line, + "{}:{}:{}".format(ignore_file_rel_path, line_no + 1, line), + ) + for line_no, line in enumerate( + map(str.strip, fobj.readlines()) + ) + if line ] return cls(path_spec_lines, dirname) @@ -56,7 +75,7 @@ def __call__(self, root, dirs, files): return dirs, files - def matches(self, dirname, basename, is_dir=False): + def _get_normalize_path(self, dirname, basename): # NOTE: `relpath` is too slow, so we have to assume that both # `dirname` and `self.dirname` are relative or absolute together. if dirname == self.dirname: @@ -70,6 +89,10 @@ def matches(self, dirname, basename, is_dir=False): if not System.is_unix(): path = normalize_file(path) + return path + + def matches(self, dirname, basename, is_dir=False): + path = self._get_normalize_path(dirname, basename) return self.ignore(path, is_dir) def ignore(self, path, is_dir): @@ -85,20 +108,46 @@ def ignore(self, path, is_dir): result = ignore return result + def match_details(self, dirname, basename, is_dir=False): + path = self._get_normalize_path(dirname, basename) + return self._ignore_details(path, is_dir) + + def _ignore_details(self, path, is_dir): + result = [] + for ignore, pattern in zip(self.regex_pattern_list, self.pattern_list): + regex = re.compile(ignore[0]) + # skip system pattern + if not pattern.file_info: + continue + if is_dir: + path_dir = f"{path}/" + if regex.match(path) or regex.match(path_dir): + result.append(pattern.file_info) + else: + if regex.match(path): + result.append(pattern.file_info) + return result + def __hash__(self): - return hash(self.dirname + ":" + "\n".join(self.pattern_list)) + return hash(self.dirname + ":" + str(self.pattern_list)) def __eq__(self, other): if not isinstance(other, DvcIgnorePatterns): return NotImplemented return (self.dirname == other.dirname) & ( - self.pattern_list == other.pattern_list + [pattern.patterns for pattern in self.pattern_list] + == [pattern.patterns for pattern in other.pattern_list] ) def __bool__(self): return bool(self.pattern_list) +CheckIgnoreResult = namedtuple( + "CheckIgnoreResult", ["file", "matches", "patterns"] +) + + class DvcIgnoreFilterNoop: def __init__(self, tree, root_dir): pass @@ -112,6 +161,9 @@ def is_ignored_dir(self, _): def is_ignored_file(self, _): return False + def check_ignore(self, _): + return [] + class DvcIgnoreFilter: @staticmethod @@ -159,19 +211,18 @@ def _update_sub_repo(self, root, dirs): for d in dirs: if self._is_dvc_repo(root, d): old_pattern = self._get_trie_pattern(root) + new_pattern = DvcIgnorePatterns(["/{}/".format(d)], root) if old_pattern: self.ignores_trie_tree[root] = DvcIgnorePatterns( *merge_patterns( old_pattern.pattern_list, old_pattern.dirname, - ["/{}/".format(d)], - root, + new_pattern.pattern_list, + new_pattern.dirname, ) ) else: - self.ignores_trie_tree[root] = DvcIgnorePatterns( - ["/{}/".format(d)], root - ) + self.ignores_trie_tree[root] = new_pattern def __call__(self, root, dirs, files): ignore_pattern = self._get_trie_pattern(root) @@ -217,3 +268,26 @@ def _outside_repo(self, path): ): return True return False + + def check_ignore(self, targets): + check_results = [] + for target in targets: + full_target = os.path.join(os.getcwd(), target) + if not self._outside_repo(full_target): + dirname, basename = os.path.split( + os.path.normpath(full_target) + ) + pattern = self._get_trie_pattern(dirname) + if pattern: + matches = pattern.match_details( + dirname, basename, os.path.isdir(full_target) + ) + + if matches: + check_results.append( + CheckIgnoreResult(target, True, matches) + ) + continue + check_results.append(CheckIgnoreResult(target, False, ["::"])) + + return check_results diff --git a/dvc/main.py b/dvc/main.py index dc737aa5f5..c61912b019 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -93,7 +93,7 @@ def main(argv=None): # so won't be reused by any other subsequent run anyway. clean_repos() - if ret != 0: + if ret != 0 and ret != 1: logger.info(FOOTER) if analytics.is_enabled(): diff --git a/dvc/pathspec_math.py b/dvc/pathspec_math.py index 5af503cd2d..6191f7717e 100644 --- a/dvc/pathspec_math.py +++ b/dvc/pathspec_math.py @@ -3,11 +3,14 @@ # of two path specification patterns with different base # All the operations follow the documents of `gitignore` import os +from collections import namedtuple from pathspec.util import normalize_file from dvc.utils import relpath +PatternInfo = namedtuple("PatternInfo", ["patterns", "file_info"]) + def _not_ignore(rule): return (True, rule[1:]) if rule.startswith("!") else (False, rule) @@ -59,7 +62,10 @@ def _change_dirname(dirname, pattern_list, new_dirname): if rel.startswith(".."): raise ValueError("change dirname can only change to parent path") - return [change_rule(rule, rel) for rule in pattern_list] + return [ + PatternInfo(change_rule(rule.patterns, rel), rule.file_info) + for rule in pattern_list + ] def merge_patterns(pattern_a, prefix_a, pattern_b, prefix_b): diff --git a/tests/func/test_check_ignore.py b/tests/func/test_check_ignore.py index cbf98cd2cb..0d7026bc80 100644 --- a/tests/func/test_check_ignore.py +++ b/tests/func/test_check_ignore.py @@ -9,20 +9,18 @@ @pytest.mark.parametrize( "file,ret,output", [("ignored", 0, "ignored\n"), ("not_ignored", 1, "")] ) -def test_check_ignore(tmp_dir, dvc, file, ret, output, capsys): +def test_check_ignore(tmp_dir, dvc, file, ret, output, caplog): tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored") assert main(["check-ignore", file]) == ret - captured = capsys.readouterr() - run_output = captured.out - assert run_output == output + assert output in caplog.text @pytest.mark.parametrize( "file,ret,output", [ ("file", 0, "{}:1:f*\tfile\n".format(DvcIgnore.DVCIGNORE_FILE)), - ("foo", 1, "{}:2:!foo\tfoo\n".format(DvcIgnore.DVCIGNORE_FILE)), + ("foo", 0, "{}:2:!foo\tfoo\n".format(DvcIgnore.DVCIGNORE_FILE)), ( os.path.join("dir", "foobar"), 0, @@ -33,29 +31,69 @@ def test_check_ignore(tmp_dir, dvc, file, ret, output, capsys): ), ], ) -def test_check_ignore_verbose(tmp_dir, dvc, file, ret, output, capsys): +def test_check_ignore_details(tmp_dir, dvc, file, ret, output, caplog): tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "f*\n!foo") tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "foobar"}}) - assert main(["check-ignore", "-v", file]) == ret - captured = capsys.readouterr() - run_output = captured.out - assert run_output == output + assert main(["check-ignore", "-d", file]) == ret + assert output in caplog.text @pytest.mark.parametrize( "non_matching,output", [(["-n"], "::\tfile\n"), ([], "")] ) -def test_check_ignore_non_matching(tmp_dir, dvc, non_matching, output, capsys): +def test_check_ignore_non_matching(tmp_dir, dvc, non_matching, output, caplog): tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "other") - assert main(["check-ignore", "-v"] + non_matching + ["file"]) == 1 - captured = capsys.readouterr() - run_output = captured.out - assert run_output == output + assert main(["check-ignore", "-d"] + non_matching + ["file"]) == 1 + assert output in caplog.text -def test_check_ignore_non_matching_without_verbose(tmp_dir, dvc): +def test_check_ignore_non_matching_without_details(tmp_dir, dvc): tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "other") assert main(["check-ignore", "-n", "file"]) == 255 + + +def test_check_ignore_details_with_quiet(tmp_dir, dvc): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "other") + + assert main(["check-ignore", "-d", "-q", "file"]) == 255 + + +@pytest.mark.parametrize("path,ret", [({"dir": {}}, 0), ({"dir": "files"}, 1)]) +def test_check_ignore_dir(tmp_dir, dvc, path, ret): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/") + tmp_dir.gen(path) + + assert main(["check-ignore", "-q", "dir"]) == ret + + +def test_check_ignore_default_dir(tmp_dir, dvc): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/") + assert main(["check-ignore", "-q", ".dvc"]) == 1 + + +def test_check_ignore_sub_repo(tmp_dir, dvc): + tmp_dir.gen( + {DvcIgnore.DVCIGNORE_FILE: "other", "dir": {".dvc": {}, "foo": "bar"}} + ) + + assert main(["check-ignore", "-q", os.path.join("dir", "foo")]) == 1 + + +def test_check_multi_ignore_file(tmp_dir, dvc, caplog): + tmp_dir.gen( + { + DvcIgnore.DVCIGNORE_FILE: "other", + "dir": {DvcIgnore.DVCIGNORE_FILE: "bar\nfoo", "foo": "bar"}, + } + ) + + assert main(["check-ignore", "-d", os.path.join("dir", "foo")]) == 0 + assert "dir/.dvcignore:2:foo\tdir/foo" in caplog.text + + sub_dir = tmp_dir / "dir" + with sub_dir.chdir(): + assert main(["check-ignore", "-d", "foo"]) == 0 + assert ".dvcignore:2:foo\tfoo" in caplog.text diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 16afb978af..67e8f71a5a 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -6,13 +6,17 @@ from dvc.exceptions import DvcIgnoreInCollectedDirError from dvc.ignore import DvcIgnore, DvcIgnorePatterns from dvc.path_info import PathInfo -from dvc.pathspec_math import merge_patterns +from dvc.pathspec_math import PatternInfo, merge_patterns from dvc.repo import Repo from dvc.utils import relpath from dvc.utils.fs import get_mtime_and_size from tests.dir_helpers import TmpDir +def _to_pattern_info_list(str_list): + return [PatternInfo(a, "") for a in str_list] + + def test_ignore(tmp_dir, dvc, monkeypatch): tmp_dir.gen({"dir": {"ignored": "text", "other": "text2"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") @@ -106,9 +110,9 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname): assert ( DvcIgnorePatterns( *merge_patterns( - [".hg/", ".git/", ".dvc/"], + _to_pattern_info_list([".hg/", ".git/", ".dvc/"]), os.fspath(tmp_dir), - [os.path.basename(dname)], + _to_pattern_info_list([os.path.basename(dname)]), top_ignore_path, ) ) @@ -309,17 +313,24 @@ def test_pattern_trie_tree(tmp_dir, dvc): os.fspath(tmp_dir / "top" / "first" / "middle" / "second" / "bottom") ) - base_pattern = [".hg/", ".git/", ".dvc/"], os.fspath(tmp_dir) + base_pattern = ( + _to_pattern_info_list([".hg/", ".git/", ".dvc/"]), + os.fspath(tmp_dir), + ) first_pattern = merge_patterns( - *base_pattern, ["a", "b", "c"], os.fspath(tmp_dir / "top" / "first") + *base_pattern, + _to_pattern_info_list(["a", "b", "c"]), + os.fspath(tmp_dir / "top" / "first") ) second_pattern = merge_patterns( *first_pattern, - ["d", "e", "f"], + _to_pattern_info_list(["d", "e", "f"]), os.fspath(tmp_dir / "top" / "first" / "middle" / "second") ) other_pattern = merge_patterns( - *base_pattern, ["1", "2", "3"], os.fspath(tmp_dir / "other") + *base_pattern, + _to_pattern_info_list(["1", "2", "3"]), + os.fspath(tmp_dir / "other") ) assert DvcIgnorePatterns(*base_pattern) == ignore_pattern_top diff --git a/tests/unit/test_pathspec_math.py b/tests/unit/test_pathspec_math.py index 1cc6837153..d843a71a3d 100644 --- a/tests/unit/test_pathspec_math.py +++ b/tests/unit/test_pathspec_math.py @@ -1,6 +1,6 @@ import pytest -from dvc.pathspec_math import _change_dirname +from dvc.pathspec_math import PatternInfo, _change_dirname @pytest.mark.parametrize( @@ -69,4 +69,6 @@ ], ) def test_dvcignore_pattern_change_dir(tmp_dir, patterns, dirname, changed): - assert _change_dirname(dirname, [patterns], "/") == [changed] + assert _change_dirname(dirname, [PatternInfo(patterns, "")], "/") == [ + PatternInfo(changed, "") + ] From 86a4ff7af5dadcf51d13ff493d86440be7eb4286 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Sun, 26 Jul 2020 10:40:26 +0800 Subject: [PATCH 03/15] solve failure in Windows --- tests/func/test_check_ignore.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/func/test_check_ignore.py b/tests/func/test_check_ignore.py index 0d7026bc80..1368b126d7 100644 --- a/tests/func/test_check_ignore.py +++ b/tests/func/test_check_ignore.py @@ -82,7 +82,7 @@ def test_check_ignore_sub_repo(tmp_dir, dvc): assert main(["check-ignore", "-q", os.path.join("dir", "foo")]) == 1 -def test_check_multi_ignore_file(tmp_dir, dvc, caplog): +def test_check_sub_dir_ignore_file(tmp_dir, dvc, caplog): tmp_dir.gen( { DvcIgnore.DVCIGNORE_FILE: "other", @@ -91,7 +91,10 @@ def test_check_multi_ignore_file(tmp_dir, dvc, caplog): ) assert main(["check-ignore", "-d", os.path.join("dir", "foo")]) == 0 - assert "dir/.dvcignore:2:foo\tdir/foo" in caplog.text + assert ( + "dir/.dvcignore:2:foo\t{}".format(os.path.join("dir", "foo")) + in caplog.text + ) sub_dir = tmp_dir / "dir" with sub_dir.chdir(): From 1f13e843bfc2a5060ede10b2ecb68722cf7c7761 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Sun, 26 Jul 2020 11:42:13 +0800 Subject: [PATCH 04/15] For Windows ci --- tests/func/test_check_ignore.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/func/test_check_ignore.py b/tests/func/test_check_ignore.py index 1368b126d7..9614436edd 100644 --- a/tests/func/test_check_ignore.py +++ b/tests/func/test_check_ignore.py @@ -92,7 +92,10 @@ def test_check_sub_dir_ignore_file(tmp_dir, dvc, caplog): assert main(["check-ignore", "-d", os.path.join("dir", "foo")]) == 0 assert ( - "dir/.dvcignore:2:foo\t{}".format(os.path.join("dir", "foo")) + "{}:2:foo\t{}".format( + os.path.join("dir", DvcIgnore.DVCIGNORE_FILE), + os.path.join("dir", "foo"), + ) in caplog.text ) From c3e2fbdf74eb28b4323539d48fe0bcfdcaf93513 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Sun, 26 Jul 2020 17:40:18 +0800 Subject: [PATCH 05/15] Some help ducuments issue. --- dvc/command/check_ignore.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dvc/command/check_ignore.py b/dvc/command/check_ignore.py index fab29e8605..175df4b2ed 100644 --- a/dvc/command/check_ignore.py +++ b/dvc/command/check_ignore.py @@ -61,9 +61,11 @@ def add_parser(subparsers, parent_parser): action="store_true", default=False, help="Show given paths which don’t match any pattern. " - "Only used when --verbose is enabled.", + "Only used when --details is enabled.", ) parser.add_argument( - "targets", nargs="+", help="Input files/directories to add.", + "targets", + nargs="+", + help="Input files/directories to check " "ignore patterns.", ).complete = completion.FILE parser.set_defaults(func=CmdCheckIgnore) From 3eea263cefe57c057a404fe2a5e42365018d0f9f Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Mon, 27 Jul 2020 07:12:27 +0800 Subject: [PATCH 06/15] Update dvc/ignore.py abspath Co-authored-by: Ruslan Kuprieiev --- dvc/ignore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index 63620d2c69..cfd201137b 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -272,7 +272,7 @@ def _outside_repo(self, path): def check_ignore(self, targets): check_results = [] for target in targets: - full_target = os.path.join(os.getcwd(), target) + full_target = os.path.abspath(target) if not self._outside_repo(full_target): dirname, basename = os.path.split( os.path.normpath(full_target) From 27eec49ed67346c89157edea998b6b5fa81d88f5 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Tue, 28 Jul 2020 19:43:23 +0800 Subject: [PATCH 07/15] Refactor with OutOfWorkSpaceError --- dvc/ignore.py | 75 ++++++++++++++++++--------------- tests/func/test_check_ignore.py | 1 - 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index 9559cc436b..63ed07545e 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -8,6 +8,7 @@ from pathspec.util import normalize_file from pygtrie import StringTrie +from dvc.exceptions import DvcException from dvc.path_info import PathInfo from dvc.pathspec_math import PatternInfo, merge_patterns from dvc.system import System @@ -16,6 +17,10 @@ logger = logging.getLogger(__name__) +class OutOfWorkingSpaceError(DvcException): + """Thrown when unable to acquire the lock for DVC repo.""" + + class DvcIgnore: DVCIGNORE_FILE = ".dvcignore" @@ -85,7 +90,9 @@ def _get_normalize_path(self, dirname, basename): # NOTE: `os.path.join` is ~x5.5 slower path = f"{rel}{os.sep}{basename}" else: - return False + raise OutOfWorkingSpaceError( + f"`{dirname}` is out side of `{self.dirname}`" + ) if not System.is_unix(): path = normalize_file(path) @@ -116,16 +123,13 @@ def _ignore_details(self, path, is_dir): result = [] for ignore, pattern in zip(self.regex_pattern_list, self.pattern_list): regex = re.compile(ignore[0]) - # skip system pattern - if not pattern.file_info: - continue - if is_dir: - path_dir = f"{path}/" - if regex.match(path) or regex.match(path_dir): - result.append(pattern.file_info) - else: - if regex.match(path): - result.append(pattern.file_info) + if regex.match(path) or (is_dir and regex.match(f"{path}/")): + if not pattern.file_info: + raise OutOfWorkingSpaceError( + f"`{path}` is not in work space." + ) + result.append(pattern.file_info) + return result def __hash__(self): @@ -233,10 +237,10 @@ def _update_sub_repo(self, root, dirs): self.ignores_trie_tree[root] = new_pattern def __call__(self, root, dirs, files): - ignore_pattern = self._get_trie_pattern(root) - if ignore_pattern: + try: + ignore_pattern = self._get_trie_pattern(root) return ignore_pattern(root, dirs, files) - else: + except OutOfWorkingSpaceError: return dirs, files def _get_trie_pattern(self, dirname): @@ -246,8 +250,9 @@ def _get_trie_pattern(self, dirname): prefix = self.ignores_trie_tree.longest_prefix(dirname).key if not prefix: - # outside of the repo - return None + raise OutOfWorkingSpaceError( + f"`{dirname}` is out side of `{self.root_dir}`" + ) dirs = list( takewhile( @@ -264,14 +269,13 @@ def _get_trie_pattern(self, dirname): return self.ignores_trie_tree.get(dirname) def _is_ignored(self, path, is_dir=False): - if self._outside_repo(path): - return True - dirname, basename = os.path.split(os.path.normpath(path)) - ignore_pattern = self._get_trie_pattern(dirname) - if ignore_pattern: + try: + self._outside_repo(path) + dirname, basename = os.path.split(os.path.normpath(path)) + ignore_pattern = self._get_trie_pattern(dirname) return ignore_pattern.matches(dirname, basename, is_dir) - else: - return False + except OutOfWorkingSpaceError: + return True def is_ignored_dir(self, path): path = os.path.abspath(path) @@ -294,28 +298,29 @@ def _outside_repo(self, path): [os.path.abspath(path), self.root_dir] ) ): - return True - return False + raise OutOfWorkingSpaceError(f"{path} is out of {self.root_dir}") def check_ignore(self, targets): check_results = [] for target in targets: full_target = os.path.abspath(target) - if not self._outside_repo(full_target): + try: + self._outside_repo(full_target) dirname, basename = os.path.split( os.path.normpath(full_target) ) pattern = self._get_trie_pattern(dirname) - if pattern: - matches = pattern.match_details( - dirname, basename, os.path.isdir(full_target) - ) + matches = pattern.match_details( + dirname, basename, os.path.isdir(full_target) + ) - if matches: - check_results.append( - CheckIgnoreResult(target, True, matches) - ) - continue + if matches: + check_results.append( + CheckIgnoreResult(target, True, matches) + ) + continue + except OutOfWorkingSpaceError: + pass check_results.append(CheckIgnoreResult(target, False, ["::"])) return check_results diff --git a/tests/func/test_check_ignore.py b/tests/func/test_check_ignore.py index 9614436edd..4b863ee384 100644 --- a/tests/func/test_check_ignore.py +++ b/tests/func/test_check_ignore.py @@ -70,7 +70,6 @@ def test_check_ignore_dir(tmp_dir, dvc, path, ret): def test_check_ignore_default_dir(tmp_dir, dvc): - tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/") assert main(["check-ignore", "-q", ".dvc"]) == 1 From 10d23e69999c43a1c120f26b9e0b67331a82880e Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Tue, 28 Jul 2020 22:04:09 +0800 Subject: [PATCH 08/15] Solve a bug --- dvc/ignore.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index 63ed07545e..af50182de3 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -270,7 +270,7 @@ def _get_trie_pattern(self, dirname): def _is_ignored(self, path, is_dir=False): try: - self._outside_repo(path) + self._is_inside_repo(path) dirname, basename = os.path.split(os.path.normpath(path)) ignore_pattern = self._get_trie_pattern(dirname) return ignore_pattern.matches(dirname, basename, is_dir) @@ -285,9 +285,10 @@ def is_ignored_dir(self, path): return self._is_ignored(path, True) def is_ignored_file(self, path): + path = os.path.abspath(path) return self._is_ignored(path, False) - def _outside_repo(self, path): + def _is_inside_repo(self, path): path = PathInfo(path) # paths outside of the repo should be ignored @@ -305,7 +306,7 @@ def check_ignore(self, targets): for target in targets: full_target = os.path.abspath(target) try: - self._outside_repo(full_target) + self._is_inside_repo(full_target) dirname, basename = os.path.split( os.path.normpath(full_target) ) From 0ac9c851200aebdfbfaff5f3b907ce92b662cf84 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Wed, 29 Jul 2020 13:48:09 +0800 Subject: [PATCH 09/15] Update dvc/command/check_ignore.py Co-authored-by: Jorge Orpinel --- dvc/command/check_ignore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/check_ignore.py b/dvc/command/check_ignore.py index 175df4b2ed..e1cb19f345 100644 --- a/dvc/command/check_ignore.py +++ b/dvc/command/check_ignore.py @@ -53,7 +53,7 @@ def add_parser(subparsers, parent_parser): "--details", action="store_true", default=False, - help="Show the exclude pattern together with the path.", + help="Show the exclude pattern together with each target path.", ) parser.add_argument( "-n", From ab7237dcb3ff61760bbc110190c110f46e093b4c Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Wed, 29 Jul 2020 13:48:18 +0800 Subject: [PATCH 10/15] Update dvc/command/check_ignore.py Co-authored-by: Jorge Orpinel --- dvc/command/check_ignore.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/command/check_ignore.py b/dvc/command/check_ignore.py index e1cb19f345..c84a4d9b37 100644 --- a/dvc/command/check_ignore.py +++ b/dvc/command/check_ignore.py @@ -60,8 +60,8 @@ def add_parser(subparsers, parent_parser): "--non-matching", action="store_true", default=False, - help="Show given paths which don’t match any pattern. " - "Only used when --details is enabled.", + help="Show the target paths which don’t match any pattern. " + "Only usable when `--details` is also employed", ) parser.add_argument( "targets", From 3cf35f009585b1e5e2fbf11d90f2844a6668109c Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 30 Jul 2020 02:35:17 +0300 Subject: [PATCH 11/15] Update dvc/command/check_ignore.py --- dvc/command/check_ignore.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dvc/command/check_ignore.py b/dvc/command/check_ignore.py index c84a4d9b37..464575972b 100644 --- a/dvc/command/check_ignore.py +++ b/dvc/command/check_ignore.py @@ -16,7 +16,6 @@ def __init__(self, args): def _show_results(self, results): for result in results: if result.matches or self.args.non_matching: - # if self.args.details: logger.info( "{}\t{}".format(result.patterns[-1], result.file) From 484473583a4ad2232b13a78321bc84dc7911e315 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Thu, 30 Jul 2020 14:51:10 +0800 Subject: [PATCH 12/15] Revert "Refactor with OutOfWorkSpaceError" This reverts commit 27eec49ed67346c89157edea998b6b5fa81d88f5. --- dvc/ignore.py | 78 ++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 42 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index af50182de3..9559cc436b 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -8,7 +8,6 @@ from pathspec.util import normalize_file from pygtrie import StringTrie -from dvc.exceptions import DvcException from dvc.path_info import PathInfo from dvc.pathspec_math import PatternInfo, merge_patterns from dvc.system import System @@ -17,10 +16,6 @@ logger = logging.getLogger(__name__) -class OutOfWorkingSpaceError(DvcException): - """Thrown when unable to acquire the lock for DVC repo.""" - - class DvcIgnore: DVCIGNORE_FILE = ".dvcignore" @@ -90,9 +85,7 @@ def _get_normalize_path(self, dirname, basename): # NOTE: `os.path.join` is ~x5.5 slower path = f"{rel}{os.sep}{basename}" else: - raise OutOfWorkingSpaceError( - f"`{dirname}` is out side of `{self.dirname}`" - ) + return False if not System.is_unix(): path = normalize_file(path) @@ -123,13 +116,16 @@ def _ignore_details(self, path, is_dir): result = [] for ignore, pattern in zip(self.regex_pattern_list, self.pattern_list): regex = re.compile(ignore[0]) - if regex.match(path) or (is_dir and regex.match(f"{path}/")): - if not pattern.file_info: - raise OutOfWorkingSpaceError( - f"`{path}` is not in work space." - ) - result.append(pattern.file_info) - + # skip system pattern + if not pattern.file_info: + continue + if is_dir: + path_dir = f"{path}/" + if regex.match(path) or regex.match(path_dir): + result.append(pattern.file_info) + else: + if regex.match(path): + result.append(pattern.file_info) return result def __hash__(self): @@ -237,10 +233,10 @@ def _update_sub_repo(self, root, dirs): self.ignores_trie_tree[root] = new_pattern def __call__(self, root, dirs, files): - try: - ignore_pattern = self._get_trie_pattern(root) + ignore_pattern = self._get_trie_pattern(root) + if ignore_pattern: return ignore_pattern(root, dirs, files) - except OutOfWorkingSpaceError: + else: return dirs, files def _get_trie_pattern(self, dirname): @@ -250,9 +246,8 @@ def _get_trie_pattern(self, dirname): prefix = self.ignores_trie_tree.longest_prefix(dirname).key if not prefix: - raise OutOfWorkingSpaceError( - f"`{dirname}` is out side of `{self.root_dir}`" - ) + # outside of the repo + return None dirs = list( takewhile( @@ -269,13 +264,14 @@ def _get_trie_pattern(self, dirname): return self.ignores_trie_tree.get(dirname) def _is_ignored(self, path, is_dir=False): - try: - self._is_inside_repo(path) - dirname, basename = os.path.split(os.path.normpath(path)) - ignore_pattern = self._get_trie_pattern(dirname) - return ignore_pattern.matches(dirname, basename, is_dir) - except OutOfWorkingSpaceError: + if self._outside_repo(path): return True + dirname, basename = os.path.split(os.path.normpath(path)) + ignore_pattern = self._get_trie_pattern(dirname) + if ignore_pattern: + return ignore_pattern.matches(dirname, basename, is_dir) + else: + return False def is_ignored_dir(self, path): path = os.path.abspath(path) @@ -285,10 +281,9 @@ def is_ignored_dir(self, path): return self._is_ignored(path, True) def is_ignored_file(self, path): - path = os.path.abspath(path) return self._is_ignored(path, False) - def _is_inside_repo(self, path): + def _outside_repo(self, path): path = PathInfo(path) # paths outside of the repo should be ignored @@ -299,29 +294,28 @@ def _is_inside_repo(self, path): [os.path.abspath(path), self.root_dir] ) ): - raise OutOfWorkingSpaceError(f"{path} is out of {self.root_dir}") + return True + return False def check_ignore(self, targets): check_results = [] for target in targets: full_target = os.path.abspath(target) - try: - self._is_inside_repo(full_target) + if not self._outside_repo(full_target): dirname, basename = os.path.split( os.path.normpath(full_target) ) pattern = self._get_trie_pattern(dirname) - matches = pattern.match_details( - dirname, basename, os.path.isdir(full_target) - ) - - if matches: - check_results.append( - CheckIgnoreResult(target, True, matches) + if pattern: + matches = pattern.match_details( + dirname, basename, os.path.isdir(full_target) ) - continue - except OutOfWorkingSpaceError: - pass + + if matches: + check_results.append( + CheckIgnoreResult(target, True, matches) + ) + continue check_results.append(CheckIgnoreResult(target, False, ["::"])) return check_results From 4ee24f048e05f5dbfd62b3cd8ac21a3c87cccd3f Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Thu, 30 Jul 2020 18:23:45 +0800 Subject: [PATCH 13/15] Two change request 1. Argument `targets`'s description. 2. Error handling of `_get_normalize_path` --- dvc/command/check_ignore.py | 3 ++- dvc/ignore.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/dvc/command/check_ignore.py b/dvc/command/check_ignore.py index 464575972b..394703ca68 100644 --- a/dvc/command/check_ignore.py +++ b/dvc/command/check_ignore.py @@ -65,6 +65,7 @@ def add_parser(subparsers, parent_parser): parser.add_argument( "targets", nargs="+", - help="Input files/directories to check " "ignore patterns.", + help="Exact or wildcard paths of files or directories to check " + "ignore patterns.", ).complete = completion.FILE parser.set_defaults(func=CmdCheckIgnore) diff --git a/dvc/ignore.py b/dvc/ignore.py index 9559cc436b..c7ee61fcf1 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -93,6 +93,8 @@ def _get_normalize_path(self, dirname, basename): def matches(self, dirname, basename, is_dir=False): path = self._get_normalize_path(dirname, basename) + if not path: + return False return self.ignore(path, is_dir) def ignore(self, path, is_dir): @@ -110,6 +112,8 @@ def ignore(self, path, is_dir): def match_details(self, dirname, basename, is_dir=False): path = self._get_normalize_path(dirname, basename) + if not path: + return False return self._ignore_details(path, is_dir) def _ignore_details(self, path, is_dir): From 54fbe31da8c21669cd7cb87ffdf2ec46943dd261 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Fri, 31 Jul 2020 08:42:10 +0800 Subject: [PATCH 14/15] Update dvc/main.py Co-authored-by: Ruslan Kuprieiev --- dvc/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/main.py b/dvc/main.py index 6c82a11f0b..fc1450e536 100644 --- a/dvc/main.py +++ b/dvc/main.py @@ -84,7 +84,7 @@ def main(argv=None): # noqa: C901 ret = 255 try: - if ret != 0 and ret != 1: + if ret != 0: logger.info(FOOTER) if analytics.is_enabled(): From 930534ba85e83d34a45eead6e88a4722818af918 Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Sat, 1 Aug 2020 16:49:47 +0800 Subject: [PATCH 15/15] `check_ignore` now only accept one path a time 1. Add a new test for the out side repo cases 2. check ignore now only check one file not file lists --- dvc/command/check_ignore.py | 27 +++++++++++++-------------- dvc/ignore.py | 33 ++++++++++++--------------------- tests/func/test_check_ignore.py | 5 +++++ 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/dvc/command/check_ignore.py b/dvc/command/check_ignore.py index 394703ca68..ef3f1250bb 100644 --- a/dvc/command/check_ignore.py +++ b/dvc/command/check_ignore.py @@ -13,15 +13,12 @@ def __init__(self, args): super().__init__(args) self.ignore_filter = self.repo.tree.dvcignore - def _show_results(self, results): - for result in results: - if result.matches or self.args.non_matching: - if self.args.details: - logger.info( - "{}\t{}".format(result.patterns[-1], result.file) - ) - else: - logger.info(result.file) + def _show_results(self, result): + if result.match or self.args.non_matching: + if self.args.details: + logger.info("{}\t{}".format(result.patterns[-1], result.file)) + else: + logger.info(result.file) def run(self): if self.args.non_matching and not self.args.details: @@ -30,11 +27,13 @@ def run(self): if self.args.quiet and self.args.details: raise DvcException("cannot both --details and --quiet") - results = self.ignore_filter.check_ignore(self.args.targets) - self._show_results(results) - if any(result.matches for result in results): - return 0 - return 1 + ret = 1 + for target in self.args.targets: + result = self.ignore_filter.check_ignore(target) + self._show_results(result) + if result.match: + ret = 0 + return ret def add_parser(subparsers, parent_parser): diff --git a/dvc/ignore.py b/dvc/ignore.py index c7ee61fcf1..0e80a02103 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -148,7 +148,7 @@ def __bool__(self): CheckIgnoreResult = namedtuple( - "CheckIgnoreResult", ["file", "matches", "patterns"] + "CheckIgnoreResult", ["file", "match", "patterns"] ) @@ -301,25 +301,16 @@ def _outside_repo(self, path): return True return False - def check_ignore(self, targets): - check_results = [] - for target in targets: - full_target = os.path.abspath(target) - if not self._outside_repo(full_target): - dirname, basename = os.path.split( - os.path.normpath(full_target) + def check_ignore(self, target): + full_target = os.path.abspath(target) + if not self._outside_repo(full_target): + dirname, basename = os.path.split(os.path.normpath(full_target)) + pattern = self._get_trie_pattern(dirname) + if pattern: + matches = pattern.match_details( + dirname, basename, os.path.isdir(full_target) ) - pattern = self._get_trie_pattern(dirname) - if pattern: - matches = pattern.match_details( - dirname, basename, os.path.isdir(full_target) - ) - - if matches: - check_results.append( - CheckIgnoreResult(target, True, matches) - ) - continue - check_results.append(CheckIgnoreResult(target, False, ["::"])) - return check_results + if matches: + return CheckIgnoreResult(target, True, matches) + return CheckIgnoreResult(target, False, ["::"]) diff --git a/tests/func/test_check_ignore.py b/tests/func/test_check_ignore.py index 4b863ee384..b53f72f531 100644 --- a/tests/func/test_check_ignore.py +++ b/tests/func/test_check_ignore.py @@ -73,6 +73,11 @@ def test_check_ignore_default_dir(tmp_dir, dvc): assert main(["check-ignore", "-q", ".dvc"]) == 1 +def test_check_ignore_out_side_repo(tmp_dir, dvc): + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "file") + assert main(["check-ignore", "-q", "../file"]) == 1 + + def test_check_ignore_sub_repo(tmp_dir, dvc): tmp_dir.gen( {DvcIgnore.DVCIGNORE_FILE: "other", "dir": {".dvc": {}, "foo": "bar"}}