diff --git a/dvc/ignore.py b/dvc/ignore.py index f74815e8fe..5c91d302ba 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals import os +from pathspec import PathSpec +from pathspec.patterns import GitWildMatchPattern -from dulwich.ignore import match_pattern, read_ignore_patterns from dvc.utils import relpath -from dvc.utils.compat import cast_bytes from dvc.utils.fs import get_parent_dirs_up_to @@ -13,23 +13,12 @@ def __init__(self, tree): self.tree = tree def read_patterns(self, path): - with self.tree.open(path, binary=True) as stream: - return self._read_patterns(stream) + with self.tree.open(path) as fobj: + return PathSpec.from_lines(GitWildMatchPattern, fobj) def get_repo_root(self): return self.tree.tree_root - def _read_patterns(self, binary_stream): - negate_patterns = [] - patterns = [] - for pattern in read_ignore_patterns(binary_stream): - if pattern.lstrip().startswith(b"!"): - negate_patterns.append(pattern) - else: - patterns.append(pattern) - - return negate_patterns, patterns - class DvcIgnore(object): DVCIGNORE_FILE = ".dvcignore" @@ -42,12 +31,8 @@ class DvcIgnoreFromFile(DvcIgnore): def __init__(self, ignore_file_path, ignore_handler): self.ignore_file_path = ignore_file_path self.dirname = os.path.normpath(os.path.dirname(ignore_file_path)) - self.patterns = [] - self.negate_patterns = [] - self.negate_patterns, self.patterns = ignore_handler.read_patterns( - ignore_file_path - ) + self.ignore_spec = ignore_handler.read_patterns(ignore_file_path) def __call__(self, root, dirs, files): files = [f for f in files if not self.matches(root, f)] @@ -55,26 +40,13 @@ def __call__(self, root, dirs, files): return dirs, files - def get_match(self, abs_path): + def matches(self, dirname, basename): + abs_path = os.path.join(dirname, basename) relative_path = relpath(abs_path, self.dirname) if os.name == "nt": relative_path = relative_path.replace("\\", "/") - relative_path = cast_bytes(relative_path, "utf-8") - - for pattern in self.patterns: - if match_pattern( - relative_path, pattern - ) and self._no_negate_pattern_matches(relative_path): - return (abs_path, pattern, self.ignore_file_path) - return None - - def matches(self, dirname, basename): - if self.get_match(os.path.join(dirname, basename)): - return True - return False - def _no_negate_pattern_matches(self, path): - return all([not match_pattern(path, p) for p in self.negate_patterns]) + return self.ignore_spec.match_file(relative_path) def __hash__(self): return hash(self.ignore_file_path) diff --git a/setup.py b/setup.py index 2352bfcca5..0e2f3437f0 100644 --- a/setup.py +++ b/setup.py @@ -58,10 +58,10 @@ def run(self): "treelib>=1.5.5", "inflect>=2.1.0", "humanize>=0.5.1", - "dulwich>=0.19.11", "ruamel.yaml>=0.15.91", "psutil==5.6.2", "funcy>=1.12", + "pathspec>=0.5.9", ] # Extra dependencies for remote integrations diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index ca90387370..a646d4afb7 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -1,6 +1,6 @@ import os -from dvc.ignore import DvcIgnoreFromFile, DvcIgnore, DvcIgnoreFileHandler +from dvc.ignore import DvcIgnore, DvcIgnoreFileHandler from dvc.utils.compat import cast_bytes from tests.basic_env import TestDvc @@ -25,16 +25,6 @@ def _get_all_paths(self): return paths - def test_ignore_comments(self): - ignore_file = os.path.join(self.dvc.root_dir, DvcIgnore.DVCIGNORE_FILE) - with open(ignore_file, "w") as fobj: - fobj.write(os.path.basename(self.DATA)) - fobj.write(" #this is comment") - - ignore = DvcIgnoreFromFile(ignore_file, self.ignore_file_handler) - - self.assertEqual(1, len(ignore.patterns)) - def test_ignore_in_child_dir(self): ignore_file = os.path.join(self.dvc.root_dir, DvcIgnore.DVCIGNORE_FILE) with open(ignore_file, "w") as fobj: diff --git a/tests/unit/test_ignore.py b/tests/unit/test_ignore.py index 5c50feb040..aa0b8109b5 100644 --- a/tests/unit/test_ignore.py +++ b/tests/unit/test_ignore.py @@ -1,25 +1,19 @@ import os import pytest +from pathspec import PathSpec +from pathspec.patterns import GitWildMatchPattern + from dvc.ignore import DvcIgnoreFromFile, DvcIgnoreDir, DvcIgnoreFile from mock import patch, Mock -from dvc.utils.compat import cast_bytes - - -def read_pattern(p): - return cast_bytes(p, "utf-8") - - -def mock_dvcignore(dvcignore_path, negate_patterns, patterns): - negate_patterns = [read_pattern(p) for p in negate_patterns] - patterns = [read_pattern(p) for p in patterns] +def mock_dvcignore(dvcignore_path, patterns): mock_ignore_file_handler = Mock() with patch.object( mock_ignore_file_handler, "read_patterns", - return_value=(negate_patterns, patterns), + return_value=PathSpec.from_lines(GitWildMatchPattern, patterns), ): ignore_file = DvcIgnoreFromFile( dvcignore_path, mock_ignore_file_handler @@ -32,14 +26,13 @@ def test_ignore_from_file_should_filter_dirs_and_files(): os.path.sep, "full", "path", "to", "ignore", "file", ".dvcignore" ) - negate_patterns = [] patterns = ["dir_to_ignore", "file_to_ignore"] root = os.path.dirname(dvcignore_path) dirs = ["dir1", "dir2", "dir_to_ignore"] files = ["file1", "file2", "file_to_ignore"] - ignore = mock_dvcignore(dvcignore_path, negate_patterns, patterns) + ignore = mock_dvcignore(dvcignore_path, patterns) new_dirs, new_files = ignore(root, dirs, files) assert {"dir1", "dir2"} == set(new_dirs) @@ -47,13 +40,12 @@ def test_ignore_from_file_should_filter_dirs_and_files(): @pytest.mark.parametrize( - "file_to_ignore_relpath, negate_patterns,patterns, expected_match", + "file_to_ignore_relpath, patterns, expected_match", [ - ("to_ignore", [], ["to_ignore"], True), - ("to_ignore.txt", [], ["to_ignore*"], True), + ("to_ignore", ["to_ignore"], True), + ("to_ignore.txt", ["to_ignore*"], True), ( os.path.join("rel", "p", "p2", "to_ignore"), - [], ["rel/**/to_ignore"], True, ), @@ -67,30 +59,27 @@ def test_ignore_from_file_should_filter_dirs_and_files(): "file", "to_ignore", ), - [], ["to_ignore"], True, ), - ("to_ignore.txt", [], ["/*.txt"], True), + ("to_ignore.txt", ["/*.txt"], True), ( os.path.join("rel", "path", "path2", "to_ignore"), - [], ["rel/*/to_ignore"], False, ), - (os.path.join("path", "to_ignore.txt"), [], ["/*.txt"], False), + (os.path.join("path", "to_ignore.txt"), ["/*.txt"], False), ( os.path.join("rel", "path", "path2", "dont_ignore"), - [], ["rel/**/to_ignore"], False, ), - ("dont_ignore.txt", [], ["dont_ignore"], False), - ("dont_ignore.txt", ["!dont_ignore.txt"], ["dont*"], False), + ("dont_ignore.txt", ["dont_ignore"], False), + ("dont_ignore.txt", ["dont*", "!dont_ignore.txt"], False), ], ) def test_match_ignore_from_file( - file_to_ignore_relpath, negate_patterns, patterns, expected_match + file_to_ignore_relpath, patterns, expected_match ): dvcignore_path = os.path.join( @@ -98,7 +87,7 @@ def test_match_ignore_from_file( ) dvcignore_dirname = os.path.dirname(dvcignore_path) - ignore_file = mock_dvcignore(dvcignore_path, negate_patterns, patterns) + ignore_file = mock_dvcignore(dvcignore_path, patterns) assert ( ignore_file.matches(dvcignore_dirname, file_to_ignore_relpath)