From 956fec02ce445fa809ccd6f43556adf30ca42c88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 18 Dec 2019 16:08:56 +0100 Subject: [PATCH 01/12] repo: move dvcignore from repo to tree --- dvc/remote/local.py | 4 ++-- dvc/repo/__init__.py | 10 +++------- dvc/repo/add.py | 2 +- dvc/repo/brancher.py | 4 ++-- dvc/scm/git/__init__.py | 5 ++++- dvc/scm/git/tree.py | 20 ++++++++++++++------ dvc/scm/tree.py | 30 ++++++++++++++++++++++++++++++ dvc/state.py | 10 ++++++---- tests/func/test_checkout.py | 2 +- tests/func/test_ignore.py | 33 ++++++++++++++++++--------------- tests/func/test_repo.py | 3 ++- tests/func/test_tree.py | 8 ++++---- 12 files changed, 87 insertions(+), 44 deletions(-) diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 9c68e3a808..0da62ff256 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -138,7 +138,7 @@ def getsize(path_info): return os.path.getsize(fspath_py35(path_info)) def walk_files(self, path_info): - for fname in walk_files(path_info, self.repo.dvcignore): + for fname in walk_files(path_info, self.repo.tree.dvcignore): yield PathInfo(fname) def get_file_checksum(self, path_info): @@ -427,7 +427,7 @@ def _unprotect_file(path): os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE) def _unprotect_dir(self, path): - for fname in walk_files(path, self.repo.dvcignore): + for fname in walk_files(path, self.repo.tree.dvcignore): RemoteLOCAL._unprotect_file(fname) def unprotect(self, path_info): diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 3df374160c..d812347820 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -9,13 +9,13 @@ from funcy import cached_property +from dvc.scm.tree import IgnoreTree from dvc.config import Config from dvc.exceptions import ( FileMissingError, NotDvcRepoError, OutputNotFoundError, ) -from dvc.ignore import DvcIgnoreFilter from dvc.path_info import PathInfo from dvc.remote.base import RemoteActionNotImplemented from dvc.utils import relpath @@ -84,7 +84,7 @@ def __init__(self, root_dir=None): self.scm = SCM(self.root_dir) - self.tree = WorkingTree(self.root_dir) + self.tree = IgnoreTree(WorkingTree(self.root_dir)) self.tmp_dir = os.path.join(self.dvc_dir, "tmp") makedirs(self.tmp_dir, exist_ok=True) @@ -392,7 +392,7 @@ def stages(self): outs = [] for root, dirs, files in self.tree.walk( - self.root_dir, dvcignore=self.dvcignore + self.root_dir, dvcignore=self.tree.dvcignore ): for fname in files: path = os.path.join(root, fname) @@ -487,10 +487,6 @@ def _open_cached(self, out, remote=None, mode="r", encoding=None): return _open(cache_file, mode=mode, encoding=encoding) - @cached_property - def dvcignore(self): - return DvcIgnoreFilter(self.root_dir, self.tree) - def close(self): self.scm.close() diff --git a/dvc/repo/add.py b/dvc/repo/add.py index 8fe180d41a..f0353482b3 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -68,7 +68,7 @@ def _find_all_targets(repo, target, recursive): if os.path.isdir(target) and recursive: return [ fname - for fname in walk_files(target, repo.dvcignore) + for fname in walk_files(target, repo.tree.dvcignore) if not repo.is_dvc_internal(fname) if not Stage.is_stage_file(fname) if not repo.scm.belongs_to_scm(fname) diff --git a/dvc/repo/brancher.py b/dvc/repo/brancher.py index acca98c2c2..ff3e25d39d 100644 --- a/dvc/repo/brancher.py +++ b/dvc/repo/brancher.py @@ -1,6 +1,6 @@ from funcy import group_by -from dvc.scm.tree import WorkingTree +from dvc.scm.tree import WorkingTree, IgnoreTree def brancher( # noqa: E302 @@ -34,7 +34,7 @@ def brancher( # noqa: E302 scm = self.scm - self.tree = WorkingTree(self.root_dir) + self.tree = IgnoreTree(WorkingTree(self.root_dir)) yield "working tree" if all_commits: diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 668dc6fd57..9a71a3afb8 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -14,6 +14,7 @@ from dvc.scm.base import RevError from dvc.scm.base import SCMError from dvc.scm.git.tree import GitTree +from dvc.scm.tree import IgnoreTree from dvc.utils import fix_env from dvc.utils import is_binary from dvc.utils import relpath @@ -307,7 +308,9 @@ def belongs_to_scm(self, path): return basename == self.ignore_file or Git.GIT_DIR in path_parts def get_tree(self, rev): - return GitTree(self.repo, rev) + tree = GitTree(self.repo, rev) + + return IgnoreTree(tree) def _get_diff_trees(self, a_ref, b_ref): """Private method for getting the trees and commit hashes of 2 git diff --git a/dvc/scm/git/tree.py b/dvc/scm/git/tree.py index 0601c322df..a7991d53e0 100644 --- a/dvc/scm/git/tree.py +++ b/dvc/scm/git/tree.py @@ -118,7 +118,7 @@ def git_object_by_path(self, path): tree = tree[i] return tree - def _walk(self, tree, topdown=True): + def _walk(self, tree, topdown=True, dvcignore=None): dirs, nondirs = [], [] for i in _iter_tree(tree): if i.mode == GIT_MODE_DIR: @@ -127,14 +127,22 @@ def _walk(self, tree, topdown=True): nondirs.append(i.name) if topdown: - yield os.path.normpath(tree.abspath), dirs, nondirs + root = os.path.normpath(tree.abspath) + if dvcignore: + dirs[:], nondirs[:] = dvcignore(root, dirs, nondirs) + yield root, dirs, nondirs for i in dirs: - for x in self._walk(tree[i], topdown=True): - yield x + for root, dirs, nondirs in self._walk(tree[i], topdown=True): + if dvcignore: + dirs[:], nondirs[:] = dvcignore(root, dirs, nondirs) + yield root, dirs, nondirs if not topdown: - yield os.path.normpath(tree.abspath), dirs, nondirs + root = os.path.normpath(tree.abspath) + if dvcignore: + dirs[:], nondirs[:] = dvcignore(root, dirs, nondirs) + yield root, dirs, nondirs def walk(self, top, topdown=True, dvcignore=None): """Directory tree generator. @@ -148,5 +156,5 @@ def walk(self, top, topdown=True, dvcignore=None): if tree is None: raise IOError(errno.ENOENT, "No such file") - for x in self._walk(tree, topdown): + for x in self._walk(tree, topdown, dvcignore): yield x diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index 0931a59d0a..06062f9966 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -1,5 +1,6 @@ import os +from dvc.ignore import DvcIgnoreFilter from dvc.utils import dvc_walk from dvc.utils.compat import open @@ -75,3 +76,32 @@ def onerror(e): os.path.abspath(top), dvcignore, topdown=topdown, onerror=onerror ): yield os.path.normpath(root), dirs, files + + +class IgnoreTree(BaseTree): + def __init__(self, tree): + self.tree = tree + + @property + def dvcignore(self): + return DvcIgnoreFilter(self.tree.tree_root, self.tree) + + @property + def tree_root(self): + return self.tree.tree_root + + def open(self, path, mode="r", encoding="utf-8"): + return self.tree.open(path, mode, encoding) + + def exists(self, path): + return self.tree.exists(path) + + def isdir(self, path): + return self.tree.isdir(path) + + def isfile(self, path): + return self.tree.isfile(path) + + def walk(self, top, topdown=True, dvcignore=None): + for r, d, fs in self.tree.walk(top, topdown, self.dvcignore): + yield r, d, fs diff --git a/dvc/state.py b/dvc/state.py index 9803e40c24..a3e27a8e9e 100644 --- a/dvc/state.py +++ b/dvc/state.py @@ -378,7 +378,7 @@ def save(self, path_info, checksum): assert os.path.exists(fspath_py35(path_info)) actual_mtime, actual_size = get_mtime_and_size( - path_info, self.repo.dvcignore + path_info, self.repo.tree.dvcignore ) actual_inode = get_inode(path_info) @@ -411,7 +411,7 @@ def get(self, path_info): return None actual_mtime, actual_size = get_mtime_and_size( - path, self.repo.dvcignore + path, self.repo.tree.dvcignore ) actual_inode = get_inode(path) @@ -439,7 +439,7 @@ def save_link(self, path_info): if not os.path.exists(path): return - mtime, _ = get_mtime_and_size(path, self.repo.dvcignore) + mtime, _ = get_mtime_and_size(path, self.repo.tree.dvcignore) inode = get_inode(path) relative_path = relpath(path, self.root_dir) @@ -469,7 +469,9 @@ def remove_unused_links(self, used): continue actual_inode = get_inode(path) - actual_mtime, _ = get_mtime_and_size(path, self.repo.dvcignore) + actual_mtime, _ = get_mtime_and_size( + path, self.repo.tree.dvcignore + ) if inode == actual_inode and mtime == actual_mtime: logger.debug("Removing '{}' as unused link.".format(path)) diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index 797e424708..0760bd2d52 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -137,7 +137,7 @@ def outs_info(self, stage): paths = [ path for output in stage["outs"] - for path in walk_files(output["path"], self.dvc.dvcignore) + for path in walk_files(output["path"], self.dvc.tree.dvcignore) ] return [ diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 3bf878a4a1..a2a4aae2dc 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -20,10 +20,10 @@ def test_ignore(tmp_dir, dvc, monkeypatch): tmp_dir.gen({"dir": {"ignored": "text", "other": "text2"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") - assert _files_set("dir", dvc.dvcignore) == {"dir/other"} + assert _files_set("dir", dvc.tree.dvcignore) == {"dir/other"} monkeypatch.chdir("dir") - assert _files_set(".", dvc.dvcignore) == {"./other"} + assert _files_set(".", dvc.tree.dvcignore) == {"./other"} def test_ignore_unicode(tmp_dir, dvc): @@ -35,27 +35,27 @@ def test_ignore_unicode(tmp_dir, dvc): tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/тест") - assert _files_set("dir", dvc.dvcignore) == {"dir/other"} + assert _files_set("dir", dvc.tree.dvcignore) == {"dir/other"} def test_rename_ignored_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "...", "other": "text"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored*") - mtime, size = get_mtime_and_size("dir", dvc.dvcignore) + mtime, size = get_mtime_and_size("dir", dvc.tree.dvcignore) shutil.move("dir/ignored", "dir/ignored_new") - new_mtime, new_size = get_mtime_and_size("dir", dvc.dvcignore) + new_mtime, new_size = get_mtime_and_size("dir", dvc.tree.dvcignore) assert new_mtime == mtime and new_size == size def test_rename_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}}) - mtime, size = get_mtime_and_size("dir", dvc.dvcignore) + mtime, size = get_mtime_and_size("dir", dvc.tree.dvcignore) shutil.move("dir/foo", "dir/foo_new") - new_mtime, new_size = get_mtime_and_size("dir", dvc.dvcignore) + new_mtime, new_size = get_mtime_and_size("dir", dvc.tree.dvcignore) assert new_mtime != mtime and new_size == size @@ -64,20 +64,20 @@ def test_remove_ignored_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "...", "other": "text"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") - mtime, size = get_mtime_and_size("dir", dvc.dvcignore) + mtime, size = get_mtime_and_size("dir", dvc.tree.dvcignore) os.remove("dir/ignored") - new_mtime, new_size = get_mtime_and_size("dir", dvc.dvcignore) + new_mtime, new_size = get_mtime_and_size("dir", dvc.tree.dvcignore) assert new_mtime == mtime and new_size == size def test_remove_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}}) - mtime, size = get_mtime_and_size("dir", dvc.dvcignore) + mtime, size = get_mtime_and_size("dir", dvc.tree.dvcignore) os.remove("dir/foo") - new_mtime, new_size = get_mtime_and_size("dir", dvc.dvcignore) + new_mtime, new_size = get_mtime_and_size("dir", dvc.tree.dvcignore) assert new_mtime != mtime and new_size != size @@ -99,7 +99,7 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname): ignore_file = tmp_dir / dname / DvcIgnore.DVCIGNORE_FILE ignore_file.write_text("foo") - assert dvc.dvcignore.ignores == { + assert dvc.tree.dvcignore.ignores == { DvcIgnoreDirs([".git", ".hg", ".dvc"]), DvcIgnorePatterns(fspath(top_ignore_file), WorkingTree(dvc.root_dir)), } @@ -112,10 +112,13 @@ def test_ignore_on_branch(tmp_dir, scm, dvc): tmp_dir.scm_gen(DvcIgnore.DVCIGNORE_FILE, "foo", commit="add ignore") scm.checkout("master") - assert _files_set(".", dvc.dvcignore) == {"./foo", "./bar"} + assert _files_set(".", dvc.tree.dvcignore) == {"./foo", "./bar"} - dvc.tree = scm.get_tree("branch") - assert _files_set(".", dvc.dvcignore) == {"./bar"} + tree = scm.get_tree("branch") + files_on_branch = { + f for r, ds, fs in tree.walk(tree.tree_root) for f in fs + } + assert "foo" not in files_on_branch def _files_set(root, dvcignore): diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index 51f3a87543..28f881693a 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -3,6 +3,7 @@ from dvc.scm.git.tree import GitTree from dvc.cache import Cache from dvc.repo import Repo +from dvc.scm.tree import IgnoreTree from dvc.system import System from dvc.utils.compat import fspath @@ -50,7 +51,7 @@ def collect_outs(*args, **kwargs): assert collect_outs("bar.dvc", with_deps=True) == {"foo", "bar"} - dvc.tree = GitTree(scm.repo, "new-branch") + dvc.tree = IgnoreTree(GitTree(scm.repo, "new-branch")) assert collect_outs("buzz.dvc", with_deps=True) == {"foo", "bar", "buzz"} assert collect_outs("buzz.dvc", with_deps=False) == {"buzz"} diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index c2ae316c21..2b2ad38b26 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -6,7 +6,7 @@ from dvc.ignore import DvcIgnoreFilter from dvc.scm import SCM from dvc.scm.git import GitTree -from dvc.scm.tree import WorkingTree +from dvc.scm.tree import WorkingTree, IgnoreTree from tests.basic_env import TestDir from tests.basic_env import TestGit from tests.basic_env import TestGitSubmodule @@ -82,14 +82,14 @@ class TestGitTree(TestGit, GitTreeTests): def setUp(self): super(TestGitTree, self).setUp() self.scm = SCM(self._root_dir) - self.tree = GitTree(self.git, "master") + self.tree = IgnoreTree(GitTree(self.git, "master")) class TestGitSubmoduleTree(TestGitSubmodule, GitTreeTests): def setUp(self): super(TestGitSubmoduleTree, self).setUp() self.scm = SCM(self._root_dir) - self.tree = GitTree(self.git, "master") + self.tree = IgnoreTree(GitTree(self.git, "master")) self._pushd(self._root_dir) @@ -177,7 +177,7 @@ def test_branch(self): scm = SCM(self._root_dir) scm.add([self.DATA_SUB_DIR]) scm.commit("add data_dir/data_sub_dir/data_sub") - tree = GitTree(self.git, "master") + tree = IgnoreTree(GitTree(self.git, "master")) self.assertWalkEqual( tree.walk("."), [ From f4ee01cffe36f3de392bd83408cf8ba3a77cd8da Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 20 Dec 2019 18:22:45 +0100 Subject: [PATCH 02/12] no need for dvcignore in walk args --- dvc/ignore.py | 10 +++++++--- dvc/repo/__init__.py | 8 +++----- dvc/repo/brancher.py | 4 ++-- dvc/scm/git/__init__.py | 6 ++---- dvc/scm/git/tree.py | 22 +++++++--------------- dvc/scm/tree.py | 27 ++++++++++++++++++--------- tests/func/test_repo.py | 4 ++-- tests/func/test_tree.py | 26 +++++++++++--------------- tests/unit/utils/test_fs.py | 6 ++---- 9 files changed, 54 insertions(+), 59 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index fbfcff3ac7..a092735543 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -72,11 +72,15 @@ def __eq__(self, other): class DvcIgnoreFilter(object): - def __init__(self, root_dir, tree): + def __init__(self, tree): self.tree = tree self.ignores = {DvcIgnoreDirs([".git", ".hg", ".dvc"])} - self._update(root_dir) - for root, dirs, _ in self.tree.walk(root_dir, dvcignore=self): + + def reload(self): + self._update(self.tree.tree_root) + for root, dirs, _ in self.tree.walk( + self.tree.tree_root, dvcignore=self + ): for d in dirs: self._update(os.path.join(root, d)) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index d812347820..ce3ce71178 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -9,7 +9,7 @@ from funcy import cached_property -from dvc.scm.tree import IgnoreTree +from dvc.scm.tree import CleanTree from dvc.config import Config from dvc.exceptions import ( FileMissingError, @@ -84,7 +84,7 @@ def __init__(self, root_dir=None): self.scm = SCM(self.root_dir) - self.tree = IgnoreTree(WorkingTree(self.root_dir)) + self.tree = CleanTree(WorkingTree(self.root_dir)) self.tmp_dir = os.path.join(self.dvc_dir, "tmp") makedirs(self.tmp_dir, exist_ok=True) @@ -391,9 +391,7 @@ def stages(self): stages = [] outs = [] - for root, dirs, files in self.tree.walk( - self.root_dir, dvcignore=self.tree.dvcignore - ): + for root, dirs, files in self.tree.walk(self.root_dir): for fname in files: path = os.path.join(root, fname) if not Stage.is_valid_filename(path): diff --git a/dvc/repo/brancher.py b/dvc/repo/brancher.py index ff3e25d39d..a24415a070 100644 --- a/dvc/repo/brancher.py +++ b/dvc/repo/brancher.py @@ -1,6 +1,6 @@ from funcy import group_by -from dvc.scm.tree import WorkingTree, IgnoreTree +from dvc.scm.tree import WorkingTree, CleanTree def brancher( # noqa: E302 @@ -34,7 +34,7 @@ def brancher( # noqa: E302 scm = self.scm - self.tree = IgnoreTree(WorkingTree(self.root_dir)) + self.tree = CleanTree(WorkingTree(self.root_dir)) yield "working tree" if all_commits: diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 9a71a3afb8..874c9e5100 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -14,7 +14,7 @@ from dvc.scm.base import RevError from dvc.scm.base import SCMError from dvc.scm.git.tree import GitTree -from dvc.scm.tree import IgnoreTree +from dvc.scm.tree import CleanTree from dvc.utils import fix_env from dvc.utils import is_binary from dvc.utils import relpath @@ -308,9 +308,7 @@ def belongs_to_scm(self, path): return basename == self.ignore_file or Git.GIT_DIR in path_parts def get_tree(self, rev): - tree = GitTree(self.repo, rev) - - return IgnoreTree(tree) + return CleanTree(GitTree(self.repo, rev)) def _get_diff_trees(self, a_ref, b_ref): """Private method for getting the trees and commit hashes of 2 git diff --git a/dvc/scm/git/tree.py b/dvc/scm/git/tree.py index a7991d53e0..ff0ce6e472 100644 --- a/dvc/scm/git/tree.py +++ b/dvc/scm/git/tree.py @@ -118,7 +118,7 @@ def git_object_by_path(self, path): tree = tree[i] return tree - def _walk(self, tree, topdown=True, dvcignore=None): + def _walk(self, tree, topdown=True): dirs, nondirs = [], [] for i in _iter_tree(tree): if i.mode == GIT_MODE_DIR: @@ -127,24 +127,16 @@ def _walk(self, tree, topdown=True, dvcignore=None): nondirs.append(i.name) if topdown: - root = os.path.normpath(tree.abspath) - if dvcignore: - dirs[:], nondirs[:] = dvcignore(root, dirs, nondirs) - yield root, dirs, nondirs + yield os.path.normpath(tree.abspath), dirs, nondirs for i in dirs: - for root, dirs, nondirs in self._walk(tree[i], topdown=True): - if dvcignore: - dirs[:], nondirs[:] = dvcignore(root, dirs, nondirs) - yield root, dirs, nondirs + for x in self._walk(tree[i], topdown=True): + yield x if not topdown: - root = os.path.normpath(tree.abspath) - if dvcignore: - dirs[:], nondirs[:] = dvcignore(root, dirs, nondirs) - yield root, dirs, nondirs + yield os.path.normpath(tree.abspath), dirs, nondirs - def walk(self, top, topdown=True, dvcignore=None): + def walk(self, top, topdown=True, **kwargs): """Directory tree generator. See `os.walk` for the docs. Differences: @@ -156,5 +148,5 @@ def walk(self, top, topdown=True, dvcignore=None): if tree is None: raise IOError(errno.ENOENT, "No such file") - for x in self._walk(tree, topdown, dvcignore): + for x in self._walk(tree, topdown): yield x diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index 06062f9966..4e8a84a0ec 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -1,5 +1,7 @@ import os +from funcy import cached_property + from dvc.ignore import DvcIgnoreFilter from dvc.utils import dvc_walk from dvc.utils.compat import open @@ -12,6 +14,9 @@ class BaseTree(object): def tree_root(self): pass + def set_dvcignore(self, dvcignore): + self.dvcignore = dvcignore + def open(self, path, mode="r", encoding="utf-8"): """Open file and return a stream.""" @@ -24,7 +29,7 @@ def isdir(self, path): def isfile(self, path): """Test whether a path is a regular file""" - def walk(self, top, topdown=True, dvcignore=None): + def walk(self, top, topdown=True, **kwargs): """Directory tree generator. See `os.walk` for the docs. Differences: @@ -59,7 +64,7 @@ def isfile(self, path): """Test whether a path is a regular file""" return os.path.isfile(path) - def walk(self, top, topdown=True, dvcignore=None): + def walk(self, top, topdown=True, dvcignore=None, **kwargs): """Directory tree generator. See `os.walk` for the docs. Differences: @@ -67,8 +72,6 @@ def walk(self, top, topdown=True, dvcignore=None): - it could raise exceptions, there is no onerror argument """ - assert dvcignore - def onerror(e): raise e @@ -78,13 +81,17 @@ def onerror(e): yield os.path.normpath(root), dirs, files -class IgnoreTree(BaseTree): +class CleanTree(BaseTree): def __init__(self, tree): + super(CleanTree, self).__init__() self.tree = tree + self._dvcignore = DvcIgnoreFilter(self.tree) + self.tree.set_dvcignore(self._dvcignore) - @property + @cached_property def dvcignore(self): - return DvcIgnoreFilter(self.tree.tree_root, self.tree) + self._dvcignore.reload() + return self._dvcignore @property def tree_root(self): @@ -102,6 +109,8 @@ def isdir(self, path): def isfile(self, path): return self.tree.isfile(path) - def walk(self, top, topdown=True, dvcignore=None): - for r, d, fs in self.tree.walk(top, topdown, self.dvcignore): + def walk(self, top, topdown=True, **kwargs): + for r, d, fs in self.tree.walk(top, topdown, dvcignore=self.dvcignore): + d[:], fs[:] = self.dvcignore(r, d, fs) + yield r, d, fs diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index 28f881693a..d78b55fa65 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -3,7 +3,7 @@ from dvc.scm.git.tree import GitTree from dvc.cache import Cache from dvc.repo import Repo -from dvc.scm.tree import IgnoreTree +from dvc.scm.tree import CleanTree from dvc.system import System from dvc.utils.compat import fspath @@ -51,7 +51,7 @@ def collect_outs(*args, **kwargs): assert collect_outs("bar.dvc", with_deps=True) == {"foo", "bar"} - dvc.tree = IgnoreTree(GitTree(scm.repo, "new-branch")) + dvc.tree = CleanTree(GitTree(scm.repo, "new-branch")) assert collect_outs("buzz.dvc", with_deps=True) == {"foo", "bar", "buzz"} assert collect_outs("buzz.dvc", with_deps=False) == {"buzz"} diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index 2b2ad38b26..908449fe38 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -3,10 +3,9 @@ from os.path import join -from dvc.ignore import DvcIgnoreFilter from dvc.scm import SCM from dvc.scm.git import GitTree -from dvc.scm.tree import WorkingTree, IgnoreTree +from dvc.scm.tree import WorkingTree, CleanTree from tests.basic_env import TestDir from tests.basic_env import TestGit from tests.basic_env import TestGitSubmodule @@ -82,14 +81,14 @@ class TestGitTree(TestGit, GitTreeTests): def setUp(self): super(TestGitTree, self).setUp() self.scm = SCM(self._root_dir) - self.tree = IgnoreTree(GitTree(self.git, "master")) + self.tree = CleanTree(GitTree(self.git, "master")) class TestGitSubmoduleTree(TestGitSubmodule, GitTreeTests): def setUp(self): super(TestGitSubmoduleTree, self).setUp() self.scm = SCM(self._root_dir) - self.tree = IgnoreTree(GitTree(self.git, "master")) + self.tree = CleanTree(GitTree(self.git, "master")) self._pushd(self._root_dir) @@ -108,10 +107,9 @@ def convert_to_sets(walk_results): class TestWalkInNoSCM(AssertWalkEqualMixin, TestDir): def test(self): - tree = WorkingTree(self._root_dir) - dvcignore = DvcIgnoreFilter(self.root_dir, tree) + tree = CleanTree(WorkingTree(self._root_dir)) self.assertWalkEqual( - tree.walk(self._root_dir, dvcignore=dvcignore), + tree.walk(self._root_dir), [ ( self._root_dir, @@ -128,10 +126,9 @@ def test(self): ) def test_subdir(self): - tree = WorkingTree(self._root_dir) - dvcignore = DvcIgnoreFilter(self.root_dir, tree) + tree = CleanTree(WorkingTree(self._root_dir)) self.assertWalkEqual( - tree.walk(join("data_dir", "data_sub_dir"), dvcignore=dvcignore), + tree.walk(join("data_dir", "data_sub_dir")), [ ( join(self._root_dir, "data_dir", "data_sub_dir"), @@ -144,10 +141,9 @@ def test_subdir(self): class TestWalkInGit(AssertWalkEqualMixin, TestGit): def test_nobranch(self): - tree = WorkingTree(self._root_dir) - dvcignore = DvcIgnoreFilter(self._root_dir, tree) + tree = CleanTree(WorkingTree(self._root_dir)) self.assertWalkEqual( - tree.walk(".", dvcignore=dvcignore), + tree.walk("."), [ ( self._root_dir, @@ -163,7 +159,7 @@ def test_nobranch(self): ], ) self.assertWalkEqual( - tree.walk(join("data_dir", "data_sub_dir"), dvcignore=dvcignore), + tree.walk(join("data_dir", "data_sub_dir")), [ ( join(self._root_dir, "data_dir", "data_sub_dir"), @@ -177,7 +173,7 @@ def test_branch(self): scm = SCM(self._root_dir) scm.add([self.DATA_SUB_DIR]) scm.commit("add data_dir/data_sub_dir/data_sub") - tree = IgnoreTree(GitTree(self.git, "master")) + tree = CleanTree(GitTree(self.git, "master")) self.assertWalkEqual( tree.walk("."), [ diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 8092129076..7f4f580fc5 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -23,7 +23,7 @@ class TestMtimeAndSize(TestDir): def test(self): - dvcignore = DvcIgnoreFilter(self.root_dir, WorkingTree(self.root_dir)) + dvcignore = DvcIgnoreFilter(WorkingTree(self.root_dir)) file_time, file_size = get_mtime_and_size(self.DATA, dvcignore) dir_time, dir_size = get_mtime_and_size(self.DATA_DIR, dvcignore) @@ -127,9 +127,7 @@ def test_get_inode(repo_dir): def test_path_object_and_str_are_valid_types_get_mtime_and_size( path, repo_dir ): - dvcignore = DvcIgnoreFilter( - repo_dir.root_dir, WorkingTree(repo_dir.root_dir) - ) + dvcignore = DvcIgnoreFilter(WorkingTree(repo_dir.root_dir)) time, size = get_mtime_and_size(path, dvcignore) object_time, object_size = get_mtime_and_size(PathInfo(path), dvcignore) assert time == object_time From aa613f9e6f0108df60b24d783a2db4217ff04a01 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 20 Dec 2019 19:15:33 +0100 Subject: [PATCH 03/12] dvcignore loading back in constructor --- dvc/ignore.py | 2 -- dvc/scm/tree.py | 8 +------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index a092735543..b710fe586d 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -75,8 +75,6 @@ class DvcIgnoreFilter(object): def __init__(self, tree): self.tree = tree self.ignores = {DvcIgnoreDirs([".git", ".hg", ".dvc"])} - - def reload(self): self._update(self.tree.tree_root) for root, dirs, _ in self.tree.walk( self.tree.tree_root, dvcignore=self diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index 4e8a84a0ec..4deb21d256 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -14,9 +14,6 @@ class BaseTree(object): def tree_root(self): pass - def set_dvcignore(self, dvcignore): - self.dvcignore = dvcignore - def open(self, path, mode="r", encoding="utf-8"): """Open file and return a stream.""" @@ -85,13 +82,10 @@ class CleanTree(BaseTree): def __init__(self, tree): super(CleanTree, self).__init__() self.tree = tree - self._dvcignore = DvcIgnoreFilter(self.tree) - self.tree.set_dvcignore(self._dvcignore) @cached_property def dvcignore(self): - self._dvcignore.reload() - return self._dvcignore + return DvcIgnoreFilter(self.tree) @property def tree_root(self): From cf2fac5787fe4329267b22326efec22fe66af84b Mon Sep 17 00:00:00 2001 From: pawel Date: Tue, 24 Dec 2019 10:52:46 +0100 Subject: [PATCH 04/12] working tree: adjust walk behaviour to os.walk --- dvc/scm/tree.py | 2 +- tests/func/test_tree.py | 30 +++++------------------------- 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index 4deb21d256..d830397b8a 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -73,7 +73,7 @@ def onerror(e): raise e for root, dirs, files in dvc_walk( - os.path.abspath(top), dvcignore, topdown=topdown, onerror=onerror + top, dvcignore, topdown=topdown, onerror=onerror ): yield os.path.normpath(root), dirs, files diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index 908449fe38..d3df8528e9 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -129,13 +129,7 @@ def test_subdir(self): tree = CleanTree(WorkingTree(self._root_dir)) self.assertWalkEqual( tree.walk(join("data_dir", "data_sub_dir")), - [ - ( - join(self._root_dir, "data_dir", "data_sub_dir"), - [], - ["data_sub"], - ) - ], + [(join("data_dir", "data_sub_dir"), [], ["data_sub"])], ) @@ -145,28 +139,14 @@ def test_nobranch(self): self.assertWalkEqual( tree.walk("."), [ - ( - self._root_dir, - ["data_dir"], - ["bar", "тест", "code.py", "foo"], - ), - (join(self._root_dir, "data_dir"), ["data_sub_dir"], ["data"]), - ( - join(self._root_dir, "data_dir", "data_sub_dir"), - [], - ["data_sub"], - ), + (".", ["data_dir"], ["bar", "тест", "code.py", "foo"]), + (join("data_dir"), ["data_sub_dir"], ["data"]), + (join("data_dir", "data_sub_dir"), [], ["data_sub"]), ], ) self.assertWalkEqual( tree.walk(join("data_dir", "data_sub_dir")), - [ - ( - join(self._root_dir, "data_dir", "data_sub_dir"), - [], - ["data_sub"], - ) - ], + [(join("data_dir", "data_sub_dir"), [], ["data_sub"])], ) def test_branch(self): From d8c0ae6909a32d37c9ab59df2f5b27db99e13a0f Mon Sep 17 00:00:00 2001 From: pawel Date: Tue, 24 Dec 2019 12:19:51 +0100 Subject: [PATCH 05/12] remove most occurences of dvc_walk --- dvc/remote/local.py | 6 ++--- dvc/repo/add.py | 3 +-- dvc/scm/tree.py | 5 +++++ dvc/state.py | 12 ++++------ dvc/utils/__init__.py | 4 ++-- dvc/utils/fs.py | 7 +++--- tests/func/test_checkout.py | 3 +-- tests/func/test_ignore.py | 33 ++++++++++++++-------------- tests/unit/remote/test_remote_dir.py | 2 +- tests/unit/utils/test_fs.py | 15 ++++++------- 10 files changed, 45 insertions(+), 45 deletions(-) diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 0da62ff256..81b63d0618 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -83,7 +83,7 @@ def supported(cls, config): def list_cache_paths(self): assert self.path_info is not None - return walk_files(self.path_info, None) + return walk_files(self.path_info) def get(self, md5): if not md5: @@ -138,7 +138,7 @@ def getsize(path_info): return os.path.getsize(fspath_py35(path_info)) def walk_files(self, path_info): - for fname in walk_files(path_info, self.repo.tree.dvcignore): + for fname in self.repo.tree.walk_files(path_info): yield PathInfo(fname) def get_file_checksum(self, path_info): @@ -427,7 +427,7 @@ def _unprotect_file(path): os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE) def _unprotect_dir(self, path): - for fname in walk_files(path, self.repo.tree.dvcignore): + for fname in self.repo.tree.walk_files(path): RemoteLOCAL._unprotect_file(fname) def unprotect(self, path_info): diff --git a/dvc/repo/add.py b/dvc/repo/add.py index f0353482b3..a94f0ef1a3 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -12,7 +12,6 @@ from dvc.repo.scm_context import scm_context from dvc.stage import Stage from dvc.utils import LARGE_DIR_SIZE -from dvc.utils import walk_files logger = logging.getLogger(__name__) @@ -68,7 +67,7 @@ def _find_all_targets(repo, target, recursive): if os.path.isdir(target) and recursive: return [ fname - for fname in walk_files(target, repo.tree.dvcignore) + for fname in repo.tree.walk_files(target) if not repo.is_dvc_internal(fname) if not Stage.is_stage_file(fname) if not repo.scm.belongs_to_scm(fname) diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index d830397b8a..4df46cce7e 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -108,3 +108,8 @@ def walk(self, top, topdown=True, **kwargs): d[:], fs[:] = self.dvcignore(r, d, fs) yield r, d, fs + + def walk_files(self, top): + for root, _, files in self.walk(top): + for file in files: + yield os.path.join(root, file) diff --git a/dvc/state.py b/dvc/state.py index a3e27a8e9e..5f10654d11 100644 --- a/dvc/state.py +++ b/dvc/state.py @@ -378,7 +378,7 @@ def save(self, path_info, checksum): assert os.path.exists(fspath_py35(path_info)) actual_mtime, actual_size = get_mtime_and_size( - path_info, self.repo.tree.dvcignore + path_info, self.repo.tree ) actual_inode = get_inode(path_info) @@ -410,9 +410,7 @@ def get(self, path_info): if not os.path.exists(path): return None - actual_mtime, actual_size = get_mtime_and_size( - path, self.repo.tree.dvcignore - ) + actual_mtime, actual_size = get_mtime_and_size(path, self.repo.tree) actual_inode = get_inode(path) existing_record = self.get_state_record_for_inode(actual_inode) @@ -439,7 +437,7 @@ def save_link(self, path_info): if not os.path.exists(path): return - mtime, _ = get_mtime_and_size(path, self.repo.tree.dvcignore) + mtime, _ = get_mtime_and_size(path, self.repo.tree) inode = get_inode(path) relative_path = relpath(path, self.root_dir) @@ -469,9 +467,7 @@ def remove_unused_links(self, used): continue actual_inode = get_inode(path) - actual_mtime, _ = get_mtime_and_size( - path, self.repo.tree.dvcignore - ) + actual_mtime, _ = get_mtime_and_size(path, self.repo.tree) if inode == actual_inode and mtime == actual_mtime: logger.debug("Removing '{}' as unused link.".format(path)) diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 92fd34a8a9..c9127be90e 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -305,8 +305,8 @@ def dvc_walk(top, dvcignore, topdown=True, onerror=None, followlinks=False): yield root, dirs, files -def walk_files(directory, dvcignore): - for root, _, files in dvc_walk(directory, dvcignore): +def walk_files(directory): + for root, _, files in os.walk(fspath(directory)): for f in files: yield os.path.join(root, f) diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 26ee05506e..533cdc01e9 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -15,7 +15,6 @@ from dvc.utils import fspath from dvc.utils import fspath_py35 from dvc.utils import relpath -from dvc.utils import walk_files from dvc.utils.compat import str @@ -35,11 +34,13 @@ def get_inode(path): return inode -def get_mtime_and_size(path, dvcignore): +# FIXME: There is no use case of get_mtime_and_size without tree, we should +# move it to tree +def get_mtime_and_size(path, tree): if os.path.isdir(fspath_py35(path)): size = 0 files_mtimes = {} - for file_path in walk_files(path, dvcignore): + for file_path in tree.walk_files(path): try: stat = os.stat(file_path) except OSError as exc: diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index 0760bd2d52..4f83f2cf83 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -19,7 +19,6 @@ from dvc.stage import StageFileDoesNotExistError from dvc.system import System from dvc.utils import relpath -from dvc.utils import walk_files from dvc.utils.compat import is_py2 from dvc.utils.stage import dump_stage_file from dvc.utils.stage import load_stage_file @@ -137,7 +136,7 @@ def outs_info(self, stage): paths = [ path for output in stage["outs"] - for path in walk_files(output["path"], self.dvc.tree.dvcignore) + for path in self.dvc.tree.walk_files(output["path"]) ] return [ diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index a2a4aae2dc..803315f6a3 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -7,8 +7,9 @@ from dvc.exceptions import DvcIgnoreInCollectedDirError from dvc.ignore import DvcIgnore, DvcIgnoreDirs, DvcIgnorePatterns from dvc.scm.tree import WorkingTree -from dvc.utils import walk_files, relpath -from dvc.utils.compat import fspath, fspath_py35 +from dvc.utils import relpath +from dvc.utils.compat import fspath_py35 +from dvc.utils.compat import fspath from dvc.utils.fs import get_mtime_and_size from dvc.remote import RemoteLOCAL @@ -20,10 +21,10 @@ def test_ignore(tmp_dir, dvc, monkeypatch): tmp_dir.gen({"dir": {"ignored": "text", "other": "text2"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") - assert _files_set("dir", dvc.tree.dvcignore) == {"dir/other"} + assert _files_set("dir", dvc.tree) == {"dir/other"} monkeypatch.chdir("dir") - assert _files_set(".", dvc.tree.dvcignore) == {"./other"} + assert _files_set(".", dvc.tree) == {"./other"} def test_ignore_unicode(tmp_dir, dvc): @@ -35,27 +36,27 @@ def test_ignore_unicode(tmp_dir, dvc): tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/тест") - assert _files_set("dir", dvc.tree.dvcignore) == {"dir/other"} + assert _files_set("dir", dvc.tree) == {"dir/other"} def test_rename_ignored_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "...", "other": "text"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored*") - mtime, size = get_mtime_and_size("dir", dvc.tree.dvcignore) + mtime, size = get_mtime_and_size("dir", dvc.tree) shutil.move("dir/ignored", "dir/ignored_new") - new_mtime, new_size = get_mtime_and_size("dir", dvc.tree.dvcignore) + new_mtime, new_size = get_mtime_and_size("dir", dvc.tree) assert new_mtime == mtime and new_size == size def test_rename_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}}) - mtime, size = get_mtime_and_size("dir", dvc.tree.dvcignore) + mtime, size = get_mtime_and_size("dir", dvc.tree) shutil.move("dir/foo", "dir/foo_new") - new_mtime, new_size = get_mtime_and_size("dir", dvc.tree.dvcignore) + new_mtime, new_size = get_mtime_and_size("dir", dvc.tree) assert new_mtime != mtime and new_size == size @@ -64,20 +65,20 @@ def test_remove_ignored_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "...", "other": "text"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") - mtime, size = get_mtime_and_size("dir", dvc.tree.dvcignore) + mtime, size = get_mtime_and_size("dir", dvc.tree) os.remove("dir/ignored") - new_mtime, new_size = get_mtime_and_size("dir", dvc.tree.dvcignore) + new_mtime, new_size = get_mtime_and_size("dir", dvc.tree) assert new_mtime == mtime and new_size == size def test_remove_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}}) - mtime, size = get_mtime_and_size("dir", dvc.tree.dvcignore) + mtime, size = get_mtime_and_size("dir", dvc.tree) os.remove("dir/foo") - new_mtime, new_size = get_mtime_and_size("dir", dvc.tree.dvcignore) + new_mtime, new_size = get_mtime_and_size("dir", dvc.tree) assert new_mtime != mtime and new_size != size @@ -112,7 +113,7 @@ def test_ignore_on_branch(tmp_dir, scm, dvc): tmp_dir.scm_gen(DvcIgnore.DVCIGNORE_FILE, "foo", commit="add ignore") scm.checkout("master") - assert _files_set(".", dvc.tree.dvcignore) == {"./foo", "./bar"} + assert _files_set(".", dvc.tree) == {"./foo", "./bar"} tree = scm.get_tree("branch") files_on_branch = { @@ -121,8 +122,8 @@ def test_ignore_on_branch(tmp_dir, scm, dvc): assert "foo" not in files_on_branch -def _files_set(root, dvcignore): - return {to_posixpath(f) for f in walk_files(root, dvcignore)} +def _files_set(root, tree): + return {to_posixpath(f) for f in tree.walk_files(root)} def test_match_nested(tmp_dir, dvc): diff --git a/tests/unit/remote/test_remote_dir.py b/tests/unit/remote/test_remote_dir.py index 6c868a1a6e..b6d6c337b9 100644 --- a/tests/unit/remote/test_remote_dir.py +++ b/tests/unit/remote/test_remote_dir.py @@ -142,7 +142,7 @@ def test_download_dir(remote, tmpdir): remote.download(remote.path_info / "data", to_info) assert os.path.isdir(path) data_dir = tmpdir / "data" - assert len(list(walk_files(path, None))) == 7 + assert len(list(walk_files(path))) == 7 assert (data_dir / "alice").read_text(encoding="utf-8") == "alice" assert (data_dir / "alpha").read_text(encoding="utf-8") == "alpha" assert (data_dir / "subdir-file.txt").read_text( diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 7f4f580fc5..0dca8c65b4 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -5,9 +5,8 @@ from mock import patch import dvc -from dvc.ignore import DvcIgnoreFilter from dvc.path_info import PathInfo -from dvc.scm.tree import WorkingTree +from dvc.scm.tree import WorkingTree, CleanTree from dvc.system import System from dvc.utils import relpath from dvc.utils.compat import str @@ -23,9 +22,9 @@ class TestMtimeAndSize(TestDir): def test(self): - dvcignore = DvcIgnoreFilter(WorkingTree(self.root_dir)) - file_time, file_size = get_mtime_and_size(self.DATA, dvcignore) - dir_time, dir_size = get_mtime_and_size(self.DATA_DIR, dvcignore) + tree = CleanTree(WorkingTree(self.root_dir)) + file_time, file_size = get_mtime_and_size(self.DATA, tree) + dir_time, dir_size = get_mtime_and_size(self.DATA_DIR, tree) actual_file_size = os.path.getsize(self.DATA) actual_dir_size = os.path.getsize(self.DATA) + os.path.getsize( @@ -127,9 +126,9 @@ def test_get_inode(repo_dir): def test_path_object_and_str_are_valid_types_get_mtime_and_size( path, repo_dir ): - dvcignore = DvcIgnoreFilter(WorkingTree(repo_dir.root_dir)) - time, size = get_mtime_and_size(path, dvcignore) - object_time, object_size = get_mtime_and_size(PathInfo(path), dvcignore) + tree = CleanTree(WorkingTree(repo_dir.root_dir)) + time, size = get_mtime_and_size(path, tree) + object_time, object_size = get_mtime_and_size(PathInfo(path), tree) assert time == object_time assert size == object_size From d8813dc5ca49dedcce8bb60ed0652ed1e0edce94 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 27 Dec 2019 04:13:02 +0100 Subject: [PATCH 06/12] remove dvc_walk, skip .dvcignore gathering test --- dvc/ignore.py | 4 +--- dvc/scm/git/tree.py | 2 +- dvc/scm/tree.py | 12 +++++------- dvc/utils/__init__.py | 17 ----------------- tests/func/test_ignore.py | 4 ++++ 5 files changed, 11 insertions(+), 28 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index b710fe586d..17dbde4daa 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -76,9 +76,7 @@ def __init__(self, tree): self.tree = tree self.ignores = {DvcIgnoreDirs([".git", ".hg", ".dvc"])} self._update(self.tree.tree_root) - for root, dirs, _ in self.tree.walk( - self.tree.tree_root, dvcignore=self - ): + for root, dirs, _ in self.tree.walk(self.tree.tree_root): for d in dirs: self._update(os.path.join(root, d)) diff --git a/dvc/scm/git/tree.py b/dvc/scm/git/tree.py index ff0ce6e472..d775a5433d 100644 --- a/dvc/scm/git/tree.py +++ b/dvc/scm/git/tree.py @@ -136,7 +136,7 @@ def _walk(self, tree, topdown=True): if not topdown: yield os.path.normpath(tree.abspath), dirs, nondirs - def walk(self, top, topdown=True, **kwargs): + def walk(self, top, topdown=True): """Directory tree generator. See `os.walk` for the docs. Differences: diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index 4df46cce7e..21fba40358 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -3,7 +3,6 @@ from funcy import cached_property from dvc.ignore import DvcIgnoreFilter -from dvc.utils import dvc_walk from dvc.utils.compat import open @@ -26,7 +25,7 @@ def isdir(self, path): def isfile(self, path): """Test whether a path is a regular file""" - def walk(self, top, topdown=True, **kwargs): + def walk(self, top, topdown=True): """Directory tree generator. See `os.walk` for the docs. Differences: @@ -61,7 +60,7 @@ def isfile(self, path): """Test whether a path is a regular file""" return os.path.isfile(path) - def walk(self, top, topdown=True, dvcignore=None, **kwargs): + def walk(self, top, topdown=True): """Directory tree generator. See `os.walk` for the docs. Differences: @@ -72,15 +71,14 @@ def walk(self, top, topdown=True, dvcignore=None, **kwargs): def onerror(e): raise e - for root, dirs, files in dvc_walk( - top, dvcignore, topdown=topdown, onerror=onerror + for root, dirs, files in os.walk( + top, topdown=topdown, onerror=onerror ): yield os.path.normpath(root), dirs, files class CleanTree(BaseTree): def __init__(self, tree): - super(CleanTree, self).__init__() self.tree = tree @cached_property @@ -104,7 +102,7 @@ def isfile(self, path): return self.tree.isfile(path) def walk(self, top, topdown=True, **kwargs): - for r, d, fs in self.tree.walk(top, topdown, dvcignore=self.dvcignore): + for r, d, fs in self.tree.walk(top, topdown): d[:], fs[:] = self.dvcignore(r, d, fs) yield r, d, fs diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index c9127be90e..b8db58e1a2 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -288,23 +288,6 @@ def to_yaml_string(data): return stream.getvalue() -def dvc_walk(top, dvcignore, topdown=True, onerror=None, followlinks=False): - """ - Proxy for `os.walk` directory tree generator. - Utilizes DvcIgnoreFilter functionality. - """ - top = fspath_py35(top) - - for root, dirs, files in os.walk( - top, topdown=topdown, onerror=onerror, followlinks=followlinks - ): - - if dvcignore: - dirs[:], files[:] = dvcignore(root, dirs, files) - - yield root, dirs, files - - def walk_files(directory): for root, _, files in os.walk(fspath(directory)): for f in files: diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 803315f6a3..b51d30a735 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -90,6 +90,10 @@ def test_dvcignore_in_out_dir(tmp_dir, dvc): dvc.add("dir") +@pytest.mark.skip( + reason="unable to apply .dvcignore on collection stage " + "after removing dvc_walk" +) @pytest.mark.parametrize("dname", ["dir", "dir/subdir"]) def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname): tmp_dir.gen({"dir": {"subdir": {}}}) From 56ed5351efd245f4d71513f30ceb72bd3203d295 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 27 Dec 2019 13:00:47 +0100 Subject: [PATCH 07/12] ignore: test: remove ignore collection test --- tests/func/test_ignore.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index b51d30a735..6adfb4484e 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -5,8 +5,7 @@ import pytest from dvc.exceptions import DvcIgnoreInCollectedDirError -from dvc.ignore import DvcIgnore, DvcIgnoreDirs, DvcIgnorePatterns -from dvc.scm.tree import WorkingTree +from dvc.ignore import DvcIgnore from dvc.utils import relpath from dvc.utils.compat import fspath_py35 from dvc.utils.compat import fspath @@ -90,26 +89,6 @@ def test_dvcignore_in_out_dir(tmp_dir, dvc): dvc.add("dir") -@pytest.mark.skip( - reason="unable to apply .dvcignore on collection stage " - "after removing dvc_walk" -) -@pytest.mark.parametrize("dname", ["dir", "dir/subdir"]) -def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname): - tmp_dir.gen({"dir": {"subdir": {}}}) - - top_ignore_file = (tmp_dir / dname).with_name(DvcIgnore.DVCIGNORE_FILE) - top_ignore_file.write_text(os.path.basename(dname)) - - ignore_file = tmp_dir / dname / DvcIgnore.DVCIGNORE_FILE - ignore_file.write_text("foo") - - assert dvc.tree.dvcignore.ignores == { - DvcIgnoreDirs([".git", ".hg", ".dvc"]), - DvcIgnorePatterns(fspath(top_ignore_file), WorkingTree(dvc.root_dir)), - } - - def test_ignore_on_branch(tmp_dir, scm, dvc): tmp_dir.scm_gen({"foo": "foo", "bar": "bar"}, commit="add files") From cf2d349bd72bd4c7273fe8ec20f51266bb7ad3ca Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 27 Dec 2019 13:13:46 +0100 Subject: [PATCH 08/12] tree: move CleanTree to ignore --- dvc/ignore.py | 38 ++++++++++++++++++++++++++++++++++++ dvc/repo/brancher.py | 3 ++- dvc/scm/git/__init__.py | 2 +- dvc/scm/tree.py | 39 ------------------------------------- tests/func/test_repo.py | 2 +- tests/func/test_tree.py | 3 ++- tests/unit/utils/test_fs.py | 3 ++- 7 files changed, 46 insertions(+), 44 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index 17dbde4daa..54b04f5077 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -3,9 +3,11 @@ import logging import os +from funcy import cached_property from pathspec import PathSpec from pathspec.patterns import GitWildMatchPattern +from dvc.scm.tree import BaseTree from dvc.utils import relpath logger = logging.getLogger(__name__) @@ -90,3 +92,39 @@ def __call__(self, root, dirs, files): dirs, files = ignore(root, dirs, files) return dirs, files + + +class CleanTree(BaseTree): + def __init__(self, tree): + self.tree = tree + + @cached_property + def dvcignore(self): + return DvcIgnoreFilter(self.tree) + + @property + def tree_root(self): + return self.tree.tree_root + + def open(self, path, mode="r", encoding="utf-8"): + return self.tree.open(path, mode, encoding) + + def exists(self, path): + return self.tree.exists(path) + + def isdir(self, path): + return self.tree.isdir(path) + + def isfile(self, path): + return self.tree.isfile(path) + + def walk(self, top, topdown=True): + for r, d, fs in self.tree.walk(top, topdown): + d[:], fs[:] = self.dvcignore(r, d, fs) + + yield r, d, fs + + def walk_files(self, top): + for root, _, files in self.walk(top): + for file in files: + yield os.path.join(root, file) diff --git a/dvc/repo/brancher.py b/dvc/repo/brancher.py index a24415a070..99b6aab792 100644 --- a/dvc/repo/brancher.py +++ b/dvc/repo/brancher.py @@ -1,6 +1,7 @@ from funcy import group_by -from dvc.scm.tree import WorkingTree, CleanTree +from dvc.ignore import CleanTree +from dvc.scm.tree import WorkingTree def brancher( # noqa: E302 diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 874c9e5100..194c714fd6 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -8,13 +8,13 @@ from pathspec.patterns import GitWildMatchPattern from dvc.exceptions import GitHookAlreadyExistsError +from dvc.ignore import CleanTree from dvc.scm.base import Base from dvc.scm.base import CloneError from dvc.scm.base import FileNotInRepoError from dvc.scm.base import RevError from dvc.scm.base import SCMError from dvc.scm.git.tree import GitTree -from dvc.scm.tree import CleanTree from dvc.utils import fix_env from dvc.utils import is_binary from dvc.utils import relpath diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index 21fba40358..e95af7d57f 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -1,8 +1,5 @@ import os -from funcy import cached_property - -from dvc.ignore import DvcIgnoreFilter from dvc.utils.compat import open @@ -75,39 +72,3 @@ def onerror(e): top, topdown=topdown, onerror=onerror ): yield os.path.normpath(root), dirs, files - - -class CleanTree(BaseTree): - def __init__(self, tree): - self.tree = tree - - @cached_property - def dvcignore(self): - return DvcIgnoreFilter(self.tree) - - @property - def tree_root(self): - return self.tree.tree_root - - def open(self, path, mode="r", encoding="utf-8"): - return self.tree.open(path, mode, encoding) - - def exists(self, path): - return self.tree.exists(path) - - def isdir(self, path): - return self.tree.isdir(path) - - def isfile(self, path): - return self.tree.isfile(path) - - def walk(self, top, topdown=True, **kwargs): - for r, d, fs in self.tree.walk(top, topdown): - d[:], fs[:] = self.dvcignore(r, d, fs) - - yield r, d, fs - - def walk_files(self, top): - for root, _, files in self.walk(top): - for file in files: - yield os.path.join(root, file) diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index d78b55fa65..8bb7abfeda 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -1,9 +1,9 @@ import os +from dvc.ignore import CleanTree from dvc.scm.git.tree import GitTree from dvc.cache import Cache from dvc.repo import Repo -from dvc.scm.tree import CleanTree from dvc.system import System from dvc.utils.compat import fspath diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index d3df8528e9..29d8d5e211 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -3,9 +3,10 @@ from os.path import join +from dvc.ignore import CleanTree from dvc.scm import SCM from dvc.scm.git import GitTree -from dvc.scm.tree import WorkingTree, CleanTree +from dvc.scm.tree import WorkingTree from tests.basic_env import TestDir from tests.basic_env import TestGit from tests.basic_env import TestGitSubmodule diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 0dca8c65b4..bc15476294 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -5,8 +5,9 @@ from mock import patch import dvc +from dvc.ignore import CleanTree from dvc.path_info import PathInfo -from dvc.scm.tree import WorkingTree, CleanTree +from dvc.scm.tree import WorkingTree from dvc.system import System from dvc.utils import relpath from dvc.utils.compat import str From e225601031c98f3fd21a22dcc4731811b225c94f Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 27 Dec 2019 15:11:53 +0100 Subject: [PATCH 09/12] reintroduce dvcignore collection test --- dvc/ignore.py | 7 +++---- dvc/repo/brancher.py | 2 +- dvc/scm/git/__init__.py | 8 ++++---- dvc/scm/tree.py | 4 +++- dvc/utils/fs.py | 2 -- tests/func/test_ignore.py | 21 +++++++++++++++++++-- tests/func/test_repo.py | 3 +-- tests/func/test_tree.py | 10 +++++----- 8 files changed, 36 insertions(+), 21 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index 54b04f5077..8fdf8298ad 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -77,10 +77,9 @@ class DvcIgnoreFilter(object): def __init__(self, tree): self.tree = tree self.ignores = {DvcIgnoreDirs([".git", ".hg", ".dvc"])} - self._update(self.tree.tree_root) - for root, dirs, _ in self.tree.walk(self.tree.tree_root): - for d in dirs: - self._update(os.path.join(root, d)) + for root, dirs, files in self.tree.walk(self.tree.tree_root): + self._update(root) + dirs[:], files[:] = self(root, dirs, files) def _update(self, dirname): ignore_file_path = os.path.join(dirname, DvcIgnore.DVCIGNORE_FILE) diff --git a/dvc/repo/brancher.py b/dvc/repo/brancher.py index 99b6aab792..7213cb37a6 100644 --- a/dvc/repo/brancher.py +++ b/dvc/repo/brancher.py @@ -59,7 +59,7 @@ def brancher( # noqa: E302 # code which might expect the tree on which exception was raised to # stay in place. This behavior is a subject to change. for sha, names in group_by(scm.resolve_rev, revs).items(): - self.tree = scm.get_tree(sha) + self.tree = CleanTree(scm.get_tree(sha)) yield ", ".join(names) self.tree = saved_tree diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 194c714fd6..96177503f9 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -8,7 +8,6 @@ from pathspec.patterns import GitWildMatchPattern from dvc.exceptions import GitHookAlreadyExistsError -from dvc.ignore import CleanTree from dvc.scm.base import Base from dvc.scm.base import CloneError from dvc.scm.base import FileNotInRepoError @@ -308,7 +307,7 @@ def belongs_to_scm(self, path): return basename == self.ignore_file or Git.GIT_DIR in path_parts def get_tree(self, rev): - return CleanTree(GitTree(self.repo, rev)) + return GitTree(self.repo, rev) def _get_diff_trees(self, a_ref, b_ref): """Private method for getting the trees and commit hashes of 2 git @@ -322,6 +321,7 @@ def _get_diff_trees(self, a_ref, b_ref): tuple: tuple with elements: (trees, commits) """ from gitdb.exc import BadObject, BadName + from dvc.ignore import CleanTree trees = {DIFF_A_TREE: None, DIFF_B_TREE: None} commits = [] @@ -334,8 +334,8 @@ def _get_diff_trees(self, a_ref, b_ref): # /en/2.1.11/reference.html#git.objects.base.Object.__str__ commits.append(a_commit) commits.append(b_commit) - trees[DIFF_A_TREE] = self.get_tree(commits[0]) - trees[DIFF_B_TREE] = self.get_tree(commits[1]) + trees[DIFF_A_TREE] = CleanTree(self.get_tree(commits[0])) + trees[DIFF_B_TREE] = CleanTree(self.get_tree(commits[1])) except (BadName, BadObject) as e: raise SCMError("git problem", cause=e) return trees, commits diff --git a/dvc/scm/tree.py b/dvc/scm/tree.py index e95af7d57f..5e28beae96 100644 --- a/dvc/scm/tree.py +++ b/dvc/scm/tree.py @@ -1,6 +1,6 @@ import os -from dvc.utils.compat import open +from dvc.utils.compat import open, fspath class BaseTree(object): @@ -65,6 +65,8 @@ def walk(self, top, topdown=True): - it could raise exceptions, there is no onerror argument """ + top = fspath(top) + def onerror(e): raise e diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index 533cdc01e9..ebb4b6cbcd 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -34,8 +34,6 @@ def get_inode(path): return inode -# FIXME: There is no use case of get_mtime_and_size without tree, we should -# move it to tree def get_mtime_and_size(path, tree): if os.path.isdir(fspath_py35(path)): size = 0 diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 6adfb4484e..22fef716e3 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -5,7 +5,8 @@ import pytest from dvc.exceptions import DvcIgnoreInCollectedDirError -from dvc.ignore import DvcIgnore +from dvc.ignore import DvcIgnore, DvcIgnoreDirs, DvcIgnorePatterns, CleanTree +from dvc.scm.tree import WorkingTree from dvc.utils import relpath from dvc.utils.compat import fspath_py35 from dvc.utils.compat import fspath @@ -89,6 +90,22 @@ def test_dvcignore_in_out_dir(tmp_dir, dvc): dvc.add("dir") +@pytest.mark.parametrize("dname", ["dir", "dir/subdir"]) +def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname): + tmp_dir.gen({"dir": {"subdir": {}}}) + + top_ignore_file = (tmp_dir / dname).with_name(DvcIgnore.DVCIGNORE_FILE) + top_ignore_file.write_text(os.path.basename(dname)) + + ignore_file = tmp_dir / dname / DvcIgnore.DVCIGNORE_FILE + ignore_file.write_text("foo") + + assert dvc.tree.dvcignore.ignores == { + DvcIgnoreDirs([".git", ".hg", ".dvc"]), + DvcIgnorePatterns(fspath(top_ignore_file), WorkingTree(dvc.root_dir)), + } + + def test_ignore_on_branch(tmp_dir, scm, dvc): tmp_dir.scm_gen({"foo": "foo", "bar": "bar"}, commit="add files") @@ -98,7 +115,7 @@ def test_ignore_on_branch(tmp_dir, scm, dvc): scm.checkout("master") assert _files_set(".", dvc.tree) == {"./foo", "./bar"} - tree = scm.get_tree("branch") + tree = CleanTree(scm.get_tree("branch")) files_on_branch = { f for r, ds, fs in tree.walk(tree.tree_root) for f in fs } diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index 8bb7abfeda..51f3a87543 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -1,6 +1,5 @@ import os -from dvc.ignore import CleanTree from dvc.scm.git.tree import GitTree from dvc.cache import Cache from dvc.repo import Repo @@ -51,7 +50,7 @@ def collect_outs(*args, **kwargs): assert collect_outs("bar.dvc", with_deps=True) == {"foo", "bar"} - dvc.tree = CleanTree(GitTree(scm.repo, "new-branch")) + dvc.tree = GitTree(scm.repo, "new-branch") assert collect_outs("buzz.dvc", with_deps=True) == {"foo", "bar", "buzz"} assert collect_outs("buzz.dvc", with_deps=False) == {"buzz"} diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index 29d8d5e211..ee38be6ae4 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -82,14 +82,14 @@ class TestGitTree(TestGit, GitTreeTests): def setUp(self): super(TestGitTree, self).setUp() self.scm = SCM(self._root_dir) - self.tree = CleanTree(GitTree(self.git, "master")) + self.tree = GitTree(self.git, "master") class TestGitSubmoduleTree(TestGitSubmodule, GitTreeTests): def setUp(self): super(TestGitSubmoduleTree, self).setUp() self.scm = SCM(self._root_dir) - self.tree = CleanTree(GitTree(self.git, "master")) + self.tree = GitTree(self.git, "master") self._pushd(self._root_dir) @@ -108,7 +108,7 @@ def convert_to_sets(walk_results): class TestWalkInNoSCM(AssertWalkEqualMixin, TestDir): def test(self): - tree = CleanTree(WorkingTree(self._root_dir)) + tree = WorkingTree(self._root_dir) self.assertWalkEqual( tree.walk(self._root_dir), [ @@ -127,7 +127,7 @@ def test(self): ) def test_subdir(self): - tree = CleanTree(WorkingTree(self._root_dir)) + tree = WorkingTree(self._root_dir) self.assertWalkEqual( tree.walk(join("data_dir", "data_sub_dir")), [(join("data_dir", "data_sub_dir"), [], ["data_sub"])], @@ -154,7 +154,7 @@ def test_branch(self): scm = SCM(self._root_dir) scm.add([self.DATA_SUB_DIR]) scm.commit("add data_dir/data_sub_dir/data_sub") - tree = CleanTree(GitTree(self.git, "master")) + tree = GitTree(self.git, "master") self.assertWalkEqual( tree.walk("."), [ From e1eab9405cb10c992d4bde5a25dd469f02488d6f Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 27 Dec 2019 21:54:47 +0100 Subject: [PATCH 10/12] tree: check that repo.tree is CleanTree when using walk_files --- dvc/remote/local.py | 5 +++++ dvc/utils/fs.py | 4 ++++ tests/func/test_ignore.py | 7 ++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 81b63d0618..7a68ef824c 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -13,6 +13,7 @@ from dvc.exceptions import DownloadError from dvc.exceptions import DvcException from dvc.exceptions import UploadError +from dvc.ignore import CleanTree from dvc.path_info import PathInfo from dvc.progress import Tqdm from dvc.remote.base import RemoteBASE @@ -138,6 +139,8 @@ def getsize(path_info): return os.path.getsize(fspath_py35(path_info)) def walk_files(self, path_info): + assert isinstance(self.repo.tree, CleanTree) + for fname in self.repo.tree.walk_files(path_info): yield PathInfo(fname) @@ -427,6 +430,8 @@ def _unprotect_file(path): os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE) def _unprotect_dir(self, path): + assert isinstance(self.repo.tree, CleanTree) + for fname in self.repo.tree.walk_files(path): RemoteLOCAL._unprotect_file(fname) diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index ebb4b6cbcd..4f1a8013a0 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -35,6 +35,10 @@ def get_inode(path): def get_mtime_and_size(path, tree): + from dvc.ignore import CleanTree + + assert isinstance(tree, CleanTree) + if os.path.isdir(fspath_py35(path)): size = 0 files_mtimes = {} diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 22fef716e3..68137300a5 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -116,10 +116,11 @@ def test_ignore_on_branch(tmp_dir, scm, dvc): assert _files_set(".", dvc.tree) == {"./foo", "./bar"} tree = CleanTree(scm.get_tree("branch")) - files_on_branch = { - f for r, ds, fs in tree.walk(tree.tree_root) for f in fs + + assert _files_set(".", tree) == { + os.path.join(tree.tree_root, DvcIgnore.DVCIGNORE_FILE), + os.path.join(tree.tree_root, "bar"), } - assert "foo" not in files_on_branch def _files_set(root, tree): From 691d771d560891897ae682a91654696c0a743745 Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 30 Dec 2019 14:15:34 +0100 Subject: [PATCH 11/12] remove CleanTree from GitTree completely --- dvc/remote/local.py | 5 ++++- dvc/repo/__init__.py | 3 ++- dvc/repo/diff.py | 5 +++-- dvc/scm/git/__init__.py | 5 ++--- tests/func/test_checkout.py | 6 ++---- tests/func/test_ignore.py | 4 ++-- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 7a68ef824c..56a1381140 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -22,6 +22,7 @@ from dvc.remote.base import STATUS_MISSING from dvc.remote.base import STATUS_NEW from dvc.scheme import Schemes +from dvc.scm.tree import WorkingTree from dvc.system import System from dvc.utils import copyfile from dvc.utils import file_md5 @@ -139,7 +140,9 @@ def getsize(path_info): return os.path.getsize(fspath_py35(path_info)) def walk_files(self, path_info): - assert isinstance(self.repo.tree, CleanTree) + assert isinstance(self.repo.tree, CleanTree) and isinstance( + self.repo.tree.tree, WorkingTree + ) for fname in self.repo.tree.walk_files(path_info): yield PathInfo(fname) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index ce3ce71178..62efa14e77 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -5,11 +5,12 @@ from contextlib import contextmanager from functools import wraps from itertools import chain + +from dvc.ignore import CleanTree from dvc.utils.compat import FileNotFoundError, fspath_py35, open as _open from funcy import cached_property -from dvc.scm.tree import CleanTree from dvc.config import Config from dvc.exceptions import ( FileMissingError, diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py index 6b681bedca..b9c35f2a8a 100644 --- a/dvc/repo/diff.py +++ b/dvc/repo/diff.py @@ -4,6 +4,7 @@ from errno import ENOENT import dvc.logger as logger +from dvc.ignore import CleanTree from . import locked from dvc.scm.base import FileNotInCommitError from dvc.scm.git import DIFF_A_REF @@ -137,9 +138,9 @@ def _is_dir(path, a_outs, b_outs): def _get_diff_outs(self, diff_dct): - self.tree = diff_dct[DIFF_A_TREE] + self.tree = CleanTree(diff_dct[DIFF_A_TREE]) a_outs = {str(out): out for st in self.stages for out in st.outs} - self.tree = diff_dct[DIFF_B_TREE] + self.tree = CleanTree(diff_dct[DIFF_B_TREE]) b_outs = {str(out): out for st in self.stages for out in st.outs} outs_paths = set(a_outs.keys()) outs_paths.update(b_outs.keys()) diff --git a/dvc/scm/git/__init__.py b/dvc/scm/git/__init__.py index 96177503f9..668dc6fd57 100644 --- a/dvc/scm/git/__init__.py +++ b/dvc/scm/git/__init__.py @@ -321,7 +321,6 @@ def _get_diff_trees(self, a_ref, b_ref): tuple: tuple with elements: (trees, commits) """ from gitdb.exc import BadObject, BadName - from dvc.ignore import CleanTree trees = {DIFF_A_TREE: None, DIFF_B_TREE: None} commits = [] @@ -334,8 +333,8 @@ def _get_diff_trees(self, a_ref, b_ref): # /en/2.1.11/reference.html#git.objects.base.Object.__str__ commits.append(a_commit) commits.append(b_commit) - trees[DIFF_A_TREE] = CleanTree(self.get_tree(commits[0])) - trees[DIFF_B_TREE] = CleanTree(self.get_tree(commits[1])) + trees[DIFF_A_TREE] = self.get_tree(commits[0]) + trees[DIFF_B_TREE] = self.get_tree(commits[1]) except (BadName, BadObject) as e: raise SCMError("git problem", cause=e) return trees, commits diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index 4f83f2cf83..2baa455f94 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -18,7 +18,7 @@ from dvc.stage import StageFileBadNameError from dvc.stage import StageFileDoesNotExistError from dvc.system import System -from dvc.utils import relpath +from dvc.utils import relpath, walk_files from dvc.utils.compat import is_py2 from dvc.utils.stage import dump_stage_file from dvc.utils.stage import load_stage_file @@ -519,6 +519,4 @@ def test_partial_checkout(tmp_dir, dvc, target): tmp_dir.dvc_gen({"dir": {"subdir": {"file": "file"}, "other": "other"}}) shutil.rmtree("dir") dvc.checkout([target]) - assert list(walk_files("dir", None)) == [ - os.path.join("dir", "subdir", "file") - ] + assert list(walk_files("dir")) == [os.path.join("dir", "subdir", "file")] diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 68137300a5..c581cfd746 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -118,8 +118,8 @@ def test_ignore_on_branch(tmp_dir, scm, dvc): tree = CleanTree(scm.get_tree("branch")) assert _files_set(".", tree) == { - os.path.join(tree.tree_root, DvcIgnore.DVCIGNORE_FILE), - os.path.join(tree.tree_root, "bar"), + to_posixpath(os.path.join(tree.tree_root, DvcIgnore.DVCIGNORE_FILE)), + to_posixpath(os.path.join(tree.tree_root, "bar")), } From 1880656579ceca43775253689d3adc1f1b608b1a Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 30 Dec 2019 17:05:45 +0100 Subject: [PATCH 12/12] CleanTree: conform to snake_case naming style --- dvc/ignore.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dvc/ignore.py b/dvc/ignore.py index 8fdf8298ad..64f79068cb 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -118,10 +118,10 @@ def isfile(self, path): return self.tree.isfile(path) def walk(self, top, topdown=True): - for r, d, fs in self.tree.walk(top, topdown): - d[:], fs[:] = self.dvcignore(r, d, fs) + for root, dirs, files in self.tree.walk(top, topdown): + dirs[:], files[:] = self.dvcignore(root, dirs, files) - yield r, d, fs + yield root, dirs, files def walk_files(self, top): for root, _, files in self.walk(top):