diff --git a/dvc/repo/ls.py b/dvc/repo/ls.py index e02d52116a..2222decf27 100644 --- a/dvc/repo/ls.py +++ b/dvc/repo/ls.py @@ -76,9 +76,9 @@ def onerror(exc): if not recursive: for dname in dirs: info = PathInfo(root) / dname - if not dvc_only or ( - tree.dvctree and tree.dvctree.exists(info) - ): + # pylint:disable=protected-access + _, dvctree = tree._get_tree_pairs(info) # noqa + if not dvc_only or (dvctree and dvctree.exists(info)): dvc = tree.isdvc(info) path = str(info.relative_to(path_info)) ret[path] = { diff --git a/dvc/repo/tree.py b/dvc/repo/tree.py index 8011d0318c..9140c6fa71 100644 --- a/dvc/repo/tree.py +++ b/dvc/repo/tree.py @@ -1,5 +1,11 @@ import logging import os +import threading +from itertools import takewhile +from typing import Optional, Tuple + +from funcy import wrap_with +from pygtrie import StringTrie from dvc.dvcfile import is_valid_filename from dvc.exceptions import OutputNotFoundError @@ -248,25 +254,82 @@ class RepoTree(BaseTree): # pylint:disable=abstract-method scheme = "local" PARAM_CHECKSUM = "md5" - def __init__(self, repo, **kwargs): + def __init__(self, repo, subrepos=False, repo_factory=None, **kwargs): super().__init__(repo, {"url": repo.root_dir}) - if hasattr(repo, "dvc_dir"): - self.dvctree = DvcTree(repo, **kwargs) + + if not repo_factory: + from dvc.repo import Repo + + self.repo_factory = Repo else: - # git-only erepo's do not need dvctree - self.dvctree = None + self.repo_factory = repo_factory + + self._main_repo = repo + self.root_dir = repo.root_dir + self._traverse_subrepos = subrepos + + self._discovered_subrepos = StringTrie(separator=os.sep) + self._discovered_subrepos[self.root_dir] = repo + + self._dvctrees = {} + self._dvctree_configs = kwargs + + if hasattr(repo, "dvc_dir"): + self._dvctrees[repo.root_dir] = DvcTree(repo, **kwargs) + + def _get_repo(self, path): + repo = self._discovered_subrepos.get(path) + if repo: + return repo + + prefix, repo = self._discovered_subrepos.longest_prefix(path) + if not 
prefix: + return None + + parents = (parent.fspath for parent in PathInfo(path).parents) + dirs = [path] + list(takewhile(lambda p: p != prefix, parents)) + dirs.reverse() + self._update(dirs, starting_repo=repo) + return self._discovered_subrepos.get(path) + + @wrap_with(threading.Lock()) + def _update(self, dirs, starting_repo): + repo = starting_repo + for d in dirs: + if self._is_dvc_repo(d): + repo = self.repo_factory(d) + self._dvctrees[repo.root_dir] = DvcTree( + repo, **self._dvctree_configs + ) + self._discovered_subrepos[d] = repo + + def _is_dvc_repo(self, dir_path): + if not self._traverse_subrepos: + return False + + from dvc.repo import Repo + + repo_path = os.path.join(dir_path, Repo.DVC_DIR) + return self._main_repo.tree.isdir(repo_path, use_dvcignore=False) + + def _get_tree_pairs(self, path) -> Tuple["BaseTree", Optional["DvcTree"]]: + path = os.path.abspath(path) + repo = self._get_repo(path) + if not repo: + # path could be outside of the repo, so we just send them the main + # tree instead + return self._main_repo.tree, self._dvctrees.get(self.root_dir) + + dvc_tree = self._dvctrees.get(repo.root_dir) + return repo.tree, dvc_tree @property def fetch(self): - if self.dvctree: - return self.dvctree.fetch - return False + return self._dvctree_configs.get("fetch", False) @property def stream(self): - if self.dvctree: - return self.dvctree.stream - return False + return self._dvctree_configs.get("stream", False) def open( self, path, mode="r", encoding="utf-8", **kwargs @@ -274,52 +337,65 @@ def open( if "b" in mode: encoding = None - if self.dvctree and self.dvctree.exists(path): - return self.dvctree.open( - path, mode=mode, encoding=encoding, **kwargs - ) - return self.repo.tree.open(path, mode=mode, encoding=encoding) + tree, dvc_tree = self._get_tree_pairs(path) + if dvc_tree and dvc_tree.exists(path): + return dvc_tree.open(path, mode=mode, encoding=encoding, **kwargs) + return tree.open(path, mode=mode, encoding=encoding) def exists( self, path,
use_dvcignore=True ): # pylint: disable=arguments-differ - return self.repo.tree.exists(path) or ( - self.dvctree and self.dvctree.exists(path) - ) + tree, dvc_tree = self._get_tree_pairs(path) + return tree.exists(path) or (dvc_tree and dvc_tree.exists(path)) def isdir(self, path): # pylint: disable=arguments-differ - return self.repo.tree.isdir(path) or ( - self.dvctree and self.dvctree.isdir(path) - ) + tree, dvc_tree = self._get_tree_pairs(path) + return tree.isdir(path) or (dvc_tree and dvc_tree.isdir(path)) def isdvc(self, path, **kwargs): - return self.dvctree is not None and self.dvctree.isdvc(path, **kwargs) + _, dvc_tree = self._get_tree_pairs(path) + return dvc_tree is not None and dvc_tree.isdvc(path, **kwargs) def isfile(self, path): # pylint: disable=arguments-differ - return self.repo.tree.isfile(path) or ( - self.dvctree and self.dvctree.isfile(path) - ) + tree, dvc_tree = self._get_tree_pairs(path) + return tree.isfile(path) or (dvc_tree and dvc_tree.isfile(path)) def isexec(self, path): - if self.dvctree and self.dvctree.exists(path): - return self.dvctree.isexec(path) - return self.repo.tree.isexec(path) + tree, dvc_tree = self._get_tree_pairs(path) + if dvc_tree and dvc_tree.exists(path): + return dvc_tree.isexec(path) + return tree.isexec(path) def stat(self, path): - return self.repo.tree.stat(path) + tree, _ = self._get_tree_pairs(path) + return tree.stat(path) - def _walk_one(self, walk): + def _dvc_walk(self, walk): try: root, dirs, files = next(walk) except StopIteration: return yield root, dirs, files for _ in dirs: - yield from self._walk_one(walk) + yield from self._dvc_walk(walk) + + def _subrepo_walk(self, dir_path, **kwargs): + tree, dvc_tree = self._get_tree_pairs(dir_path) + tree_walk = tree.walk( + dir_path, topdown=True, ignore_subrepos=not self._traverse_subrepos + ) + if dvc_tree: + dvc_walk = dvc_tree.walk(dir_path, topdown=True, **kwargs) + else: + dvc_walk = None + yield from self._walk(tree_walk, dvc_walk, **kwargs) - def 
_walk(self, dvc_walk, repo_walk, dvcfiles=False): + def _walk(self, repo_walk, dvc_walk=None, dvcfiles=False): + assert repo_walk try: - _, dvc_dirs, dvc_fnames = next(dvc_walk) + _, dvc_dirs, dvc_fnames = ( + next(dvc_walk) if dvc_walk else (None, [], []) + ) repo_root, repo_dirs, repo_fnames = next(repo_walk) except StopIteration: return @@ -347,12 +423,15 @@ def _walk(self, dvc_walk, repo_walk, dvcfiles=False): repo_dirs[:] = [dirname for dirname in dirs if dirname in repo_set] for dirname in dirs: - if dirname in shared: - yield from self._walk(dvc_walk, repo_walk, dvcfiles=dvcfiles) + dir_path = os.path.join(repo_root, dirname) + if self._is_dvc_repo(dir_path): + yield from self._subrepo_walk(dir_path, dvcfiles=dvcfiles) + elif dirname in shared: + yield from self._walk(repo_walk, dvc_walk, dvcfiles=dvcfiles) elif dirname in dvc_set: - yield from self._walk_one(dvc_walk) + yield from self._dvc_walk(dvc_walk) elif dirname in repo_set: - yield from self._walk_one(repo_walk) + yield from self._walk(repo_walk, None, dvcfiles=dvcfiles) def walk( self, top, topdown=True, onerror=None, dvcfiles=False, **kwargs @@ -382,22 +461,28 @@ def walk( onerror(NotADirectoryError(top)) return - dvc_exists = self.dvctree and self.dvctree.exists(top) - repo_exists = self.repo.tree.exists(top) - if dvc_exists and not repo_exists: - yield from self.dvctree.walk( - top, topdown=topdown, onerror=onerror, **kwargs - ) - return - if repo_exists and not dvc_exists: - yield from self.repo.tree.walk( - top, topdown=topdown, onerror=onerror + tree, dvc_tree = self._get_tree_pairs(top) + dvc_exists = dvc_tree and dvc_tree.exists(top) + repo_exists = tree.exists(top) + if dvc_exists: + dvc_walk = dvc_tree.walk(top, topdown=topdown, **kwargs) + if repo_exists: + repo_walk = tree.walk( + top, + topdown=topdown, + ignore_subrepos=not self._traverse_subrepos, + ) + yield from self._walk(repo_walk, dvc_walk, dvcfiles=dvcfiles) + else: + yield from dvc_walk + else: + repo_walk = tree.walk( + top, + 
topdown=topdown, + onerror=onerror, + ignore_subrepos=not self._traverse_subrepos, ) - return - - dvc_walk = self.dvctree.walk(top, topdown=topdown, **kwargs) - repo_walk = self.repo.tree.walk(top, topdown=topdown) - yield from self._walk(dvc_walk, repo_walk, dvcfiles=dvcfiles) + yield from self._walk(repo_walk, None, dvcfiles=dvcfiles) def walk_files(self, top, **kwargs): # pylint: disable=arguments-differ for root, _, files in self.walk(top, **kwargs): @@ -413,9 +498,10 @@ def get_file_hash(self, path_info): """ if not self.exists(path_info): raise FileNotFoundError - if self.dvctree and self.dvctree.exists(path_info): + _, dvc_tree = self._get_tree_pairs(path_info) + if dvc_tree and dvc_tree.exists(path_info): try: - return self.dvctree.get_file_hash(path_info) + return dvc_tree.get_file_hash(path_info) except OutputNotFoundError: pass return file_md5(path_info, self)[0] @@ -444,4 +530,4 @@ def copytree(self, top, dest): @property def hash_jobs(self): # pylint: disable=invalid-overridden-method - return self.repo.tree.hash_jobs + return self._main_repo.tree.hash_jobs diff --git a/dvc/tree/git.py b/dvc/tree/git.py index e4f2548982..066f77be6d 100644 --- a/dvc/tree/git.py +++ b/dvc/tree/git.py @@ -240,3 +240,6 @@ def walk_files(self, top): # pylint: disable=arguments-differ for file in files: # NOTE: os.path.join is ~5.5 times slower yield f"{root}{os.sep}{file}" + + def _reset(self): + return self.__dict__.pop("dvcignore", None) diff --git a/dvc/tree/local.py b/dvc/tree/local.py index ca3b8cfbfe..994277aea3 100644 --- a/dvc/tree/local.py +++ b/dvc/tree/local.py @@ -357,3 +357,6 @@ def _remove_unpacked_dir(self, hash_): info = self.hash_to_path_info(hash_) path_info = info.with_name(info.name + self.UNPACKED_DIR_SUFFIX) self.remove(path_info) + + def _reset(self): + return self.__dict__.pop("dvcignore", None) diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 042329f79c..86937234e1 100644 --- a/tests/func/test_ignore.py +++ 
b/tests/func/test_ignore.py @@ -21,7 +21,7 @@ def test_ignore(tmp_dir, dvc, monkeypatch): tmp_dir.gen({"dir": {"ignored": "text", "other": "text2"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() path = PathInfo(tmp_dir) @@ -31,7 +31,7 @@ def test_ignore(tmp_dir, dvc, monkeypatch): def test_ignore_unicode(tmp_dir, dvc): tmp_dir.gen({"dir": {"other": "text", "тест": "проверка"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/тест") - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() path = PathInfo(tmp_dir) assert set(dvc.tree.walk_files(path / "dir")) == {path / "dir" / "other"} @@ -40,7 +40,7 @@ def test_rename_ignored_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "...", "other": "text"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored*") - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() mtime, size = get_mtime_and_size("dir", dvc.tree) @@ -63,7 +63,7 @@ def test_rename_file(tmp_dir, dvc): def test_remove_ignored_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "...", "other": "text"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/ignored") - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() mtime, size = get_mtime_and_size("dir", dvc.tree) @@ -96,7 +96,7 @@ def test_ignore_collecting_dvcignores(tmp_dir, dvc, dname): top_ignore_file = (tmp_dir / dname).with_name(DvcIgnore.DVCIGNORE_FILE) top_ignore_file.write_text(os.path.basename(dname)) - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() ignore_file = tmp_dir / dname / DvcIgnore.DVCIGNORE_FILE ignore_file.write_text("foo") @@ -127,7 +127,7 @@ def test_ignore_on_branch(tmp_dir, scm, dvc): with tmp_dir.branch("branch", new=True): tmp_dir.scm_gen(DvcIgnore.DVCIGNORE_FILE, "foo", commit="add ignore") - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() path = PathInfo(tmp_dir) assert set(dvc.tree.walk_files(path)) == { path / "foo", @@ -150,7 +150,7 @@ def test_match_nested(tmp_dir, dvc): 
"dir": {"x.backup": "x backup", "tmp": "content"}, } ) - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() result = {os.fspath(os.path.normpath(f)) for f in dvc.tree.walk_files(".")} assert result == {".dvcignore", "foo"} @@ -168,7 +168,7 @@ def test_ignore_subrepo(tmp_dir, scm, dvc): tmp_dir.gen({".dvcignore": "foo", "subdir": {"foo": "foo"}}) scm.add([".dvcignore"]) scm.commit("init parent dvcignore") - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() subrepo_dir = tmp_dir / "subdir" assert not dvc.tree.exists(PathInfo(subrepo_dir / "foo")) @@ -189,7 +189,7 @@ def test_ignore_resurface_subrepo(tmp_dir, scm, dvc): with subrepo_dir.chdir(): Repo.init(subdir=True) - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() dirs = ["subdir"] files = ["foo"] @@ -207,7 +207,7 @@ def test_ignore_resurface_subrepo(tmp_dir, scm, dvc): def test_ignore_blank_line(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "text", "other": "text2"}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "foo\n\ndir/ignored") - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() path = PathInfo(tmp_dir) assert set(dvc.tree.walk_files(path / "dir")) == {path / "dir" / "other"} @@ -242,7 +242,7 @@ def test_ignore_file_in_parent_path( ): tmp_dir.gen(data_struct) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "\n".join(pattern_list)) - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() path = PathInfo(tmp_dir) assert set(dvc.tree.walk_files(path / "dir")) == { path / relpath for relpath in result_set @@ -265,7 +265,7 @@ def test_ignore_sub_directory(tmp_dir, dvc): ) tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "doc/fortz"}}) - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() path = PathInfo(tmp_dir) assert set(dvc.tree.walk_files(path / "dir")) == { path / "dir" / "a" / "doc" / "fortz" / "a", @@ -277,7 +277,7 @@ def test_ignore_sub_directory(tmp_dir, dvc): def test_ignore_directory(tmp_dir, dvc): tmp_dir.gen({"dir": {"fortz": {}, "a": {"fortz": {}}}}) 
tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "fortz"}}) - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() path = PathInfo(tmp_dir) assert set(dvc.tree.walk_files(path / "dir")) == { path / "dir" / DvcIgnore.DVCIGNORE_FILE, @@ -288,7 +288,7 @@ def test_multi_ignore_file(tmp_dir, dvc, monkeypatch): tmp_dir.gen({"dir": {"subdir": {"should_ignore": "1", "not_ignore": "1"}}}) tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "dir/subdir/*_ignore") tmp_dir.gen({"dir": {DvcIgnore.DVCIGNORE_FILE: "!subdir/not_ignore"}}) - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() path = PathInfo(tmp_dir) assert set(dvc.tree.walk_files(path / "dir")) == { path / "dir" / "subdir" / "not_ignore", @@ -313,7 +313,7 @@ def test_pattern_trie_tree(tmp_dir, dvc): "other": {DvcIgnore.DVCIGNORE_FILE: "1\n2\n3"}, } ) - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() dvcignore = dvc.tree.dvcignore ignore_pattern_top = dvcignore._get_trie_pattern( @@ -381,7 +381,7 @@ def test_ignore_in_added_dir(tmp_dir, dvc): ".dvcignore": "**/ignored", } ) - dvc.tree.__dict__.pop("dvcignore", None) + dvc.tree._reset() ignored_path = tmp_dir / "dir" / "sub" / "ignored" assert not dvc.tree.exists(PathInfo(ignored_path)) diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index a53a515a2e..7f90196beb 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -258,7 +258,7 @@ def test_walk_dont_ignore_subrepos(tmp_dir, scm, dvc): scm.commit("Add subrepo") dvc_tree = dvc.tree - dvc_tree.__dict__.pop("dvcignore") + dvc_tree._reset() scm_tree = scm.get_tree("HEAD", use_dvcignore=True) path = os.fspath(tmp_dir) get_dirs = itemgetter(1) diff --git a/tests/unit/repo/test_repo_tree.py b/tests/unit/repo/test_repo_tree.py index 417b2c0b92..9a4d5112c1 100644 --- a/tests/unit/repo/test_repo_tree.py +++ b/tests/unit/repo/test_repo_tree.py @@ -1,5 +1,6 @@ import os import shutil +from unittest import mock import pytest @@ -177,3 +178,181 @@ def test_isdvc(tmp_dir, dvc): assert 
tree.isdvc("dir") assert not tree.isdvc("dir/baz") assert tree.isdvc("dir/baz", recursive=True, strict=False) + + +def make_subrepo(dir_, scm, config=None): + dir_.mkdir(parents=True) + with dir_.chdir(): + dir_.scm = scm + dir_.init(dvc=True, subdir=True) + if config: + dir_.add_remote(config=config) + + +def test_subrepos(tmp_dir, scm, dvc): + tmp_dir.scm_gen( + {"dir": {"repo.txt": "file to confuse RepoTree"}}, + commit="dir/repo.txt", + ) + + subrepo1 = tmp_dir / "dir" / "repo" + subrepo2 = tmp_dir / "dir" / "repo2" + + for repo in [subrepo1, subrepo2]: + make_subrepo(repo, scm) + + subrepo1.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO") + subrepo2.dvc_gen( + {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR" + ) + + dvc.tree._reset() + tree = RepoTree(dvc, subrepos=True, fetch=True) + + def assert_tree_belongs_to_repo(ret_val): + method = tree._get_repo + + def f(*args, **kwargs): + r = method(*args, **kwargs) + assert r.root_dir == ret_val.root_dir + return r + + return f + + with mock.patch.object( + tree, + "_get_repo", + side_effect=assert_tree_belongs_to_repo(subrepo1.dvc), + ): + assert tree.exists(subrepo1 / "foo") is True + assert tree.exists(subrepo1 / "bar") is False + + assert tree.isfile(subrepo1 / "foo") is True + assert tree.isfile(subrepo1 / "foo") is True + assert tree.isfile(subrepo1 / "dir1" / "bar") is True + assert tree.isfile(subrepo1 / "dir1") is False + + assert tree.isdir(subrepo1 / "dir1") is True + assert tree.isdir(subrepo1 / "dir1" / "bar") is False + assert tree.isdvc(subrepo1 / "foo") is True + + with mock.patch.object( + tree, + "_get_repo", + side_effect=assert_tree_belongs_to_repo(subrepo2.dvc), + ): + assert tree.exists(subrepo2 / "lorem") is True + assert tree.exists(subrepo2 / "ipsum") is False + + assert tree.isfile(subrepo2 / "lorem") is True + assert tree.isfile(subrepo2 / "lorem") is True + assert tree.isfile(subrepo2 / "dir2" / "ipsum") is True + assert tree.isfile(subrepo2 / "dir2") is False + 
+ assert tree.isdir(subrepo2 / "dir2") is True + assert tree.isdir(subrepo2 / "dir2" / "ipsum") is False + assert tree.isdvc(subrepo2 / "lorem") is True + + +@pytest.mark.parametrize( + "dvcfiles,extra_expected", + [ + (False, []), + ( + True, + [ + PathInfo("dir") / "repo" / "foo.dvc", + PathInfo("dir") / "repo" / "dir1.dvc", + PathInfo("dir") / "repo2" / "lorem.dvc", + PathInfo("dir") / "repo2" / "dir2.dvc", + ], + ), + ], +) +def test_subrepo_walk(tmp_dir, scm, dvc, dvcfiles, extra_expected): + tmp_dir.scm_gen( + {"dir": {"repo.txt": "file to confuse RepoTree"}}, + commit="dir/repo.txt", + ) + + subrepo1 = tmp_dir / "dir" / "repo" + subrepo2 = tmp_dir / "dir" / "repo2" + + subdirs = [subrepo1, subrepo2] + for dir_ in subdirs: + make_subrepo(dir_, scm) + + subrepo1.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO") + subrepo2.dvc_gen( + {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR" + ) + + # using tree that does not have dvcignore + dvc.tree._reset() + tree = RepoTree(dvc, subrepos=True, fetch=True) + expected = [ + PathInfo("dir") / "repo", + PathInfo("dir") / "repo.txt", + PathInfo("dir") / "repo2", + PathInfo("dir") / "repo" / ".gitignore", + PathInfo("dir") / "repo" / "foo", + PathInfo("dir") / "repo" / "dir1", + PathInfo("dir") / "repo" / "dir1" / "bar", + PathInfo("dir") / "repo2" / ".gitignore", + PathInfo("dir") / "repo2" / "lorem", + PathInfo("dir") / "repo2" / "dir2", + PathInfo("dir") / "repo2" / "dir2" / "ipsum", + ] + + actual = [] + for root, dirs, files in tree.walk("dir", dvcfiles=dvcfiles): + for entry in dirs + files: + actual.append(os.path.join(root, entry)) + + expected = [str(path) for path in expected + extra_expected] + assert set(actual) == set(expected) + assert len(actual) == len(expected) + + +def test_repo_tree_no_subrepos(tmp_dir, dvc, scm): + tmp_dir.scm_gen( + {"dir": {"repo.txt": "file to confuse RepoTree"}}, + commit="dir/repo.txt", + ) + tmp_dir.dvc_gen({"lorem": "lorem"}, commit="add foo") + + subrepo 
= tmp_dir / "dir" / "repo" + make_subrepo(subrepo, scm) + subrepo.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO") + subrepo.scm_gen({"ipsum": "ipsum"}, commit="BAR") + + # using tree that does not have dvcignore + dvc.tree._reset() + tree = RepoTree(dvc, subrepos=False, fetch=True) + expected = [ + tmp_dir / ".gitignore", + tmp_dir / "lorem", + tmp_dir / "lorem.dvc", + tmp_dir / "dir", + tmp_dir / "dir" / "repo.txt", + ] + + actual = [] + for root, dirs, files in tree.walk(tmp_dir, dvcfiles=True): + for entry in dirs + files: + actual.append(os.path.normpath(os.path.join(root, entry))) + + expected = [str(path) for path in expected] + assert set(actual) == set(expected) + assert len(actual) == len(expected) + + assert tree.isfile(tmp_dir / "lorem") is True + assert tree.isfile(tmp_dir / "dir" / "repo" / "foo") is False + assert tree.isdir(tmp_dir / "dir" / "repo") is False + assert tree.isdir(tmp_dir / "dir") is True + + assert tree.isdvc(tmp_dir / "lorem") is True + assert tree.isdvc(tmp_dir / "dir" / "repo" / "dir1") is False + + assert tree.exists(tmp_dir / "dir" / "repo.txt") is True + assert tree.exists(tmp_dir / "repo" / "ipsum") is False