Skip to content

Commit df5fda2

Browse files
authored
Merge pull request #2974 from pared/2914
repo: move dvcignore from repo to tree
2 parents 0a3a9bf + 1880656 commit df5fda2

File tree

16 files changed

+130
-129
lines changed

16 files changed

+130
-129
lines changed

dvc/ignore.py

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
import logging
44
import os
55

6+
from funcy import cached_property
67
from pathspec import PathSpec
78
from pathspec.patterns import GitWildMatchPattern
89

10+
from dvc.scm.tree import BaseTree
911
from dvc.utils import relpath
1012

1113
logger = logging.getLogger(__name__)
@@ -72,13 +74,12 @@ def __eq__(self, other):
7274

7375

7476
class DvcIgnoreFilter(object):
75-
def __init__(self, root_dir, tree):
77+
def __init__(self, tree):
7678
self.tree = tree
7779
self.ignores = {DvcIgnoreDirs([".git", ".hg", ".dvc"])}
78-
self._update(root_dir)
79-
for root, dirs, _ in self.tree.walk(root_dir, dvcignore=self):
80-
for d in dirs:
81-
self._update(os.path.join(root, d))
80+
for root, dirs, files in self.tree.walk(self.tree.tree_root):
81+
self._update(root)
82+
dirs[:], files[:] = self(root, dirs, files)
8283

8384
def _update(self, dirname):
8485
ignore_file_path = os.path.join(dirname, DvcIgnore.DVCIGNORE_FILE)
@@ -90,3 +91,39 @@ def __call__(self, root, dirs, files):
9091
dirs, files = ignore(root, dirs, files)
9192

9293
return dirs, files
94+
95+
96+
class CleanTree(BaseTree):
97+
def __init__(self, tree):
98+
self.tree = tree
99+
100+
@cached_property
101+
def dvcignore(self):
102+
return DvcIgnoreFilter(self.tree)
103+
104+
@property
105+
def tree_root(self):
106+
return self.tree.tree_root
107+
108+
def open(self, path, mode="r", encoding="utf-8"):
109+
return self.tree.open(path, mode, encoding)
110+
111+
def exists(self, path):
112+
return self.tree.exists(path)
113+
114+
def isdir(self, path):
115+
return self.tree.isdir(path)
116+
117+
def isfile(self, path):
118+
return self.tree.isfile(path)
119+
120+
def walk(self, top, topdown=True):
121+
for root, dirs, files in self.tree.walk(top, topdown):
122+
dirs[:], files[:] = self.dvcignore(root, dirs, files)
123+
124+
yield root, dirs, files
125+
126+
def walk_files(self, top):
127+
for root, _, files in self.walk(top):
128+
for file in files:
129+
yield os.path.join(root, file)

dvc/remote/local.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from dvc.exceptions import DownloadError
1414
from dvc.exceptions import DvcException
1515
from dvc.exceptions import UploadError
16+
from dvc.ignore import CleanTree
1617
from dvc.path_info import PathInfo
1718
from dvc.progress import Tqdm
1819
from dvc.remote.base import RemoteBASE
@@ -21,6 +22,7 @@
2122
from dvc.remote.base import STATUS_MISSING
2223
from dvc.remote.base import STATUS_NEW
2324
from dvc.scheme import Schemes
25+
from dvc.scm.tree import WorkingTree
2426
from dvc.system import System
2527
from dvc.utils import copyfile
2628
from dvc.utils import file_md5
@@ -83,7 +85,7 @@ def supported(cls, config):
8385

8486
def list_cache_paths(self):
8587
assert self.path_info is not None
86-
return walk_files(self.path_info, None)
88+
return walk_files(self.path_info)
8789

8890
def get(self, md5):
8991
if not md5:
@@ -138,7 +140,11 @@ def getsize(path_info):
138140
return os.path.getsize(fspath_py35(path_info))
139141

140142
def walk_files(self, path_info):
141-
for fname in walk_files(path_info, self.repo.dvcignore):
143+
assert isinstance(self.repo.tree, CleanTree) and isinstance(
144+
self.repo.tree.tree, WorkingTree
145+
)
146+
147+
for fname in self.repo.tree.walk_files(path_info):
142148
yield PathInfo(fname)
143149

144150
def get_file_checksum(self, path_info):
@@ -427,7 +433,9 @@ def _unprotect_file(path):
427433
os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)
428434

429435
def _unprotect_dir(self, path):
430-
for fname in walk_files(path, self.repo.dvcignore):
436+
assert isinstance(self.repo.tree, CleanTree)
437+
438+
for fname in self.repo.tree.walk_files(path):
431439
RemoteLOCAL._unprotect_file(fname)
432440

433441
def unprotect(self, path_info):

dvc/repo/__init__.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from contextlib import contextmanager
66
from functools import wraps
77
from itertools import chain
8+
9+
from dvc.ignore import CleanTree
810
from dvc.utils.compat import FileNotFoundError, fspath_py35, open as _open
911

1012
from funcy import cached_property
@@ -15,7 +17,6 @@
1517
NotDvcRepoError,
1618
OutputNotFoundError,
1719
)
18-
from dvc.ignore import DvcIgnoreFilter
1920
from dvc.path_info import PathInfo
2021
from dvc.remote.base import RemoteActionNotImplemented
2122
from dvc.utils import relpath
@@ -84,7 +85,7 @@ def __init__(self, root_dir=None):
8485

8586
self.scm = SCM(self.root_dir)
8687

87-
self.tree = WorkingTree(self.root_dir)
88+
self.tree = CleanTree(WorkingTree(self.root_dir))
8889

8990
self.tmp_dir = os.path.join(self.dvc_dir, "tmp")
9091
makedirs(self.tmp_dir, exist_ok=True)
@@ -391,9 +392,7 @@ def stages(self):
391392
stages = []
392393
outs = []
393394

394-
for root, dirs, files in self.tree.walk(
395-
self.root_dir, dvcignore=self.dvcignore
396-
):
395+
for root, dirs, files in self.tree.walk(self.root_dir):
397396
for fname in files:
398397
path = os.path.join(root, fname)
399398
if not Stage.is_valid_filename(path):
@@ -487,10 +486,6 @@ def _open_cached(self, out, remote=None, mode="r", encoding=None):
487486

488487
return _open(cache_file, mode=mode, encoding=encoding)
489488

490-
@cached_property
491-
def dvcignore(self):
492-
return DvcIgnoreFilter(self.root_dir, self.tree)
493-
494489
def close(self):
495490
self.scm.close()
496491

dvc/repo/add.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from dvc.repo.scm_context import scm_context
1313
from dvc.stage import Stage
1414
from dvc.utils import LARGE_DIR_SIZE
15-
from dvc.utils import walk_files
1615

1716
logger = logging.getLogger(__name__)
1817

@@ -68,7 +67,7 @@ def _find_all_targets(repo, target, recursive):
6867
if os.path.isdir(target) and recursive:
6968
return [
7069
fname
71-
for fname in walk_files(target, repo.dvcignore)
70+
for fname in repo.tree.walk_files(target)
7271
if not repo.is_dvc_internal(fname)
7372
if not Stage.is_stage_file(fname)
7473
if not repo.scm.belongs_to_scm(fname)

dvc/repo/brancher.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from funcy import group_by
22

3+
from dvc.ignore import CleanTree
34
from dvc.scm.tree import WorkingTree
45

56

@@ -34,7 +35,7 @@ def brancher( # noqa: E302
3435

3536
scm = self.scm
3637

37-
self.tree = WorkingTree(self.root_dir)
38+
self.tree = CleanTree(WorkingTree(self.root_dir))
3839
yield "working tree"
3940

4041
if all_commits:
@@ -58,7 +59,7 @@ def brancher( # noqa: E302
5859
# code which might expect the tree on which exception was raised to
5960
# stay in place. This behavior is subject to change.
6061
for sha, names in group_by(scm.resolve_rev, revs).items():
61-
self.tree = scm.get_tree(sha)
62+
self.tree = CleanTree(scm.get_tree(sha))
6263
yield ", ".join(names)
6364

6465
self.tree = saved_tree

dvc/repo/diff.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from errno import ENOENT
55

66
import dvc.logger as logger
7+
from dvc.ignore import CleanTree
78
from . import locked
89
from dvc.scm.base import FileNotInCommitError
910
from dvc.scm.git import DIFF_A_REF
@@ -137,9 +138,9 @@ def _is_dir(path, a_outs, b_outs):
137138

138139

139140
def _get_diff_outs(self, diff_dct):
140-
self.tree = diff_dct[DIFF_A_TREE]
141+
self.tree = CleanTree(diff_dct[DIFF_A_TREE])
141142
a_outs = {str(out): out for st in self.stages for out in st.outs}
142-
self.tree = diff_dct[DIFF_B_TREE]
143+
self.tree = CleanTree(diff_dct[DIFF_B_TREE])
143144
b_outs = {str(out): out for st in self.stages for out in st.outs}
144145
outs_paths = set(a_outs.keys())
145146
outs_paths.update(b_outs.keys())

dvc/scm/git/tree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def _walk(self, tree, topdown=True):
136136
if not topdown:
137137
yield os.path.normpath(tree.abspath), dirs, nondirs
138138

139-
def walk(self, top, topdown=True, dvcignore=None):
139+
def walk(self, top, topdown=True):
140140
"""Directory tree generator.
141141
142142
See `os.walk` for the docs. Differences:

dvc/scm/tree.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import os
22

3-
from dvc.utils import dvc_walk
4-
from dvc.utils.compat import open
3+
from dvc.utils.compat import open, fspath
54

65

76
class BaseTree(object):
@@ -23,7 +22,7 @@ def isdir(self, path):
2322
def isfile(self, path):
2423
"""Test whether a path is a regular file"""
2524

26-
def walk(self, top, topdown=True, dvcignore=None):
25+
def walk(self, top, topdown=True):
2726
"""Directory tree generator.
2827
2928
See `os.walk` for the docs. Differences:
@@ -58,20 +57,20 @@ def isfile(self, path):
5857
"""Test whether a path is a regular file"""
5958
return os.path.isfile(path)
6059

61-
def walk(self, top, topdown=True, dvcignore=None):
60+
def walk(self, top, topdown=True):
6261
"""Directory tree generator.
6362
6463
See `os.walk` for the docs. Differences:
6564
- no support for symlinks
6665
- it could raise exceptions, there is no onerror argument
6766
"""
6867

69-
assert dvcignore
68+
top = fspath(top)
7069

7170
def onerror(e):
7271
raise e
7372

74-
for root, dirs, files in dvc_walk(
75-
os.path.abspath(top), dvcignore, topdown=topdown, onerror=onerror
73+
for root, dirs, files in os.walk(
74+
top, topdown=topdown, onerror=onerror
7675
):
7776
yield os.path.normpath(root), dirs, files

dvc/state.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ def save(self, path_info, checksum):
378378
assert os.path.exists(fspath_py35(path_info))
379379

380380
actual_mtime, actual_size = get_mtime_and_size(
381-
path_info, self.repo.dvcignore
381+
path_info, self.repo.tree
382382
)
383383
actual_inode = get_inode(path_info)
384384

@@ -410,9 +410,7 @@ def get(self, path_info):
410410
if not os.path.exists(path):
411411
return None
412412

413-
actual_mtime, actual_size = get_mtime_and_size(
414-
path, self.repo.dvcignore
415-
)
413+
actual_mtime, actual_size = get_mtime_and_size(path, self.repo.tree)
416414
actual_inode = get_inode(path)
417415

418416
existing_record = self.get_state_record_for_inode(actual_inode)
@@ -439,7 +437,7 @@ def save_link(self, path_info):
439437
if not os.path.exists(path):
440438
return
441439

442-
mtime, _ = get_mtime_and_size(path, self.repo.dvcignore)
440+
mtime, _ = get_mtime_and_size(path, self.repo.tree)
443441
inode = get_inode(path)
444442
relative_path = relpath(path, self.root_dir)
445443

@@ -469,7 +467,7 @@ def remove_unused_links(self, used):
469467
continue
470468

471469
actual_inode = get_inode(path)
472-
actual_mtime, _ = get_mtime_and_size(path, self.repo.dvcignore)
470+
actual_mtime, _ = get_mtime_and_size(path, self.repo.tree)
473471

474472
if inode == actual_inode and mtime == actual_mtime:
475473
logger.debug("Removing '{}' as unused link.".format(path))

dvc/utils/__init__.py

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -288,25 +288,8 @@ def to_yaml_string(data):
288288
return stream.getvalue()
289289

290290

291-
def dvc_walk(top, dvcignore, topdown=True, onerror=None, followlinks=False):
292-
"""
293-
Proxy for `os.walk` directory tree generator.
294-
Utilizes DvcIgnoreFilter functionality.
295-
"""
296-
top = fspath_py35(top)
297-
298-
for root, dirs, files in os.walk(
299-
top, topdown=topdown, onerror=onerror, followlinks=followlinks
300-
):
301-
302-
if dvcignore:
303-
dirs[:], files[:] = dvcignore(root, dirs, files)
304-
305-
yield root, dirs, files
306-
307-
308-
def walk_files(directory, dvcignore):
309-
for root, _, files in dvc_walk(directory, dvcignore):
291+
def walk_files(directory):
292+
for root, _, files in os.walk(fspath(directory)):
310293
for f in files:
311294
yield os.path.join(root, f)
312295

0 commit comments

Comments
 (0)