diff --git a/dvc/remote/azure.py b/dvc/remote/azure.py index 817fcc6ed0..77b831fc19 100644 --- a/dvc/remote/azure.py +++ b/dvc/remote/azure.py @@ -121,6 +121,8 @@ def _list_paths(self, bucket, prefix): next_marker = blobs.next_marker def walk_files(self, path_info, **kwargs): + if not kwargs.pop("prefix", False): + path_info = path_info / "" for fname in self._list_paths( path_info.bucket, path_info.path, **kwargs ): diff --git a/dvc/remote/base.py b/dvc/remote/base.py index b7d5cafa8e..91b116e700 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -213,7 +213,12 @@ def iscopy(self, path_info): return False # We can't be sure by default def walk_files(self, path_info, **kwargs): - """Return a generator with `PathInfo`s to all the files""" + """Return a generator with `PathInfo`s to all the files. + + Optional kwargs: + prefix (bool): If true `path_info` will be treated as a prefix + rather than directory path. + """ raise NotImplementedError def is_empty(self, path_info): @@ -522,14 +527,16 @@ def list_paths(self, prefix=None, progress_callback=None): path_info = self.path_info / prefix[:2] / prefix[2:] else: path_info = self.path_info / prefix[:2] + prefix = True else: path_info = self.path_info + prefix = False if progress_callback: - for file_info in self.walk_files(path_info): + for file_info in self.walk_files(path_info, prefix=prefix): progress_callback() yield file_info.path else: - yield from self.walk_files(path_info) + yield from self.walk_files(path_info, prefix=prefix) def list_hashes(self, prefix=None, progress_callback=None): """Iterate over hashes in this tree. diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index 874622f404..c6df9a886c 100644 --- a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -553,7 +553,7 @@ def _list_paths(self, prefix=None): ) def walk_files(self, path_info, **kwargs): - if path_info == self.path_info: + if path_info == self.path_info or not kwargs.pop("prefix", False): prefix = None else: prefix = path_info.path diff --git a/dvc/remote/gs.py b/dvc/remote/gs.py index 0775f0af51..fc703231d2 100644 --- a/dvc/remote/gs.py +++ b/dvc/remote/gs.py @@ -144,7 +144,9 @@ def _list_paths(self, path_info, max_items=None): yield blob.name def walk_files(self, path_info, **kwargs): - for fname in self._list_paths(path_info / "", **kwargs): + if not kwargs.pop("prefix", False): + path_info = path_info / "" + for fname in self._list_paths(path_info, **kwargs): # skip nested empty directories if fname.endswith("/"): continue diff --git a/dvc/remote/oss.py b/dvc/remote/oss.py index 31feaed6ad..5471169afb 100644 --- a/dvc/remote/oss.py +++ b/dvc/remote/oss.py @@ -100,6 +100,8 @@ def _list_paths(self, path_info): yield blob.key def walk_files(self, path_info, **kwargs): + if not kwargs.pop("prefix", False): + path_info = path_info / "" for fname in self._list_paths(path_info): if fname.endswith("/"): continue diff --git a/dvc/remote/s3.py b/dvc/remote/s3.py index 9a1911da95..2c01fd800e 100644 --- a/dvc/remote/s3.py +++ b/dvc/remote/s3.py @@ -187,7 +187,9 @@ def _list_paths(self, path_info, max_items=None): ) def walk_files(self, path_info, **kwargs): - for fname in self._list_paths(path_info / "", **kwargs): + if not kwargs.pop("prefix", False): + path_info = path_info / "" + for fname in self._list_paths(path_info, **kwargs): if fname.endswith("/"): continue diff --git a/tests/unit/remote/test_base.py b/tests/unit/remote/test_base.py index c2025f1c24..016d71271a 100644 --- a/tests/unit/remote/test_base.py +++ b/tests/unit/remote/test_base.py @@ -142,6 +142,20 @@ def test_list_hashes(dvc): assert hashes == ["123456"] +def test_list_paths(dvc): + tree = BaseRemoteTree(dvc, {}) + tree.path_info = PathInfo("foo") + + with mock.patch.object(tree, "walk_files", return_value=[]) as walk_mock: + for _ in tree.list_paths(): + pass + walk_mock.assert_called_with(tree.path_info, prefix=False) + + for _ in tree.list_paths(prefix="000"): + pass + walk_mock.assert_called_with(tree.path_info / "00" / "0", prefix=True) + + @pytest.mark.parametrize( "hash_, result", [(None, False), ("", False), ("3456.dir", True), ("3456", False)],