From f29ad51e16780bb6bed1ae99a5087706b11098ce Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Tue, 28 Dec 2021 02:49:14 +0200 Subject: [PATCH 1/2] fs: detach from local filesystem (#24) This PR makes our fs accept paths relative to the root of the repo and using / as a separator. Previous behaviour was a legacy from old dvc. Pre-requisite for making dvcfs/repofs/etc switch to the same relative repo paths and / separators. --- scmrepo/fs.py | 22 ++++++---------------- tests/test_fs.py | 26 +++++++++++--------------- 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/scmrepo/fs.py b/scmrepo/fs.py index cb8c5046..dfebae84 100644 --- a/scmrepo/fs.py +++ b/scmrepo/fs.py @@ -1,5 +1,6 @@ import errno import os +import posixpath from typing import ( TYPE_CHECKING, Any, @@ -31,7 +32,6 @@ def bytesio_len(obj: "BytesIO") -> Optional[int]: class GitFileSystem(AbstractFileSystem): # pylint: disable=abstract-method - sep = os.sep cachable = False def __init__( @@ -53,20 +53,12 @@ def __init__( resolved = resolver(scm, rev or "HEAD") tree_obj = scm.pygit2.get_tree_obj(rev=resolved) trie = GitTrie(tree_obj, resolved) - path = scm.root_dir - else: - assert path self.trie = trie - self.root_dir = path self.rev = self.trie.rev def _get_key(self, path: str) -> Tuple[str, ...]: - from scmrepo.utils import relpath - - if os.path.isabs(path): - path = relpath(path, self.root_dir) - relparts = path.split(os.sep) + relparts = path.split(self.sep) if relparts == ["."]: return () return tuple(relparts) @@ -101,7 +93,7 @@ def info(self, path: str, **kwargs: Any) -> Dict[str, Any]: try: return { **self.trie.info(key), - "name": os.path.join(self.root_dir, self.sep.join(key)), + "name": path, } except KeyError: raise FileNotFoundError( @@ -145,15 +137,13 @@ def walk( # pylint: disable=arguments-differ key = self._get_key(top) for prefix, dirs, files in self.trie.walk(key, topdown=topdown): - root = self.root_dir + root = self.sep.join(prefix) if prefix else "" - if prefix: - root = os.path.join(root, os.sep.join(prefix)) if detail: yield ( root, - {d: self.info(os.path.join(root, d)) for d in dirs}, - {f: self.info(os.path.join(root, f)) for f in files}, + {d: self.info(posixpath.join(root, d)) for d in dirs}, + {f: self.info(posixpath.join(root, f)) for f in files}, ) else: yield root, dirs, files diff --git a/tests/test_fs.py b/tests/test_fs.py index 90d3f921..37bc677f 100644 --- a/tests/test_fs.py +++ b/tests/test_fs.py @@ -1,5 +1,3 @@ -import os - import pytest from pytest_test_utils import TmpDir @@ -34,7 +32,7 @@ def test_exists(tmp_dir: TmpDir, scm: Git): assert not fs.exists("foo") assert not fs.exists("тест") assert not fs.exists("data") - assert not fs.exists(os.path.join("data", "lorem")) + assert not fs.exists("data/lorem") scm.add_commit(files, message="add") @@ -42,7 +40,7 @@ def test_exists(tmp_dir: TmpDir, scm: Git): assert fs.exists("foo") assert fs.exists("тест") assert fs.exists("data") - assert fs.exists(os.path.join("data", "lorem")) + assert fs.exists("data/lorem") assert not fs.exists("non-existing-file") @@ -75,7 +73,7 @@ def test_walk(tmp_dir: TmpDir, scm: Git): "data": {"lorem": "ipsum", "subdir": {"sub": "sub"}}, } ) - scm.add_commit(os.path.join("data", "subdir"), message="add") + scm.add_commit("data/subdir", message="add") fs = scm.get_fs("master") def convert_to_sets(walk_results): @@ -86,22 +84,20 @@ def convert_to_sets(walk_results): assert convert_to_sets(fs.walk(".")) == convert_to_sets( [ - (scm.root_dir, ["data"], []), - (os.path.join(scm.root_dir, "data"), ["subdir"], []), + ("", ["data"], []), + ("data", ["subdir"], []), ( - os.path.join(scm.root_dir, "data", "subdir"), + "data/subdir", [], ["sub"], ), ] ) - assert convert_to_sets( - fs.walk(os.path.join("data", "subdir")) - ) == convert_to_sets( + assert convert_to_sets(fs.walk("data/subdir")) == convert_to_sets( [ ( - os.path.join(scm.root_dir, "data", "subdir"), + "data/subdir", [], ["sub"], ) @@ -124,21 +120,21 @@ def test_ls(tmp_dir: TmpDir, scm: Git): assert fs.ls(".") == { "data": { "mode": 16384, - "name": str(tmp_dir / "data"), + "name": "data", "sha": "f5d6ac1955c85410b71bb6e35e4c57c54e2ad524", "size": 66, "type": "directory", }, "foo": { "mode": 33188, - "name": str(tmp_dir / "foo"), + "name": "foo", "sha": "19102815663d23f8b75a47e7a01965dcdc96468c", "size": 3, "type": "file", }, "тест": { "mode": 33188, - "name": str(tmp_dir / "тест"), + "name": "тест", "sha": "eeeba1738f4c12844163b89112070c6e57eb764e", "size": 16, "type": "file", From 8dd55800ca28d7f6bfb91ca7d304159e178038be Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Wed, 29 Dec 2021 13:47:55 +0200 Subject: [PATCH 2/2] fs: support root_marker (#25) fsspec filesystems don't yet have a notion of cwd, so "." should not be used. Instead, we should support using fs.root_marker ("" in our case), which is currently broken. --- scmrepo/fs.py | 2 +- tests/test_fs.py | 6 +++--- tests/test_git.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scmrepo/fs.py b/scmrepo/fs.py index dfebae84..c01da0e4 100644 --- a/scmrepo/fs.py +++ b/scmrepo/fs.py @@ -59,7 +59,7 @@ def __init__( def _get_key(self, path: str) -> Tuple[str, ...]: relparts = path.split(self.sep) - if relparts == ["."]: + if relparts == [self.root_marker]: return () return tuple(relparts) diff --git a/tests/test_fs.py b/tests/test_fs.py index 37bc677f..c6e8113a 100644 --- a/tests/test_fs.py +++ b/tests/test_fs.py @@ -82,7 +82,7 @@ def convert_to_sets(walk_results): for root, dirs, nondirs in walk_results ] - assert convert_to_sets(fs.walk(".")) == convert_to_sets( + assert convert_to_sets(fs.walk("")) == convert_to_sets( [ ("", ["data"], []), ("data", ["subdir"], []), @@ -116,8 +116,8 @@ def test_ls(tmp_dir: TmpDir, scm: Git): scm.add_commit(files, message="add") fs = scm.get_fs("master") - assert fs.ls(".", detail=False) == ["foo", "тест", "data"] - assert fs.ls(".") == { + assert fs.ls("", detail=False) == ["foo", "тест", "data"] + assert fs.ls("") == { "data": { "mode": 16384, "name": "data", diff --git a/tests/test_git.py b/tests/test_git.py index bf51bb03..d0eb1919 100644 --- a/tests/test_git.py +++ b/tests/test_git.py @@ -126,7 +126,7 @@ def test_walk_with_submodules( files = [] dirs = [] fs = scm.get_fs("HEAD") - for _, dnames, fnames in fs.walk("."): + for _, dnames, fnames in fs.walk(""): dirs.extend(dnames) files.extend(fnames)