From c72363c0c1768d693f4ded2d12a38b127504723e Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 15 Dec 2023 03:01:36 +0200 Subject: [PATCH] deps: update dvc-data to >=3 --- dvc/cachemgr.py | 4 +- dvc/commands/dag.py | 2 +- dvc/config.py | 16 +++---- dvc/data_cloud.py | 6 +-- dvc/dependency/base.py | 8 ++-- dvc/dvcfile.py | 2 +- dvc/fs/__init__.py | 10 ++-- dvc/fs/data.py | 3 ++ dvc/fs/dvc.py | 81 +++++++++++++++++++++++---------- dvc/fs/git.py | 8 ++-- dvc/ignore.py | 42 ++++++++--------- dvc/output.py | 50 +++++++++----------- dvc/parsing/__init__.py | 6 +-- dvc/parsing/context.py | 4 +- dvc/repo/__init__.py | 32 ++++++------- dvc/repo/add.py | 2 +- dvc/repo/artifacts.py | 4 +- dvc/repo/brancher.py | 24 +++++----- dvc/repo/checkout.py | 6 +-- dvc/repo/data.py | 6 +-- dvc/repo/experiments/stash.py | 2 +- dvc/repo/fetch.py | 2 +- dvc/repo/graph.py | 6 +-- dvc/repo/index.py | 22 ++++----- dvc/repo/ls.py | 2 +- dvc/repo/ls_url.py | 2 +- dvc/repo/metrics/show.py | 14 +++--- dvc/repo/open_repo.py | 2 +- dvc/repo/params/show.py | 12 ++--- dvc/repo/plots/__init__.py | 12 ++--- dvc/repo/stage.py | 6 +-- dvc/repo/trie.py | 2 +- dvc/repo/worktree.py | 13 +++--- dvc/rwlock.py | 6 +-- dvc/stage/__init__.py | 2 +- dvc/stage/cache.py | 18 ++++---- dvc/stage/utils.py | 4 +- dvc/testing/workspace_tests.py | 6 +-- dvc/utils/__init__.py | 2 +- dvc/utils/serialize/__init__.py | 2 +- pyproject.toml | 28 ++++++------ tests/func/test_fs.py | 18 ++++---- tests/unit/fs/test_path.py | 29 ------------ tests/unit/test_ignore.py | 9 ++-- 44 files changed, 267 insertions(+), 270 deletions(-) delete mode 100644 tests/unit/fs/test_path.py diff --git a/dvc/cachemgr.py b/dvc/cachemgr.py index 13ce73644e..bbb7a02a67 100644 --- a/dvc/cachemgr.py +++ b/dvc/cachemgr.py @@ -27,7 +27,7 @@ def _get_odb( cls, config, fs_path = get_cloud_fs(repo.config, **settings) fs = fs or cls(**config) if prefix: - fs_path = fs.path.join(fs_path, *prefix) + fs_path = fs.join(fs_path, *prefix) if hash_name: 
config["hash_name"] = hash_name return get_odb(fs, fs_path, state=repo.state, **config) @@ -86,7 +86,7 @@ def fs_cache(self): return FileStorage( key=(), fs=self.local.fs, - path=self.local.fs.path.join(self.default_local_cache_dir, self.FS_DIR), + path=self.local.fs.join(self.default_local_cache_dir, self.FS_DIR), ) def _init_odb(self, schemes): diff --git a/dvc/commands/dag.py b/dvc/commands/dag.py index 6ae327dc4c..d119edca67 100644 --- a/dvc/commands/dag.py +++ b/dvc/commands/dag.py @@ -86,7 +86,7 @@ def _collect_targets(repo, target, outs): targets.extend([str(out) for out in stage.outs]) continue - for out in outs_trie.itervalues(prefix=repo.fs.path.parts(path)): + for out in outs_trie.itervalues(prefix=repo.fs.parts(path)): targets.extend(str(out)) return targets diff --git a/dvc/config.py b/dvc/config.py index 998ddca960..94d0198016 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -14,7 +14,7 @@ if TYPE_CHECKING: from dvc.fs import FileSystem - from dvc.types import DictStrAny, StrPath + from dvc.types import DictStrAny logger = logger.getChild(__name__) @@ -89,8 +89,8 @@ class Config(dict): def __init__( self, - dvc_dir: Optional["StrPath"] = None, - local_dvc_dir: Optional["StrPath"] = None, + dvc_dir: Optional[str] = None, + local_dvc_dir: Optional[str] = None, validate: bool = True, fs: Optional["FileSystem"] = None, config: Optional["DictStrAny"] = None, @@ -105,7 +105,7 @@ def __init__( self.fs = fs or self.wfs if dvc_dir: - self.dvc_dir = self.fs.path.abspath(dvc_dir) + self.dvc_dir = self.fs.abspath(dvc_dir) self.local_dvc_dir = local_dvc_dir if not fs and not local_dvc_dir: @@ -145,10 +145,10 @@ def files(self) -> Dict[str, str]: } if self.dvc_dir is not None: - files["repo"] = self.fs.path.join(self.dvc_dir, self.CONFIG) + files["repo"] = self.fs.join(self.dvc_dir, self.CONFIG) if self.local_dvc_dir is not None: - files["local"] = self.wfs.path.join(self.local_dvc_dir, self.CONFIG_LOCAL) + files["local"] = self.wfs.join(self.local_dvc_dir, 
self.CONFIG_LOCAL) return files @@ -302,11 +302,11 @@ def _to_relpath(conf_dir, path): return path.def_path if os.path.expanduser(path) != path: - return localfs.path.as_posix(path) + return localfs.as_posix(path) if isinstance(path, RelPath) or not os.path.isabs(path): path = relpath(path, conf_dir) - return localfs.path.as_posix(path) + return localfs.as_posix(path) return path diff --git a/dvc/data_cloud.py b/dvc/data_cloud.py index 8888999dd3..2722b0c923 100644 --- a/dvc/data_cloud.py +++ b/dvc/data_cloud.py @@ -35,11 +35,9 @@ def odb(self) -> "HashFileDB": path = self.path if self.worktree: - path = self.fs.path.join( - path, ".dvc", CacheManager.FILES_DIR, DEFAULT_ALGORITHM - ) + path = self.fs.join(path, ".dvc", CacheManager.FILES_DIR, DEFAULT_ALGORITHM) else: - path = self.fs.path.join(path, CacheManager.FILES_DIR, DEFAULT_ALGORITHM) + path = self.fs.join(path, CacheManager.FILES_DIR, DEFAULT_ALGORITHM) return get_odb(self.fs, path, hash_name=DEFAULT_ALGORITHM, **self.config) @cached_property diff --git a/dvc/dependency/base.py b/dvc/dependency/base.py index f4e5d3951d..649805fe0d 100644 --- a/dvc/dependency/base.py +++ b/dvc/dependency/base.py @@ -33,7 +33,7 @@ def workspace_status(self) -> Dict[str, str]: if self.fs.version_aware: old_fs_path = self.fs_path try: - self.fs_path = self.fs.path.version_path(self.fs_path, None) + self.fs_path = self.fs.version_path(self.fs_path, None) if self.changed_meta(): return {str(self): "update available"} finally: @@ -42,9 +42,9 @@ def workspace_status(self) -> Dict[str, str]: def update(self, rev=None): if self.fs.version_aware: - self.fs_path = self.fs.path.version_path(self.fs_path, rev) + self.fs_path = self.fs.version_path(self.fs_path, rev) self.meta = self.get_meta() - self.fs_path = self.fs.path.version_path(self.fs_path, self.meta.version_id) + self.fs_path = self.fs.version_path(self.fs_path, self.meta.version_id) def download(self, to, jobs=None): fs_download(self.fs, self.fs_path, to.fs_path, jobs=jobs) @@ 
-52,7 +52,7 @@ def download(self, to, jobs=None): def save(self): super().save() if self.fs.version_aware: - self.fs_path = self.fs.path.version_path(self.fs_path, self.meta.version_id) + self.fs_path = self.fs.version_path(self.fs_path, self.meta.version_id) def dumpd(self, **kwargs): if self.fs.version_aware: diff --git a/dvc/dvcfile.py b/dvc/dvcfile.py index b6d5e22b18..cad6537722 100644 --- a/dvc/dvcfile.py +++ b/dvc/dvcfile.py @@ -294,7 +294,7 @@ def lockfile_contents(self) -> Dict[str, Any]: def resolver(self) -> "DataResolver": from .parsing import DataResolver - wdir = self.repo.fs.path.parent(self.path) + wdir = self.repo.fs.parent(self.path) return DataResolver(self.repo, wdir, self.contents) @cached_property diff --git a/dvc/fs/__init__.py b/dvc/fs/__init__.py index 3404df2178..7504c22d89 100644 --- a/dvc/fs/__init__.py +++ b/dvc/fs/__init__.py @@ -25,7 +25,6 @@ ConfigError, RemoteMissingDepsError, ) -from dvc_objects.fs.path import Path # noqa: F401 from .callbacks import Callback # noqa: F401 from .data import DataFileSystem # noqa: F401 @@ -54,23 +53,20 @@ def download( from .callbacks import TqdmCallback with TqdmCallback( - desc=f"Downloading {fs.path.name(fs_path)}", + desc=f"Downloading {fs.name(fs_path)}", unit="files", ) as cb: # NOTE: We use dvc-objects generic.copy over fs.get since it makes file # download atomic and avoids fsspec glob/regex path expansion. 
if fs.isdir(fs_path): from_infos = [ - path - for path in fs.find(fs_path) - if not path.endswith(fs.path.flavour.sep) + path for path in fs.find(fs_path) if not path.endswith(fs.flavour.sep) ] if not from_infos: localfs.makedirs(to, exist_ok=True) return 0 to_infos = [ - localfs.path.join(to, *fs.path.relparts(info, fs_path)) - for info in from_infos + localfs.join(to, *fs.relparts(info, fs_path)) for info in from_infos ] else: from_infos = [fs_path] diff --git a/dvc/fs/data.py b/dvc/fs/data.py index 2c259a0621..f3ded6ff01 100644 --- a/dvc/fs/data.py +++ b/dvc/fs/data.py @@ -29,6 +29,9 @@ def fs( return _DataFileSystem(**self.fs_args) + def getcwd(self): + return self.fs.getcwd() + def isdvc(self, path, **kwargs): return self.fs.isdvc(path, **kwargs) diff --git a/dvc/fs/dvc.py b/dvc/fs/dvc.py index 63132bf65a..89ca26eaee 100644 --- a/dvc/fs/dvc.py +++ b/dvc/fs/dvc.py @@ -13,7 +13,6 @@ from dvc.log import logger from dvc_objects.fs.base import FileSystem -from dvc_objects.fs.path import Path from .data import DataFileSystem @@ -71,7 +70,7 @@ def _merge_info(repo, key, fs_info, dvc_info): def _get_dvc_path(dvc_fs, subkey): - return dvc_fs.path.join(*subkey) if subkey else "" + return dvc_fs.join(*subkey) if subkey else "" class _DVCFileSystem(AbstractFileSystem): @@ -145,7 +144,51 @@ def __init__( # noqa: PLR0913 "remote_config": remote_config, } - self.path = Path(self.sep, getcwd=self._getcwd) + def getcwd(self): + relparts: Tuple[str, ...] 
= () + assert self.repo is not None + if self.repo.fs.isin(self.repo.fs.getcwd(), self.repo.root_dir): + relparts = self.repo.fs.relparts(self.repo.fs.getcwd(), self.repo.root_dir) + return self.root_marker + self.sep.join(relparts) + + @classmethod + def join(cls, *parts: str) -> str: + return posixpath.join(*parts) + + @classmethod + def parts(cls, path: str) -> Tuple[str, ...]: + ret = [] + while True: + path, part = posixpath.split(path) + + if part: + ret.append(part) + continue + + if path: + ret.append(path) + + break + + ret.reverse() + + return tuple(ret) + + def normpath(self, path: str) -> str: + return posixpath.normpath(path) + + def abspath(self, path: str) -> str: + if not posixpath.isabs(path): + path = self.join(self.getcwd(), path) + return self.normpath(path) + + def relpath(self, path: str, start: Optional[str] = None) -> str: + if start is None: + start = "." + return posixpath.relpath(self.abspath(path), start=self.abspath(start)) + + def relparts(self, path: str, start: Optional[str] = None) -> Tuple[str, ...]: + return self.parts(self.relpath(path, start=start)) @functools.cached_property def repo(self): @@ -170,15 +213,6 @@ def repo_factory(self): return self.repo._fs_conf["repo_factory"] - def _getcwd(self): - relparts: Tuple[str, ...] 
= () - assert self.repo is not None - if self.repo.fs.path.isin(self.repo.fs.path.getcwd(), self.repo.root_dir): - relparts = self.repo.fs.path.relparts( - self.repo.fs.path.getcwd(), self.repo.root_dir - ) - return self.root_marker + self.sep.join(relparts) - @functools.cached_property def fsid(self) -> str: from fsspec.utils import tokenize @@ -192,7 +226,7 @@ def fsid(self) -> str: def _get_key(self, path: "StrPath") -> Key: path = os.fspath(path) - parts = self.repo.fs.path.relparts(path, self.repo.root_dir) + parts = self.repo.fs.relparts(path, self.repo.root_dir) if parts == (os.curdir,): return () return parts @@ -210,13 +244,13 @@ def _subrepos_trie(self): def _get_key_from_relative(self, path) -> Key: path = self._strip_protocol(path) - parts = self.path.relparts(path, self.root_marker) + parts = self.relparts(path, self.root_marker) if parts and parts[0] == os.curdir: return parts[1:] return parts def _from_key(self, parts: Key) -> str: - return self.repo.fs.path.join(self.repo.root_dir, *parts) + return self.repo.fs.join(self.repo.root_dir, *parts) @functools.cached_property def _datafss(self): @@ -282,7 +316,7 @@ def _is_dvc_repo(self, dir_path): from dvc.repo import Repo - repo_path = self.repo.fs.path.join(dir_path, Repo.DVC_DIR) + repo_path = self.repo.fs.join(dir_path, Repo.DVC_DIR) return self.repo.fs.isdir(repo_path) def _get_subrepo_info( @@ -336,7 +370,7 @@ def ls(self, path, detail=True, dvc_only=False, **kwargs): dvc_path = _get_dvc_path(dvc_fs, subkey) with suppress(FileNotFoundError, NotADirectoryError): for info in dvc_fs.ls(dvc_path, detail=True): - dvc_infos[dvc_fs.path.name(info["name"])] = info + dvc_infos[dvc_fs.name(info["name"])] = info dvc_exists = True fs_exists = False @@ -349,7 +383,7 @@ def ls(self, path, detail=True, dvc_only=False, **kwargs): for info in repo.dvcignore.ls( fs, fs_path, detail=True, ignore_subrepos=ignore_subrepos ): - fs_infos[fs.path.name(info["name"])] = info + fs_infos[fs.name(info["name"])] = info 
fs_exists = True except (FileNotFoundError, NotADirectoryError): pass @@ -368,7 +402,7 @@ def ls(self, path, detail=True, dvc_only=False, **kwargs): if not dvcfiles and _is_dvc_file(name): continue - entry_path = self.path.join(path, name) + entry_path = self.join(path, name) info = _merge_info( repo, (*subkey, name), fs_infos.get(name), dvc_infos.get(name) ) @@ -416,7 +450,7 @@ def _info( # noqa: C901, PLR0912 # NOTE: if some parent in fs_path turns out to be a file, it means # that the whole repofs branch doesn't exist. if dvc_info and not fs_info: - for parent in fs.path.parents(fs_path): + for parent in fs.parents(fs_path): try: if fs.info(parent)["type"] != "directory": dvc_info = None @@ -498,6 +532,9 @@ def _prepare_credentials(self, **config) -> Dict[str, Any]: def fs(self) -> "_DVCFileSystem": return _DVCFileSystem(**self.fs_args) + def getcwd(self): + return self.fs.getcwd() + @property def fsid(self) -> str: return self.fs.fsid @@ -505,10 +542,6 @@ def fsid(self) -> str: def isdvc(self, path, **kwargs) -> bool: return self.fs.isdvc(path, **kwargs) - @property - def path(self) -> Path: - return self.fs.path - @property def repo(self) -> "Repo": return self.fs.repo diff --git a/dvc/fs/git.py b/dvc/fs/git.py index 2317f3cd46..ced434cd65 100644 --- a/dvc/fs/git.py +++ b/dvc/fs/git.py @@ -46,9 +46,11 @@ def fs( return FsspecGitFileSystem(**self.fs_args) - @functools.cached_property - def path(self): - return self.fs.path + def getcwd(self): + return self.fs.getcwd() + + def chdir(self, path): + self.fs.chdir(path) @property def rev(self) -> str: diff --git a/dvc/ignore.py b/dvc/ignore.py index 69cf12fb6c..a39c72b160 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -51,8 +51,8 @@ def __init__(self, pattern_list, dirname, sep): @classmethod def from_file(cls, path, fs, name): - assert fs.path.isabs(path) - dirname = fs.path.normpath(fs.path.dirname(path)) + assert fs.isabs(path) + dirname = fs.normpath(fs.dirname(path)) with fs.open(path, encoding="utf-8") as 
fobj: path_spec_lines = [ PatternInfo(line, f"{name}:{line_no + 1}:{line}") @@ -194,7 +194,7 @@ def __init__(self, fs, root_dir): ) def _get_key(self, path): - parts = self.fs.path.relparts(path, self.root_dir) + parts = self.fs.relparts(path, self.root_dir) if parts == (os.curdir,): return () return parts @@ -204,13 +204,13 @@ def _update_trie(self, dirname: str, trie: Trie) -> None: old_pattern = trie.longest_prefix(key).value matches = old_pattern.matches(dirname, DvcIgnore.DVCIGNORE_FILE, False) - path = self.fs.path.join(dirname, DvcIgnore.DVCIGNORE_FILE) + path = self.fs.join(dirname, DvcIgnore.DVCIGNORE_FILE) if not matches and self.fs.exists(path): - name = self.fs.path.relpath(path, self.root_dir) + name = self.fs.relpath(path, self.root_dir) new_pattern = DvcIgnorePatterns.from_file(path, self.fs, name) if old_pattern: plist, prefix = merge_patterns( - self.fs.path.flavour, + self.fs.flavour, old_pattern.pattern_list, old_pattern.dirname, new_pattern.pattern_list, @@ -239,7 +239,7 @@ def _update( dnames = [] for dname in dnames: - self._update_sub_repo(self.fs.path.join(dirname, dname), ignore_trie) + self._update_sub_repo(self.fs.join(dirname, dname), ignore_trie) def _update_sub_repo(self, path, ignore_trie: Trie): from dvc.repo import Repo @@ -247,18 +247,18 @@ def _update_sub_repo(self, path, ignore_trie: Trie): if path == self.root_dir: return - dvc_dir = self.fs.path.join(path, Repo.DVC_DIR) + dvc_dir = self.fs.join(path, Repo.DVC_DIR) if not self.fs.exists(dvc_dir): return - root, dname = self.fs.path.split(path) + root, dname = self.fs.split(path) key = self._get_key(root) pattern_info = PatternInfo(f"/{dname}/", f"in sub_repo:{dname}") new_pattern = DvcIgnorePatterns([pattern_info], root, self.fs.sep) old_pattern = ignore_trie.longest_prefix(key).value if old_pattern: plist, prefix = merge_patterns( - self.fs.path.flavour, + self.fs.flavour, old_pattern.pattern_list, old_pattern.dirname, new_pattern.pattern_list, @@ -269,7 +269,7 @@ def 
_update_sub_repo(self, path, ignore_trie: Trie): ignore_trie[key] = new_pattern def __call__(self, root, dirs, files, ignore_subrepos=True): - abs_root = self.fs.path.abspath(root) + abs_root = self.fs.abspath(root) ignore_pattern = self._get_trie_pattern( abs_root, dnames=dirs, ignore_subrepos=ignore_subrepos ) @@ -283,7 +283,7 @@ def ls(self, fs, path, detail=True, **kwargs): nondirs = [] for entry in fs.ls(path, detail=True, **kwargs): - name = fs.path.name(entry["name"]) + name = fs.name(entry["name"]) fs_dict[name] = entry if entry["type"] == "directory": dirs.append(name) @@ -338,7 +338,7 @@ def _get_trie_pattern( else: ignores_trie = self._ignores_trie_subrepos - if not self.fs.path.isin_or_eq(dirname, self.root_dir): + if not self.fs.isin_or_eq(dirname, self.root_dir): # outside of the repo return None @@ -349,12 +349,12 @@ def _get_trie_pattern( return ignore_pattern prefix_key = ignores_trie.longest_prefix(key).key or () - prefix = self.fs.path.join(self.root_dir, *prefix_key) + prefix = self.fs.join(self.root_dir, *prefix_key) dirs = list( takewhile( lambda path: path != prefix, - (parent for parent in localfs.path.parents(dirname)), + (parent for parent in localfs.parents(dirname)), ) ) dirs.reverse() @@ -370,7 +370,7 @@ def _is_ignored( ): if self._outside_repo(path): return False - dirname, basename = self.fs.path.split(self.fs.path.normpath(path)) + dirname, basename = self.fs.split(self.fs.normpath(path)) ignore_pattern = self._get_trie_pattern(dirname, None, ignore_subrepos) if ignore_pattern: return ignore_pattern.matches(dirname, basename, is_dir) @@ -378,7 +378,7 @@ def _is_ignored( def is_ignored_dir(self, path: str, ignore_subrepos: bool = True) -> bool: # only used in LocalFileSystem - path = self.fs.path.abspath(path) + path = self.fs.abspath(path) if path == self.root_dir: return False @@ -386,18 +386,18 @@ def is_ignored_dir(self, path: str, ignore_subrepos: bool = True) -> bool: def is_ignored_file(self, path: str, ignore_subrepos: bool = 
True) -> bool: # only used in LocalFileSystem - path = self.fs.path.abspath(path) + path = self.fs.abspath(path) return self._is_ignored(path, False, ignore_subrepos=ignore_subrepos) def _outside_repo(self, path): - return not self.fs.path.isin_or_eq(path, self.root_dir) + return not self.fs.isin_or_eq(path, self.root_dir) def check_ignore(self, target): # NOTE: can only be used in `dvc check-ignore`, see # https://github.com/iterative/dvc/issues/5046 - full_target = self.fs.path.abspath(target) + full_target = self.fs.abspath(target) if not self._outside_repo(full_target): - dirname, basename = self.fs.path.split(self.fs.path.normpath(full_target)) + dirname, basename = self.fs.split(self.fs.normpath(full_target)) pattern = self._get_trie_pattern(dirname) if pattern: matches = pattern.matches( diff --git a/dvc/output.py b/dvc/output.py index decc97e058..f122abfc84 100644 --- a/dvc/output.py +++ b/dvc/output.py @@ -378,7 +378,7 @@ def __init__( # noqa: PLR0913 if ( self.repo and self.fs.protocol == "local" - and not self.fs.path.isabs(self.def_path) + and not self.fs.isabs(self.def_path) ): self.fs = self.repo.fs @@ -411,7 +411,7 @@ def __init__( # noqa: PLR0913 self.remote = remote if self.fs.version_aware: - _, version_id = self.fs.path.coalesce_version( + _, version_id = self.fs.coalesce_version( self.def_path, self.meta.version_id ) self.meta.version_id = version_id @@ -465,7 +465,7 @@ def _parse_path(self, fs, fs_path): parsed.scheme != "remote" and self.stage and self.stage.repo.fs == fs - and not fs.path.isabs(fs_path) + and not fs.isabs(fs_path) ): # NOTE: we can path either from command line or .dvc file, # so we should expect both posix and windows style paths. @@ -473,9 +473,9 @@ def _parse_path(self, fs, fs_path): # # FIXME: if we have Windows path containing / or posix one with \ # then we have #2059 bug and can't really handle that. 
- fs_path = fs.path.join(self.stage.wdir, fs_path) + fs_path = fs.join(self.stage.wdir, fs_path) - return fs.path.abspath(fs.path.normpath(fs_path)) + return fs.abspath(fs.normpath(fs_path)) def __repr__(self): return f"{type(self).__name__}: {self.def_path!r}" @@ -491,14 +491,14 @@ def __str__(self): ): return str(self.def_path) - if not self.fs.path.isin(self.fs_path, self.repo.root_dir): + if not self.fs.isin(self.fs_path, self.repo.root_dir): return self.fs_path - cur_dir = self.fs.path.getcwd() - if self.fs.path.isin(cur_dir, self.repo.root_dir): - return self.fs.path.relpath(self.fs_path, cur_dir) + cur_dir = self.fs.getcwd() + if self.fs.isin(cur_dir, self.repo.root_dir): + return self.fs.relpath(self.fs_path, cur_dir) - return self.fs.path.relpath(self.fs_path, self.repo.root_dir) + return self.fs.relpath(self.fs_path, self.repo.root_dir) def clear(self): self.hash_info = HashInfo.from_dict({}) @@ -515,10 +515,10 @@ def is_in_repo(self): if urlparse(self.def_path).scheme == "remote": return False - if self.fs.path.isabs(self.def_path): + if self.fs.isabs(self.def_path): return False - return self.repo and self.fs.path.isin( + return self.repo and self.fs.isin( self.fs_path, self.repo.root_dir, ) @@ -607,11 +607,11 @@ def exists(self): def index_key(self) -> Tuple[str, "DataIndexKey"]: if self.is_in_repo: workspace = "repo" - key = self.repo.fs.path.relparts(self.fs_path, self.repo.root_dir) + key = self.repo.fs.relparts(self.fs_path, self.repo.root_dir) else: workspace = self.fs.protocol - no_drive = self.fs.path.flavour.splitdrive(self.fs_path)[1] - key = self.fs.path.parts(no_drive)[1:] + no_drive = self.fs.flavour.splitdrive(self.fs_path)[1] + key = self.fs.parts(no_drive)[1:] return workspace, key def changed_checksum(self): @@ -799,7 +799,7 @@ def commit(self, filter_info=None, relink=True) -> None: callback=cb, ) if relink: - rel = self.fs.path.relpath(filter_info or self.fs_path) + rel = self.fs.relpath(filter_info or self.fs_path) with 
CheckoutCallback(desc=f"Checking out {rel}", unit="files") as cb: self._checkout( filter_info or self.fs_path, @@ -814,7 +814,7 @@ def commit(self, filter_info=None, relink=True) -> None: self.set_exec() def _commit_granular_dir(self, filter_info, hardlink) -> Optional["HashFile"]: - prefix = self.fs.path.parts(self.fs.path.relpath(filter_info, self.fs_path)) + prefix = self.fs.parts(self.fs.relpath(filter_info, self.fs_path)) staging, _, obj = self._build( self.cache, self.fs_path, @@ -862,7 +862,7 @@ def dumpd(self, **kwargs): # noqa: C901, PLR0912 ret.update(split_file_meta_from_cloud(meta_d)) if self.is_in_repo: - path = self.fs.path.as_posix(relpath(self.fs_path, self.stage.wdir)) + path = self.fs.as_posix(relpath(self.fs_path, self.stage.wdir)) else: path = self.def_path @@ -949,7 +949,7 @@ def get_obj( return None assert obj - fs_path = self.fs.path + fs_path = self.fs if filter_info and filter_info != self.fs_path: prefix = fs_path.relparts(filter_info, self.fs_path) assert isinstance(obj, Tree) @@ -1154,7 +1154,7 @@ def _collect_used_dir_cache( assert obj is None or isinstance(obj, Tree) if filter_info and filter_info != self.fs_path: assert obj - prefix = self.fs.path.parts(self.fs.path.relpath(filter_info, self.fs_path)) + prefix = self.fs.parts(self.fs.relpath(filter_info, self.fs_path)) return obj.filter(prefix) return obj @@ -1297,10 +1297,7 @@ def merge(self, ancestor, other, allowed=None): def unstage(self, path: str) -> Tuple["Meta", "Tree"]: from pygtrie import Trie - from dvc_objects.fs.path import Path - - assert isinstance(self.fs.path, Path) - rel_key = tuple(self.fs.path.parts(self.fs.path.relpath(path, self.fs_path))) + rel_key = tuple(self.fs.parts(self.fs.relpath(path, self.fs_path))) if self.hash_info: tree = self.get_dir_cache() @@ -1333,11 +1330,8 @@ def apply( ) -> Tuple["Meta", "Tree"]: from pygtrie import Trie - from dvc_objects.fs.path import Path - - assert isinstance(self.fs.path, Path) append_only = True - rel_key = 
tuple(self.fs.path.parts(self.fs.path.relpath(path, self.fs_path))) + rel_key = tuple(self.fs.parts(self.fs.relpath(path, self.fs_path))) if self.hash_info: tree = self.get_dir_cache() diff --git a/dvc/parsing/__init__.py b/dvc/parsing/__init__.py index 90c437ea2c..a318bc7b27 100644 --- a/dvc/parsing/__init__.py +++ b/dvc/parsing/__init__.py @@ -143,11 +143,11 @@ def __init__(self, repo: "Repo", wdir: str, d: dict): self.parsing_config = repo.config.get("parsing", {}) if os.path.isabs(wdir): - wdir = fs.path.relpath(wdir) + wdir = fs.relpath(wdir) wdir = "" if wdir == os.curdir else wdir self.wdir = wdir - self.relpath = fs.path.normpath(fs.path.join(self.wdir, "dvc.yaml")) + self.relpath = fs.normpath(fs.join(self.wdir, "dvc.yaml")) vars_ = d.get(VARS_KWD, []) check_interpolations(vars_, VARS_KWD, self.relpath) @@ -248,7 +248,7 @@ def _resolve_wdir( wdir = to_str(context.resolve_str(wdir)) except (ContextError, ParseError) as exc: format_and_raise(exc, f"'{self.where}.{name}.wdir'", self.relpath) - return self.resolver.fs.path.join(self.wdir, wdir) + return self.resolver.fs.join(self.wdir, wdir) def resolve(self, **kwargs): try: diff --git a/dvc/parsing/context.py b/dvc/parsing/context.py index 97b7280013..45b5ffdeef 100644 --- a/dvc/parsing/context.py +++ b/dvc/parsing/context.py @@ -384,7 +384,7 @@ def merge_update(self, other: "Context", overwrite=False): def merge_from(self, fs, item: str, wdir: str, overwrite=False): path, _, keys_str = item.partition(":") - path = fs.path.normpath(fs.path.join(wdir, path)) + path = fs.normpath(fs.join(wdir, path)) select_keys = lfilter(bool, keys_str.split(",")) if keys_str else None if path in self.imports: @@ -429,7 +429,7 @@ def load_from_vars( default: Optional[str] = None, ): if default: - to_import = fs.path.join(wdir, default) + to_import = fs.join(wdir, default) if fs.exists(to_import): self.merge_from(fs, default, wdir) else: diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 2c6f965bf5..66559a52fe 
100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -114,7 +114,7 @@ def _get_repo_dirs( try: root_dir = self.find_root(root_dir, fs) fs = fs or localfs - dvc_dir = fs.path.join(root_dir, self.DVC_DIR) + dvc_dir = fs.join(root_dir, self.DVC_DIR) except NotDvcRepoError: if not uninitialized: raise @@ -207,7 +207,7 @@ def __init__( # noqa: PLR0915, PLR0913 self.fs.makedirs(self.tmp_dir, exist_ok=True) self.lock = make_lock( - self.fs.path.join(self.tmp_dir, "lock"), + self.fs.join(self.tmp_dir, "lock"), tmp_dir=self.tmp_dir, hardlink_lock=self.config["core"].get("hardlink_lock", False), friendly=True, @@ -251,7 +251,7 @@ def config(self): ) @cached_property - def local_dvc_dir(self): + def local_dvc_dir(self) -> Optional[str]: from dvc.fs import GitFileSystem, LocalFileSystem if not self.dvc_dir: @@ -263,10 +263,10 @@ def local_dvc_dir(self): if not isinstance(self.fs, GitFileSystem): return None - relparts = () + relparts: Tuple[str, ...] = () if self.root_dir != "/": # subrepo - relparts = self.fs.path.relparts(self.root_dir, "/") + relparts = self.fs.relparts(self.root_dir, "/") dvc_dir = os.path.join( self.scm.root_dir, @@ -395,7 +395,7 @@ def get_data_index_entry( index = subrepo.index.data[workspace] else: index = self.index.data[workspace] - key = self.fs.path.relparts(path, self.root_dir) + key = self.fs.relparts(path, self.root_dir) try: return index, index[key] @@ -411,18 +411,18 @@ def find_root(cls, root=None, fs=None) -> str: fs = fs or localfs root = root or os.curdir - root_dir = fs.path.abspath(root) + root_dir = fs.abspath(root) if not fs.isdir(root_dir): raise NotDvcRepoError(f"directory '{root}' does not exist") while True: - dvc_dir = fs.path.join(root_dir, cls.DVC_DIR) + dvc_dir = fs.join(root_dir, cls.DVC_DIR) if fs.isdir(dvc_dir): return root_dir if isinstance(fs, LocalFileSystem) and os.path.ismount(root_dir): break - parent = fs.path.parent(root_dir) + parent = fs.parent(root_dir) if parent == root_dir: break root_dir = parent @@ 
-440,7 +440,7 @@ def find_dvc_dir(cls, root=None, fs=None) -> str: fs = fs or localfs root_dir = cls.find_root(root, fs=fs) - return fs.path.join(root_dir, cls.DVC_DIR) + return fs.join(root_dir, cls.DVC_DIR) @staticmethod def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False) -> "Repo": @@ -542,19 +542,19 @@ def find_outs_by_path(self, path, outs=None, recursive=False, strict=True): # using `outs_graph` to ensure graph checks are run outs = outs or self.index.outs_graph - abs_path = self.fs.path.abspath(path) + abs_path = self.fs.abspath(path) fs_path = abs_path def func(out): def eq(one, two): return one == two - match = eq if strict else out.fs.path.isin_or_eq + match = eq if strict else out.fs.isin_or_eq if out.protocol == "local" and match(fs_path, out.fs_path): return True - if recursive and out.fs.path.isin(out.fs_path, fs_path): + if recursive and out.fs.isin(out.fs_path, fs_path): return True return False @@ -566,7 +566,7 @@ def eq(one, two): return matched def is_dvc_internal(self, path): - path_parts = self.fs.path.normpath(path).split(self.fs.sep) + path_parts = self.fs.normpath(path).split(self.fs.sep) return self.DVC_DIR in path_parts @cached_property @@ -613,10 +613,10 @@ def site_cache_dir(self) -> str: cache_dir = self.config["core"].get("site_cache_dir") or site_cache_dir() if isinstance(self.fs, GitFileSystem): - relparts = () + relparts: Tuple[str, ...] 
= () if self.root_dir != "/": # subrepo - relparts = self.fs.path.relparts(self.root_dir, "/") + relparts = self.fs.relparts(self.root_dir, "/") root_dir = os.path.join(self.scm.root_dir, *relparts) else: root_dir = self.root_dir diff --git a/dvc/repo/add.py b/dvc/repo/add.py index 0e630fbd2a..be1977c767 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -181,7 +181,7 @@ def _add_transfer( def _add(stage: "Stage", source: Optional[str] = None, no_commit: bool = False) -> None: out = stage.outs[0] - path = out.fs.path.abspath(source) if source else None + path = out.fs.abspath(source) if source else None try: stage.add_outs(path, no_commit=no_commit) except CacheLinkError: diff --git a/dvc/repo/artifacts.py b/dvc/repo/artifacts.py index 87a62ec7e7..e2056e82e2 100644 --- a/dvc/repo/artifacts.py +++ b/dvc/repo/artifacts.py @@ -229,7 +229,7 @@ def _download_studio( dvc_studio_config=dvc_studio_config, **kwargs, ).items(): - to_info = localfs.path.join(out, path) + to_info = localfs.join(out, path) if localfs.exists(to_info) and not force: hint = "\nTo override it, re-run with '--force'." raise FileExistsLocallyError( # noqa: TRY301 @@ -254,7 +254,7 @@ def _download_studio( fs, from_infos, localfs, to_infos, callback=cb, batch_size=jobs ) - return len(to_infos), relpath(localfs.path.commonpath(to_infos)) + return len(to_infos), relpath(localfs.commonpath(to_infos)) @classmethod def get( # noqa: PLR0913 diff --git a/dvc/repo/brancher.py b/dvc/repo/brancher.py index 38609677de..549c531bf9 100644 --- a/dvc/repo/brancher.py +++ b/dvc/repo/brancher.py @@ -60,12 +60,12 @@ def brancher( from dvc.fs import LocalFileSystem repo_root_parts: Tuple[str, ...] = () - if self.fs.path.isin(self.root_dir, self.scm.root_dir): - repo_root_parts = self.fs.path.relparts(self.root_dir, self.scm.root_dir) + if self.fs.isin(self.root_dir, self.scm.root_dir): + repo_root_parts = self.fs.relparts(self.root_dir, self.scm.root_dir) cwd_parts: Tuple[str, ...] 
= () - if self.fs.path.isin(self.fs.path.getcwd(), self.scm.root_dir): - cwd_parts = self.fs.path.relparts(self.fs.path.getcwd(), self.scm.root_dir) + if self.fs.isin(self.fs.getcwd(), self.scm.root_dir): + cwd_parts = self.fs.relparts(self.fs.getcwd(), self.scm.root_dir) saved_fs = self.fs saved_root = self.root_dir @@ -125,18 +125,18 @@ def _switch_fs( logger.trace("switching fs to revision %s", rev[:7]) assert isinstance(repo.scm, Git) fs = GitFileSystem(scm=repo.scm, rev=rev) - root_dir = repo.fs.path.join("/", *repo_root_parts) + root_dir = repo.fs.join("/", *repo_root_parts) if not fs.exists(root_dir): raise NotDvcRepoError(f"Commit '{rev[:7]}' does not contain a DVC repo") repo.fs = fs repo.root_dir = root_dir - repo.dvc_dir = fs.path.join(root_dir, repo.DVC_DIR) + repo.dvc_dir = fs.join(root_dir, repo.DVC_DIR) repo._reset() if cwd_parts: - cwd = repo.fs.path.join("/", *cwd_parts) - repo.fs.path.chdir(cwd) + cwd = repo.fs.join("/", *cwd_parts) + repo.fs.chdir(cwd) @contextmanager @@ -148,12 +148,12 @@ def switch(repo: "Repo", rev: str) -> Iterator[str]: rev = resolve_rev(repo.scm, rev) repo_root_parts: Tuple[str, ...] = () - if repo.fs.path.isin(repo.root_dir, repo.scm.root_dir): - repo_root_parts = repo.fs.path.relparts(repo.root_dir, repo.scm.root_dir) + if repo.fs.isin(repo.root_dir, repo.scm.root_dir): + repo_root_parts = repo.fs.relparts(repo.root_dir, repo.scm.root_dir) cwd_parts: Tuple[str, ...] 
= () - if repo.fs.path.isin(repo.fs.path.getcwd(), repo.scm.root_dir): - cwd_parts = repo.fs.path.relparts(repo.fs.path.getcwd(), repo.scm.root_dir) + if repo.fs.isin(repo.fs.getcwd(), repo.scm.root_dir): + cwd_parts = repo.fs.relparts(repo.fs.getcwd(), repo.scm.root_dir) saved_fs = repo.fs saved_root = repo.root_dir diff --git a/dvc/repo/checkout.py b/dvc/repo/checkout.py index 097ea0aca6..8f14c8c1cf 100644 --- a/dvc/repo/checkout.py +++ b/dvc/repo/checkout.py @@ -81,7 +81,7 @@ def _check_can_delete( if cache_fs.exists(cache_path): continue - entry_paths.append(fs.path.join(path, *(entry.key or ()))) + entry_paths.append(fs.join(path, *(entry.key or ()))) if not entry_paths: return @@ -172,7 +172,7 @@ def checkout_onerror(src_path, dest_path, _exc): ) for out_path in out_paths: - if self.fs.path.isin_or_eq(dest_path, out_path): + if self.fs.isin_or_eq(dest_path, out_path): failed.add(out_path) with ui.progress( @@ -195,7 +195,7 @@ def checkout_onerror(src_path, dest_path, _exc): typ_map = {ADD: "added", DELETE: "deleted", MODIFY: "modified"} for key, typ in out_changes.items(): - out_path = self.fs.path.join(self.root_dir, *key) + out_path = self.fs.join(self.root_dir, *key) if out_path in failed: self.fs.remove(out_path, recursive=True) diff --git a/dvc/repo/data.py b/dvc/repo/data.py index b365486754..ce6344fdce 100644 --- a/dvc/repo/data.py +++ b/dvc/repo/data.py @@ -205,7 +205,7 @@ class Status(TypedDict): def _transform_git_paths_to_dvc(repo: "Repo", files: Iterable[str]) -> List[str]: """Transform files rel. to Git root to DVC root, and drop outside files.""" - rel = repo.fs.path.relpath(repo.root_dir, repo.scm.root_dir).rstrip("/") + rel = repo.fs.relpath(repo.root_dir, repo.scm.root_dir).rstrip("/") # if we have repo root in a different location than scm's root, # i.e. subdir repo, all git_paths need to be transformed rel. 
to the DVC @@ -215,11 +215,11 @@ def _transform_git_paths_to_dvc(repo: "Repo", files: Iterable[str]) -> List[str] length = len(prefix) files = (file[length:] for file in files if file.startswith(prefix)) - start = repo.fs.path.relpath(repo.fs.path.getcwd(), repo.root_dir) + start = repo.fs.relpath(repo.fs.getcwd(), repo.root_dir) if start in (os.curdir, ""): return list(files) # we need to convert repo relative paths to curdir relative. - return [repo.fs.path.relpath(file, start) for file in files] + return [repo.fs.relpath(file, start) for file in files] def status( diff --git a/dvc/repo/experiments/stash.py b/dvc/repo/experiments/stash.py index c0e46d739c..607fef5c7e 100644 --- a/dvc/repo/experiments/stash.py +++ b/dvc/repo/experiments/stash.py @@ -171,7 +171,7 @@ def _copy_difference(self, left_rev: str, right_rev: str): right_fs = self.scm.get_fs(right_rev) paths = [path for path in left_fs.find("/") if not right_fs.exists(path)] dest_paths = [ - localfs.path.join(self.scm.root_dir, left_fs.path.relpath(path, "/")) + localfs.join(self.scm.root_dir, left_fs.relpath(path, "/")) for path in paths ] for src, dest in zip(paths, dest_paths): diff --git a/dvc/repo/fetch.py b/dvc/repo/fetch.py index 3a3c795091..1967725a06 100644 --- a/dvc/repo/fetch.py +++ b/dvc/repo/fetch.py @@ -219,7 +219,7 @@ def _log_unversioned(data: List["DataIndex"]) -> Tuple[List["DataIndex"], int]: index.storage_map = fs_index.storage_map for key, entry in fs_index.iteritems(): if entry.meta and not entry.meta.isdir and entry.meta.version_id is None: - unversioned.append(fs.unstrip_protocol(fs.path.join(remote.path, *key))) + unversioned.append(fs.unstrip_protocol(fs.join(remote.path, *key))) else: index[key] = entry fs_index.close() diff --git a/dvc/repo/graph.py b/dvc/repo/graph.py index e285e1bbe5..24b62ffd0f 100644 --- a/dvc/repo/graph.py +++ b/dvc/repo/graph.py @@ -126,7 +126,7 @@ def build_graph(stages, outs_trie=None): outs_trie = outs_trie or build_outs_trie(stages) for stage in 
stages: - out = outs_trie.shortest_prefix(localfs.path.parts(stage.path)).value + out = outs_trie.shortest_prefix(localfs.parts(stage.path)).value if out: raise StagePathAsOutputError(stage, str(out)) @@ -139,7 +139,7 @@ def build_graph(stages, outs_trie=None): continue for dep in stage.deps: - dep_key = dep.fs.path.parts(dep.fs_path) + dep_key = dep.fs.parts(dep.fs_path) overlapping = [n.value for n in outs_trie.prefixes(dep_key)] if outs_trie.has_subtrie(dep_key): overlapping.extend(outs_trie.values(prefix=dep_key)) @@ -165,7 +165,7 @@ def build_outs_graph(graph, outs_trie): if stage.is_db_import: continue for dep in stage.deps: - dep_key = dep.fs.path.parts(dep.fs_path) + dep_key = dep.fs.parts(dep.fs_path) overlapping = [n.value for n in outs_trie.prefixes(dep_key)] if outs_trie.has_subtrie(dep_key): overlapping.extend(outs_trie.values(prefix=dep_key)) diff --git a/dvc/repo/index.py b/dvc/repo/index.py index d49cfa90bf..76e2ce3a49 100644 --- a/dvc/repo/index.py +++ b/dvc/repo/index.py @@ -97,7 +97,7 @@ def is_out_or_ignored(root, directory): for root, dirs, files in walk_iter: dvcfile_filter = partial(is_dvcfile_and_not_ignored, root) for file in filter(dvcfile_filter, files): - file_path = fs.path.join(root, file) + file_path = fs.join(root, file) try: index = Index.from_file(repo, file_path) except DvcException as exc: @@ -201,7 +201,7 @@ def _load_storage_from_out(storage_map, key, out): FileStorage( key, fs_cache.fs, - fs_cache.fs.path.join( + fs_cache.fs.join( fs_cache.path, dep.fs.protocol, tokenize(dep.fs_path) ), ) @@ -414,7 +414,7 @@ def metric_keys(self) -> Dict[str, Set["DataIndexKey"]]: by_workspace[workspace].add(key) for path in _collect_top_level_metrics(self.repo): - key = self.repo.fs.path.relparts(path, self.repo.root_dir) + key = self.repo.fs.relparts(path, self.repo.root_dir) by_workspace["repo"].add(key) return dict(by_workspace) @@ -432,7 +432,7 @@ def param_keys(self) -> Dict[str, Set["DataIndexKey"]]: param_paths = chain(param_paths, 
[default_file]) for path in param_paths: - key = self.repo.fs.path.relparts(path, self.repo.root_dir) + key = self.repo.fs.relparts(path, self.repo.root_dir) by_workspace["repo"].add(key) return dict(by_workspace) @@ -451,7 +451,7 @@ def plot_keys(self) -> Dict[str, Set["DataIndexKey"]]: by_workspace[workspace].add(key) for path in self._plot_sources: - key = self.repo.fs.path.parts(path) + key = self.repo.fs.parts(path) by_workspace["repo"].add(key) return dict(by_workspace) @@ -701,8 +701,8 @@ def _data_prefixes(self) -> Dict[str, "_DataPrefixes"]: if not out.use_cache: continue workspace, key = out.index_key - if filter_info and out.fs.path.isin(filter_info, out.fs_path): - key = key + out.fs.path.relparts(filter_info, out.fs_path) + if filter_info and out.fs.isin(filter_info, out.fs_path): + key = key + out.fs.relparts(filter_info, out.fs_path) entry = self._index.data[workspace].get(key) if entry and entry.meta and entry.meta.isdir: prefixes[workspace].recursive.add(key) @@ -718,8 +718,8 @@ def data_keys(self) -> Dict[str, Set["DataIndexKey"]]: continue workspace, key = out.index_key - if filter_info and out.fs.path.isin(filter_info, out.fs_path): - key = key + out.fs.path.relparts(filter_info, out.fs_path) + if filter_info and out.fs.isin(filter_info, out.fs_path): + key = key + out.fs.relparts(filter_info, out.fs_path) ret[workspace].add(key) return dict(ret) @@ -782,7 +782,7 @@ def build_data_index( # noqa: C901, PLR0912 data = DataIndex() parents = set() for key in index.data_keys.get(workspace, set()): - out_path = fs.path.join(path, *key) + out_path = fs.join(path, *key) for key_len in range(1, len(key)): parents.add(key[:key_len]) @@ -827,7 +827,7 @@ def build_data_index( # noqa: C901, PLR0912 callback.relative_update(1) for key in parents: - parent_path = fs.path.join(path, *key) + parent_path = fs.join(path, *key) if not fs.exists(parent_path): continue direntry = DataIndexEntry(key=key, meta=Meta(isdir=True), loaded=True) diff --git a/dvc/repo/ls.py 
b/dvc/repo/ls.py index 3e3e75f2e7..19d4d7b829 100644 --- a/dvc/repo/ls.py +++ b/dvc/repo/ls.py @@ -89,7 +89,7 @@ def _ls( if not recursive: files.update(dirs) - parts = fs.path.relparts(root, fs_path) + parts = fs.relparts(root, fs_path) if parts == (".",): parts = () diff --git a/dvc/repo/ls_url.py b/dvc/repo/ls_url.py index 32253a9ef7..f51177a707 100644 --- a/dvc/repo/ls_url.py +++ b/dvc/repo/ls_url.py @@ -18,7 +18,7 @@ def ls_url(url, *, fs_config=None, recursive=False, config=None): for info in files.values(): ls_info = { - "path": fs.path.relpath(info["name"], fs_path), + "path": fs.relpath(info["name"], fs_path), "isdir": info["type"] == "directory", "size": info.get("size"), } diff --git a/dvc/repo/metrics/show.py b/dvc/repo/metrics/show.py index c43e4faa6c..a2f7229b59 100644 --- a/dvc/repo/metrics/show.py +++ b/dvc/repo/metrics/show.py @@ -35,10 +35,10 @@ def _collect_top_level_metrics(repo: "Repo") -> Iterator[str]: top_metrics = repo.index._metrics for dvcfile, metrics in top_metrics.items(): - wdir = repo.fs.path.relpath(repo.fs.path.parent(dvcfile), repo.root_dir) + wdir = repo.fs.relpath(repo.fs.parent(dvcfile), repo.root_dir) for file in metrics: - path = repo.fs.path.join(wdir, as_posix(file)) - yield repo.fs.path.normpath(path) + path = repo.fs.join(wdir, as_posix(file)) + yield repo.fs.normpath(path) def _extract_metrics(metrics, path: str): @@ -101,7 +101,7 @@ def _collect_metrics( if not targets or outs_only: outs = metrics_from_target(repo, stages) if stages else repo.index.metrics - relpath = repo.fs.path.relpath + relpath = repo.fs.relpath metrics.extend(relpath(out.fs_path, repo.root_dir) for out in outs) if not targets and not outs_only and not stages: @@ -128,8 +128,8 @@ class Result(TypedDict, total=False): def to_relpath(fs: "FileSystem", root_dir: str, d: Result) -> Result: - relpath = fs.path.relpath - cwd = fs.path.getcwd() + relpath = fs.relpath + cwd = fs.getcwd() start = relpath(cwd, root_dir) data = d.get("data") @@ -156,7 +156,7 
@@ def _gather_metrics( fs = repo.dvcfs for fs_path, result in _read_metrics(fs, files, cache=True): repo_path = fs_path.lstrip(fs.root_marker) - repo_os_path = os.sep.join(fs.path.parts(repo_path)) + repo_os_path = os.sep.join(fs.parts(repo_path)) if not isinstance(result, Exception): data.update({repo_os_path: FileResult(data=result)}) continue diff --git a/dvc/repo/open_repo.py b/dvc/repo/open_repo.py index d92546a1b7..8cc08b187b 100644 --- a/dvc/repo/open_repo.py +++ b/dvc/repo/open_repo.py @@ -71,7 +71,7 @@ def make_repo(path, fs=None, **_kwargs): _config = cache_config.copy() if os.path.isdir(url): fs = fs or localfs - repo_path = os.path.join(url, *fs.path.relparts(path, root_dir)) + repo_path = os.path.join(url, *fs.relparts(path, root_dir)) _config.update(_get_remote_config(repo_path)) return Repo(path, fs=fs, config=_config, **_kwargs) diff --git a/dvc/repo/params/show.py b/dvc/repo/params/show.py index 0ce0f69fb3..ea5ceb0139 100644 --- a/dvc/repo/params/show.py +++ b/dvc/repo/params/show.py @@ -21,10 +21,10 @@ def _collect_top_level_params(repo: "Repo") -> Iterator[str]: top_params = repo.index._params for dvcfile, params in top_params.items(): - wdir = repo.fs.path.relpath(repo.fs.path.parent(dvcfile), repo.root_dir) + wdir = repo.fs.relpath(repo.fs.parent(dvcfile), repo.root_dir) for file in params: - path = repo.fs.path.join(wdir, as_posix(file)) - yield repo.fs.path.normpath(path) + path = repo.fs.join(wdir, as_posix(file)) + yield repo.fs.normpath(path) def params_from_target( @@ -55,7 +55,7 @@ def _collect_params( if not targets or stages: deps = params_from_target(repo, stages) if stages else repo.index.params - relpath = repo.fs.path.relpath + relpath = repo.fs.relpath params.extend( {relpath(dep.fs_path, repo.root_dir): list(dep.params)} for dep in deps ) @@ -93,7 +93,7 @@ def _collect_vars(repo, params, stages=None) -> Dict: # to reduce noise and duplication, they are skipped # `file` is relative - abspath = repo.fs.path.abspath(file) + abspath 
= repo.fs.abspath(file) repo_path = repo.dvcfs.from_os_path(abspath) if repo_path in params: continue @@ -138,7 +138,7 @@ def _gather_params( fs = repo.dvcfs for fs_path, result in _read_params(fs, files_keypaths, cache=True): repo_path = fs_path.lstrip(fs.root_marker) - repo_os_path = os.sep.join(fs.path.parts(repo_path)) + repo_os_path = os.sep.join(fs.parts(repo_path)) if not isinstance(result, Exception): data.update({repo_os_path: FileResult(data=result)}) continue diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index 7d564da24e..62e559c8fe 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -386,7 +386,7 @@ def _matches(targets, config_file, plot_id): def _normpath(path): - # TODO dvcfs.path.normopath normalizes to windows path on Windows + # TODO dvcfs.normpath normalizes to windows path on Windows even though other methods work as expected import posixpath @@ -399,7 +399,7 @@ def _relpath(fs, path): # and invoking from some subdir `dvcfile.relpath` returns strange long # relative paths # ("../../../../../../dvc.yaml") - investigate - return fs.path.relpath(fs.path.join("/", fs.from_os_path(path)), fs.path.getcwd()) + return fs.relpath(fs.join("/", fs.from_os_path(path)), fs.getcwd()) def _collect_output_plots(repo, targets, props, onerror: Optional[Callable] = None): @@ -413,7 +413,7 @@ def _collect_output_plots(repo, targets, props, onerror: Optional[Callable] = No if _matches(targets, config_path, str(plot)): unpacked = unpack_if_dir( fs, - _normpath(fs.path.join(wdir_relpath, plot.def_path)), + _normpath(fs.join(wdir_relpath, plot.def_path)), props={**plot_props, **props}, onerror=onerror, ) @@ -438,7 +438,7 @@ def _adjust_sources(fs, plot_props, config_dir): old_y = new_plot_props.pop("y", {}) new_y = {} for filepath, val in old_y.items(): - new_y[_normpath(fs.path.join(config_dir, filepath))] = val + new_y[_normpath(fs.join(config_dir, filepath))] = val new_plot_props["y"] = new_y return new_plot_props @@ 
-452,13 +452,13 @@ def _resolve_definitions( onerror: Optional[Callable[[Any], Any]] = None, ): config_path = os.fspath(config_path) - config_dir = fs.path.dirname(config_path) + config_dir = fs.dirname(config_path) result: Dict[str, Dict] = {} for plot_id, plot_props in definitions.items(): if plot_props is None: plot_props = {} if _id_is_path(plot_props): - data_path = _normpath(fs.path.join(config_dir, plot_id)) + data_path = _normpath(fs.join(config_dir, plot_id)) if _matches(targets, config_path, plot_id): unpacked = unpack_if_dir( fs, diff --git a/dvc/repo/stage.py b/dvc/repo/stage.py index ee7c9466bf..6824ce51b1 100644 --- a/dvc/repo/stage.py +++ b/dvc/repo/stage.py @@ -219,7 +219,7 @@ def _get_filepath( self, path: Optional[str] = None, name: Optional[str] = None ) -> str: if path: - return self.repo.fs.path.abspath(path) + return self.repo.fs.abspath(path) path = PROJECT_FILE logger.debug("Assuming '%s' to be a stage inside '%s'", name, path) @@ -348,7 +348,7 @@ def collect( if recursive and self.fs.isdir(target): from dvc.repo.graph import collect_inside_path - path = self.fs.path.abspath(target) + path = self.fs.abspath(target) return collect_inside_path(path, graph or self.repo.index.graph) stages = self.from_target(target, glob=glob) @@ -393,7 +393,7 @@ def collect_granular( if not (recursive and self.fs.isdir(target)): try: (out,) = self.repo.find_outs_by_path(target, strict=False) - return [StageInfo(out.stage, self.fs.path.abspath(target))] + return [StageInfo(out.stage, self.fs.abspath(target))] except OutputNotFoundError: pass diff --git a/dvc/repo/trie.py b/dvc/repo/trie.py index f33aa34d56..de5ed08852 100644 --- a/dvc/repo/trie.py +++ b/dvc/repo/trie.py @@ -9,7 +9,7 @@ def build_outs_trie(stages): for stage in stages: for out in stage.outs: - out_key = out.fs.path.parts(out.fs_path) + out_key = out.fs.parts(out.fs_path) # Check for dup outs if out_key in outs: diff --git a/dvc/repo/worktree.py b/dvc/repo/worktree.py index 0235754983..f3d94ba3ea 
100644 --- a/dvc/repo/worktree.py +++ b/dvc/repo/worktree.py @@ -131,10 +131,11 @@ def _merge_push_meta( # noqa: C901 entries.append(entry) if entry.meta is not None and entry.meta.isdir: continue - fs_path = repo.fs.path.join(repo.root_dir, *subkey) - meta, hash_info = old_tree.get( - repo.fs.path.relparts(fs_path, out.fs_path) - ) or (None, None) + fs_path = repo.fs.join(repo.root_dir, *subkey) + meta, hash_info = old_tree.get(repo.fs.relparts(fs_path, out.fs_path)) or ( + None, + None, + ) entry.hash_info = hash_info if entry.meta: entry.meta.remote = remote @@ -354,11 +355,11 @@ def _get_update_diff_index( # downloading if out.isdir(): if not entry.meta.isdir: - fs_path = repo.fs.path.join(repo.root_dir, *entry.key) + fs_path = repo.fs.join(repo.root_dir, *entry.key) tree = out.obj assert isinstance(tree, Tree) _, entry.hash_info = tree.get( # type: ignore[misc] - repo.fs.path.relparts(fs_path, out.fs_path) + repo.fs.relparts(fs_path, out.fs_path) ) else: entry.hash_info = out.hash_info diff --git a/dvc/rwlock.py b/dvc/rwlock.py index 7722a62fa7..8d06df1632 100644 --- a/dvc/rwlock.py +++ b/dvc/rwlock.py @@ -43,10 +43,10 @@ def __init__(self, path): @contextmanager def _edit_rwlock(lock_dir, fs, hardlink): - path = fs.path.join(lock_dir, RWLOCK_FILE) + path = fs.join(lock_dir, RWLOCK_FILE) rwlock_guard = make_lock( - fs.path.join(lock_dir, RWLOCK_LOCK), + fs.join(lock_dir, RWLOCK_LOCK), tmp_dir=lock_dir, hardlink_lock=hardlink, ) @@ -82,7 +82,7 @@ def _check_blockers(tmp_dir, lock, info, *, mode, waiters): # noqa: C901, PLR09 to_release = defaultdict(list) for path, infos in lock[mode].items(): for waiter_path in waiters: - if localfs.path.overlaps(waiter_path, path): + if localfs.overlaps(waiter_path, path): break else: continue diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py index ade80ad6ac..6047df30c8 100644 --- a/dvc/stage/__init__.py +++ b/dvc/stage/__init__.py @@ -656,7 +656,7 @@ def _check_missing_outputs(self) -> None: def filter_outs(self, 
fs_path) -> Iterable["Output"]: def _func(o): - return o.fs.path.isin_or_eq(fs_path, o.fs_path) + return o.fs.isin_or_eq(fs_path, o.fs_path) return filter(_func, self.outs) if fs_path else self.outs diff --git a/dvc/stage/cache.py b/dvc/stage/cache.py index 4e8501eb1c..7d3d1078ce 100644 --- a/dvc/stage/cache.py +++ b/dvc/stage/cache.py @@ -181,9 +181,9 @@ def save(self, stage): path = self._get_cache_path(cache_key, cache_value) local_fs = self.repo.cache.legacy.fs - parent = local_fs.path.parent(path) + parent = local_fs.parent(path) self.repo.cache.legacy.makedirs(parent) - tmp = local_fs.path.join(parent, fs.utils.tmp_fname()) + tmp = local_fs.join(parent, fs.utils.tmp_fname()) assert os.path.exists(parent) assert os.path.isdir(parent) dump_yaml(tmp, cache) @@ -236,7 +236,7 @@ def transfer(self, from_odb, to_odb): from_fs = from_odb.fs to_fs = to_odb.fs func = fs.generic.log_exceptions(fs.generic.copy) - runs = from_fs.path.join(from_odb.path, "runs") + runs = from_fs.join(from_odb.path, "runs") http_odb = next( (odb for odb in (from_odb, to_odb) if isinstance(odb.fs, HTTPFileSystem)), @@ -252,20 +252,20 @@ def transfer(self, from_odb, to_odb): return ret for src in from_fs.find(runs): - rel = from_fs.path.relpath(src, from_odb.path) + rel = from_fs.relpath(src, from_odb.path) if not isinstance(to_fs, LocalFileSystem): - rel = from_fs.path.as_posix(rel) + rel = from_fs.as_posix(rel) - dst = to_fs.path.join(to_odb.path, rel) - key = to_fs.path.parent(dst) + dst = to_fs.join(to_odb.path, rel) + key = to_fs.parent(dst) # check if any build cache already exists for this key # TODO: check if MaxKeys=1 or something like that applies # or otherwise this will take a lot of time! 
if to_fs.exists(key) and first(to_fs.find(key)): continue - src_name = from_fs.path.name(src) - parent_name = from_fs.path.name(from_fs.path.parent(src)) + src_name = from_fs.name(src) + parent_name = from_fs.name(from_fs.parent(src)) with TqdmCallback( desc=src_name, bytes=True, diff --git a/dvc/stage/utils.py b/dvc/stage/utils.py index 2a294a76a2..4722d9ba1b 100644 --- a/dvc/stage/utils.py +++ b/dvc/stage/utils.py @@ -184,9 +184,9 @@ def resolve_wdir(wdir, path): def resolve_paths(fs, path, wdir=None): - path = fs.path.abspath(path) + path = fs.abspath(path) wdir = wdir or os.curdir - wdir = fs.path.abspath(fs.path.join(fs.path.dirname(path), wdir)) + wdir = fs.abspath(fs.join(fs.dirname(path), wdir)) return path, wdir diff --git a/dvc/testing/workspace_tests.py b/dvc/testing/workspace_tests.py index e998601147..c9846873c0 100644 --- a/dvc/testing/workspace_tests.py +++ b/dvc/testing/workspace_tests.py @@ -163,8 +163,8 @@ def test_import_no_download(self, tmp_dir, dvc, remote_version_aware): def match_files(fs, entries, expected): - entries_content = {(fs.path.normpath(d["path"]), d["isdir"]) for d in entries} - expected_content = {(fs.path.normpath(d["path"]), d["isdir"]) for d in expected} + entries_content = {(fs.normpath(d["path"]), d["isdir"]) for d in entries} + expected_content = {(fs.normpath(d["path"]), d["isdir"]) for d in expected} assert entries_content == expected_content @@ -177,7 +177,7 @@ def test_file(self, cloud, fname): match_files( fs, result, - [{"path": fs.path.join(fs_path, fname), "isdir": False}], + [{"path": fs.join(fs_path, fname), "isdir": False}], ) def test_dir(self, cloud): diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index c6998ded3d..3eba21e200 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -287,7 +287,7 @@ def resolve_paths(repo, out, always_local=False): # urlparse interprets windows drive letters as URL scheme scheme = "" - if scheme or not localfs.path.isin_or_eq(abspath, repo.root_dir): + if 
scheme or not localfs.isin_or_eq(abspath, repo.root_dir): wdir = os.getcwd() elif contains_symlink_up_to(dirname, repo.root_dir) or ( os.path.isdir(abspath) and localfs.is_symlink(abspath) diff --git a/dvc/utils/serialize/__init__.py b/dvc/utils/serialize/__init__.py index 2433943da6..e42cb8cae3 100644 --- a/dvc/utils/serialize/__init__.py +++ b/dvc/utils/serialize/__init__.py @@ -21,7 +21,7 @@ def load_path(fs_path, fs, **kwargs): - suffix = fs.path.suffix(fs_path).lower() + suffix = fs.suffix(fs_path).lower() loader = LOADERS[suffix] return loader(fs_path, fs=fs, **kwargs) diff --git a/pyproject.toml b/pyproject.toml index 0cf340a5f5..41b28d8c06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "configobj>=5.0.6", "distro>=1.3", "dpath<3,>=2.1.0", - "dvc-data>=2.25,<2.26", + "dvc-data>=3,<3.1", "dvc-http>=2.29.0", "dvc-render>=1.0.0,<2", "dvc-studio-client>=0.17.1,<1", @@ -45,7 +45,7 @@ dependencies = [ "flufl.lock>=5,<8", "funcy>=1.14", "grandalf<1,>=0.7", - "gto>=1.4.0,<2", + "gto>=1.6.0,<2", "hydra-core>=1.1", "iterative-telemetry>=0.0.7", "networkx>=2.5", @@ -59,7 +59,7 @@ dependencies = [ "requests>=2.22", "rich>=12", "ruamel.yaml>=0.17.11", - "scmrepo>=1.5.0,<2", + "scmrepo>=2.0.2,<3", "shortuuid>=0.5", "shtab<2,>=1.3.4", "tabulate>=0.8.7", @@ -71,11 +71,11 @@ dependencies = [ [project.optional-dependencies] all = ["dvc[azure,gdrive,gs,hdfs,oss,s3,ssh,webdav,webhdfs]"] -azure = ["dvc-azure>=2.23.0"] +azure = ["dvc-azure>=3,<4"] dev = ["dvc[azure,gdrive,gs,hdfs,lint,oss,s3,ssh,tests,webdav,webhdfs]"] -gdrive = ["dvc-gdrive==2.20"] -gs = ["dvc-gs==2.22.1"] -hdfs = ["dvc-hdfs==2.19"] +gdrive = ["dvc-gdrive>=3,<4"] +gs = ["dvc-gs>=3,<4"] +hdfs = ["dvc-hdfs>=3,<4"] lint = [ "mypy==1.7.1", "pandas-stubs", @@ -87,10 +87,10 @@ lint = [ "types-toml", "types-tqdm", ] -oss = ["dvc-oss>=2.22.0"] -s3 = ["dvc-s3==2.23.0"] -ssh = ["dvc-ssh>=2.22.1,<3"] -ssh_gssapi = ["dvc-ssh[gssapi]>=2.22.1,<3"] +oss = ["dvc-oss>=3,<4"] +s3 = 
["dvc-s3>=3,<4"] +ssh = ["dvc-ssh>=3,<4"] +ssh_gssapi = ["dvc-ssh[gssapi]>=3,<4"] testing = [ "pytest-benchmark[histogram]", "pytest-test-utils", @@ -116,9 +116,9 @@ tests = [ "sqlalchemy>=1,<3", # optional dependency for `import-db` "pandas>=1", ] -webdav = ["dvc-webdav==2.19.1"] -webhdfs = ["dvc-webhdfs==2.19"] -webhdfs_kerberos = ["dvc-webhdfs[kerberos]==2.19"] +webdav = ["dvc-webdav>=3,<4"] +webhdfs = ["dvc-webhdfs>=3,<4"] +webhdfs_kerberos = ["dvc-webhdfs[kerberos]>=3,<4"] [project.urls] Documentation = "https://dvc.org/doc" diff --git a/tests/func/test_fs.py b/tests/func/test_fs.py index d830f7b2f1..b0d0c69dea 100644 --- a/tests/func/test_fs.py +++ b/tests/func/test_fs.py @@ -13,15 +13,15 @@ def test_cleanfs_subrepo(tmp_dir, dvc, scm, monkeypatch): path = subrepo_dir.fs_path - assert dvc.fs.exists(dvc.fs.path.join(path, "foo")) - assert dvc.fs.isfile(dvc.fs.path.join(path, "foo")) - assert dvc.fs.exists(dvc.fs.path.join(path, "dir")) - assert dvc.fs.isdir(dvc.fs.path.join(path, "dir")) - - assert subrepo.fs.exists(subrepo.fs.path.join(path, "foo")) - assert subrepo.fs.isfile(subrepo.fs.path.join(path, "foo")) - assert subrepo.fs.exists(subrepo.fs.path.join(path, "dir")) - assert subrepo.fs.isdir(subrepo.fs.path.join(path, "dir")) + assert dvc.fs.exists(dvc.fs.join(path, "foo")) + assert dvc.fs.isfile(dvc.fs.join(path, "foo")) + assert dvc.fs.exists(dvc.fs.join(path, "dir")) + assert dvc.fs.isdir(dvc.fs.join(path, "dir")) + + assert subrepo.fs.exists(subrepo.fs.join(path, "foo")) + assert subrepo.fs.isfile(subrepo.fs.join(path, "foo")) + assert subrepo.fs.exists(subrepo.fs.join(path, "dir")) + assert subrepo.fs.isdir(subrepo.fs.join(path, "dir")) def test_walk_dont_ignore_subrepos(tmp_dir, scm, dvc): diff --git a/tests/unit/fs/test_path.py b/tests/unit/fs/test_path.py deleted file mode 100644 index 0ecaa4ec5a..0000000000 --- a/tests/unit/fs/test_path.py +++ /dev/null @@ -1,29 +0,0 @@ -import pytest - -from dvc.fs import Path - - 
-@pytest.mark.parametrize("prefix", ["", "/"]) -@pytest.mark.parametrize("postfix", ["", "/"]) -@pytest.mark.parametrize( - "path,expected", - [ - ("path", ("path",)), - ("some/path", ("some", "path")), - ], -) -def test_parts_posix(prefix, postfix, path, expected): - assert Path("/").parts(prefix + path + postfix) == tuple(prefix) + expected - - -@pytest.mark.parametrize("postfix", ["", "\\"]) -@pytest.mark.parametrize( - "path,expected", - [ - ("path", ("path",)), - ("c:\\path", ("c:", "\\", "path")), - ("some\\path", ("some", "path")), - ], -) -def test_parts_nt(postfix, path, expected): - assert Path("\\").parts(path + postfix) == expected diff --git a/tests/unit/test_ignore.py b/tests/unit/test_ignore.py index 0a2b126ec4..7118f3e4fb 100644 --- a/tests/unit/test_ignore.py +++ b/tests/unit/test_ignore.py @@ -173,11 +173,10 @@ def test_match_ignore_from_file( ) dvcignore_dirname = os.path.dirname(dvcignore_path) - fs = mocker.MagicMock() - fs.path = localfs.path - fs.sep = localfs.sep - mocker.patch.object(fs, "open", mocker.mock_open(read_data="\n".join(patterns))) - ignore_file = DvcIgnorePatterns.from_file(dvcignore_path, fs, "mocked") + mocker.patch.object( + localfs, "open", mocker.mock_open(read_data="\n".join(patterns)) + ) + ignore_file = DvcIgnorePatterns.from_file(dvcignore_path, localfs, "mocked") assert ( ignore_file.matches(dvcignore_dirname, file_to_ignore_relpath) == expected_match