Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions dvc/repo/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from dvc_data.hashfile.hash_info import HashInfo
from dvc_data.index import DataIndex, DataIndexKey, DataIndexView
from dvc_objects.db import ObjectDB
from dvc_objects.fs.base import FileSystem


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -447,3 +448,50 @@ def key_filter(workspace: str, key: "DataIndexKey"):
else:
data[workspace] = DataIndex()
return data


def build_data_index(
    index: Union["Index", "IndexView"],
    path: str,
    fs: "FileSystem",
    workspace: Optional[str] = "repo",
) -> "DataIndex":
    """Build a ``DataIndex`` for the outputs of ``index`` as found on ``fs``.

    Each output's location is its path relative to its repo root, re-rooted
    under ``path`` on ``fs``. Outputs with ``use_cache`` disabled, or whose
    index workspace differs from ``workspace``, are skipped.

    Args:
        index: Object exposing ``outs`` (an ``Index`` or ``IndexView``).
        path: Root path on ``fs`` under which the outputs are looked up.
        fs: Filesystem that is inspected.
        workspace: Only outputs whose ``index_key`` workspace equals this
            value are included.

    Returns:
        A new ``DataIndex`` holding one entry per matching output, plus the
        entries produced by ``build_entries`` for outputs whose metadata
        reports a directory.
    """
    from dvc_data.index import DataIndex, DataIndexEntry
    from dvc_data.index.build import build_entries, build_entry

    result = DataIndex()
    for out in index.outs:
        if not out.use_cache:
            continue

        out_workspace, out_key = out.index_key
        if out_workspace != workspace:
            continue

        rel_parts = out.fs.path.relparts(out.fs_path, out.repo.root_dir)
        target = fs.path.join(path, *rel_parts)

        try:
            root_entry = build_entry(target, fs)
        except FileNotFoundError:
            # Nothing exists at the target: keep a bare placeholder entry
            # (path and fs only) so the output's key is still in the index.
            root_entry = DataIndexEntry(path=target, fs=fs)

        root_entry.key = out_key

        if root_entry.meta and root_entry.meta.isdir:
            # Directory: flag the root entry as loaded before adding it,
            # then add the entries built for its contents.
            root_entry.loaded = True
            result.add(root_entry)

            for child in build_entries(target, fs):
                # NOTE: whether the root will be returned by build_entries
                # depends on the filesystem (e.g. local doesn't, but s3 does).
                is_root = not child.key or child.key == ("",)
                child.key = out_key if is_root else out_key + child.key
                result.add(child)
        else:
            # No metadata, or not a directory: the single entry is enough.
            result.add(root_entry)

    return result
23 changes: 2 additions & 21 deletions dvc/repo/worktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def push_worktree(
jobs: Optional[int] = None,
**kwargs,
) -> int:
from dvc.repo.index import build_data_index
from dvc_data.index import checkout

view = worktree_view(
Expand All @@ -108,7 +109,7 @@ def push_worktree(
new_index = view.data["repo"]
if remote.worktree:
logger.debug("indexing latest worktree for '%s'", remote.path)
old_index = _build_worktree_index(repo, remote, view)
old_index = build_data_index(view, remote.path, remote.fs)
logger.debug("Pushing worktree changes to '%s'", remote.path)
else:
old_index = None
Expand Down Expand Up @@ -147,26 +148,6 @@ def push_worktree(
return pushed


def _build_worktree_index(
    repo: "Repo", remote: "Remote", view: "IndexView"
) -> "DataIndex":
    """Build a ``DataIndex`` from what is present on ``remote`` for ``view``.

    For every output in ``view.outs``, the output's path relative to
    ``repo.root_dir`` is re-rooted under ``remote.path`` on ``remote.fs``,
    and the entries that ``build_entries`` yields for that location are
    added to the index under the output's own key.

    Args:
        repo: Repo whose ``root_dir`` the output paths are made relative to.
        remote: Remote providing the ``fs`` and base ``path`` to inspect.
        view: Index view whose ``outs`` are enumerated.

    Returns:
        A new ``DataIndex`` containing the re-keyed entries.
    """
    from dvc_data.index import DataIndex
    from dvc_data.index.build import build_entries

    result = DataIndex()
    for out in view.outs:
        # Only the key half of index_key is needed here; the workspace
        # half is deliberately ignored.
        _, out_key = out.index_key
        rel_parts = out.fs.path.relparts(out.fs_path, repo.root_dir)
        remote_path = remote.fs.path.join(remote.path, *rel_parts)

        for found in build_entries(remote_path, remote.fs):
            # An empty key or ("",) denotes the output root itself, so it
            # maps straight onto the output's key; anything else is nested
            # beneath it.
            is_root = not found.key or found.key == ("",)
            found.key = out_key if is_root else out_key + found.key
            result.add(found)

    return result


def _update_out_meta(
out: "Output", index: Union["DataIndex", "DataIndexView"]
):
Expand Down