Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions dvc/commands/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def run(self): # noqa: C901, PLR0912
num=self.args.num,
not_in_remote=self.args.not_in_remote,
dry=self.args.dry,
skip_failed=self.args.skip_failed,
)
return 0

Expand Down Expand Up @@ -188,6 +189,12 @@ def add_parser(subparsers, parent_parser):
help="Remote storage to collect garbage in",
metavar="<name>",
)
gc_parser.add_argument(
"--skip-failed",
action="store_true",
default=False,
help="Skip revisions that fail when collected.",
)
gc_parser.add_argument(
"-f",
"--force",
Expand Down
12 changes: 12 additions & 0 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,3 +357,15 @@ def __init__(

desc = f" @ {stage or version}" if (stage or version) else ""
super().__init__(f"Unable to find artifact '{name}{desc}'")


class RevCollectionError(DvcException):
    """Raised when a revision could not be collected.

    The failing revision is kept on the instance as ``rev`` so that callers
    can inspect it in addition to reading the message.

    Args:
        rev (str): the revision that failed (or "workspace").
    """

    def __init__(self, rev):
        # Store the revision before building the message, as callers may
        # read ``exc.rev`` directly.
        self.rev = rev
        message = f"Failed to collect '{rev}'"
        super().__init__(message)
38 changes: 25 additions & 13 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
Union,
)

from dvc.exceptions import NotDvcRepoError, OutputNotFoundError
from dvc.exceptions import (
DvcException,
NotDvcRepoError,
OutputNotFoundError,
RevCollectionError,
)
from dvc.ignore import DvcIgnoreFilter
from dvc.log import logger
from dvc.utils.objects import cached_property
Expand Down Expand Up @@ -487,6 +492,7 @@ def used_objs( # noqa: PLR0913
revs=None,
num=1,
push: bool = False,
skip_failed: bool = False,
):
"""Get the stages related to the given target and collect
the `info` of its outputs.
Expand All @@ -505,7 +511,7 @@ def used_objs( # noqa: PLR0913
"""
used = defaultdict(set)

for _ in self.brancher(
for rev in self.brancher(
revs=revs,
all_branches=all_branches,
all_tags=all_tags,
Expand All @@ -514,17 +520,23 @@ def used_objs( # noqa: PLR0913
commit_date=commit_date,
num=num,
):
for odb, objs in self.index.used_objs(
targets,
remote=remote,
force=force,
jobs=jobs,
recursive=recursive,
with_deps=with_deps,
push=push,
).items():
used[odb].update(objs)

try:
for odb, objs in self.index.used_objs(
targets,
remote=remote,
force=force,
jobs=jobs,
recursive=recursive,
with_deps=with_deps,
push=push,
).items():
used[odb].update(objs)
except DvcException as exc:
rev = rev or "workspace"
if skip_failed:
logger.warning("Failed to collect '%s', skipping", rev)
else:
raise RevCollectionError(rev) from exc
if used_run_cache:
for odb, objs in self.stage_cache.get_used_objs(
used_run_cache, remote=remote, force=force, jobs=jobs
Expand Down
2 changes: 2 additions & 0 deletions dvc/repo/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def gc( # noqa: PLR0913, C901
num: Optional[int] = None,
not_in_remote: bool = False,
dry: bool = False,
skip_failed: bool = False,
):
# require `workspace` to be true to come into effect.
# assume `workspace` to be enabled if any of `all_tags`, `all_commits`,
Expand Down Expand Up @@ -113,6 +114,7 @@ def gc( # noqa: PLR0913, C901
jobs=jobs,
revs=[rev] if rev else None,
num=num or 1,
skip_failed=skip_failed,
).items():
if odb not in odb_to_obj_ids:
odb_to_obj_ids[odb] = set()
Expand Down
15 changes: 13 additions & 2 deletions tests/func/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest

from dvc.cli import main
from dvc.exceptions import CollectCacheError, InvalidArgumentError
from dvc.exceptions import CollectCacheError, InvalidArgumentError, RevCollectionError
from dvc.fs import LocalFileSystem
from dvc.utils.fs import remove
from dvc_data.hashfile.db.local import LocalHashFileDB
Expand Down Expand Up @@ -111,8 +111,9 @@ def test_gc_no_dir_cache(tmp_dir, dvc):

remove(dir_stage.outs[0].cache_path)

with pytest.raises(CollectCacheError):
with pytest.raises(RevCollectionError) as exc:
dvc.gc(workspace=True)
assert type(exc.value.__cause__) == CollectCacheError

assert _count_files(dvc.cache.local.path) == 4
dvc.gc(force=True, workspace=True)
Expand Down Expand Up @@ -439,3 +440,13 @@ def test_gc_logging(caplog, dvc, good_and_bad_cache):
assert "Removed 3 objects from repo cache." in caplog.text
assert "No unused 'local' cache to remove." in caplog.text
assert "No unused 'legacy' cache to remove." in caplog.text


def test_gc_skip_failed(tmp_dir, dvc):
    """gc raises RevCollectionError by default, but not with skip_failed."""
    # Write a dvc.yaml whose content this test expects to make collection
    # of the workspace fail.
    with open("dvc.yaml", mode="w") as dvcfile:
        dvcfile.write("\ninvalid")

    gc_kwargs = {"force": True, "workspace": True}

    # Without skip_failed the failure must surface as RevCollectionError.
    with pytest.raises(RevCollectionError):
        dvc.gc(**gc_kwargs)

    # With skip_failed the very same call is expected to complete.
    dvc.gc(**gc_kwargs, skip_failed=True)
2 changes: 2 additions & 0 deletions tests/unit/command/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def test_(dvc, scm, mocker):
"--projects",
"project1",
"project2",
"--skip-failed",
]
)
assert cli_args.func == CmdGC
Expand All @@ -51,6 +52,7 @@ def test_(dvc, scm, mocker):
num=None,
not_in_remote=False,
dry=True,
skip_failed=True,
)

cli_args = parse_args(["gc"])
Expand Down