Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 4 additions & 41 deletions dvc/command/remove.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,37 @@
import argparse
import logging

import dvc.prompt as prompt
from dvc.command.base import CmdBase, append_doc_link
from dvc.exceptions import DvcException

logger = logging.getLogger(__name__)


class CmdRemove(CmdBase):
def _is_dvc_only(self, target):
if not self.args.purge:
return True

if self.args.force:
return False

msg = "Are you sure you want to remove '{}' with its outputs?".format(
target
)

if prompt.confirm(msg):
return False

raise DvcException(
"Cannot purge without a confirmation from the user."
" Use `-f` to force."
)

def run(self):
for target in self.args.targets:
try:
dvc_only = self._is_dvc_only(target)
self.repo.remove(target, dvc_only=dvc_only)
self.repo.remove(target, outs=self.args.outs)
except DvcException:
logger.exception(f"failed to remove '{target}'")
return 1
return 0


def add_parser(subparsers, parent_parser):
REMOVE_HELP = "Remove DVC-tracked files or directories."
REMOVE_HELP = "Remove stage entry and unprotect outputs"
remove_parser = subparsers.add_parser(
"remove",
parents=[parent_parser],
description=append_doc_link(REMOVE_HELP, "remove"),
help=REMOVE_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
remove_parser_group = remove_parser.add_mutually_exclusive_group()
remove_parser_group.add_argument(
"-o",
"--outs",
action="store_true",
default=True,
help="Only remove DVC-file outputs. (Default)",
)
remove_parser_group.add_argument(
"-p",
"--purge",
action="store_true",
default=False,
help="Remove DVC-file and all its outputs.",
)
remove_parser.add_argument(
"-f",
"--force",
"--outs",
action="store_true",
default=False,
help="Force purge.",
help="Remove outputs as well.",
)
remove_parser.add_argument(
"targets", nargs="+", help="DVC-files to remove."
Expand Down
6 changes: 3 additions & 3 deletions dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def run(self):
fname=self.args.file,
wdir=self.args.wdir,
no_exec=self.args.no_exec,
overwrite=self.args.overwrite_dvcfile,
overwrite=self.args.overwrite,
run_cache=not self.args.no_run_cache,
no_commit=self.args.no_commit,
outs_persist=self.args.outs_persist,
Expand Down Expand Up @@ -177,10 +177,10 @@ def add_parser(subparsers, parent_parser):
help="Only create stage file without actually running it.",
)
run_parser.add_argument(
"--overwrite-dvcfile",
"--overwrite",
action="store_true",
default=False,
help="Overwrite existing DVC-file without asking for confirmation.",
help="Overwrite existing stage",
)
run_parser.add_argument(
"--no-run-cache",
Expand Down
39 changes: 37 additions & 2 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def dump(self, stage, **kwargs):
"Saving information to '{file}'.".format(file=relpath(self.path))
)
dump_stage_file(self.path, serialize.to_single_stage_file(stage))
self.repo.scm.track_file(relpath(self.path))
self.repo.scm.track_file(self.relpath)

def remove_with_prompt(self, force=False):
if not self.exists():
Expand All @@ -165,6 +165,9 @@ def remove_with_prompt(self, force=False):

self.remove()

def remove_stage(self, stage):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't FileMixin contain this method? It feels a little bit off to accept stage here.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pared, how about:

Suggested change
def remove_stage(self, stage):
def remove_stage(self, stage_name):

self.remove()


class PipelineFile(FileMixin):
"""Abstraction for pipelines file, .yaml + .lock combined."""
Expand Down Expand Up @@ -212,7 +215,7 @@ def _dump_pipeline_file(self, stage):
"Adding stage '%s' to '%s'", stage.name, self.relpath,
)
dump_stage_file(self.path, data)
self.repo.scm.track_file(relpath(self.path))
self.repo.scm.track_file(self.relpath)

@property
def stage(self):
Expand All @@ -234,6 +237,22 @@ def remove(self, force=False):
super().remove()
self._lockfile.remove()

def remove_stage(self, stage):
self._lockfile.remove_stage(stage)
if not self.exists():
return

with open(self.path, "r") as f:
d = parse_stage_for_update(f.read(), self.path)

self.validate(d, self.path)
if stage.name not in d.get("stages", {}):
return

logger.debug("Removing '%s' from '%s'", stage.name, self.path)
del d["stages"][stage.name]
dump_stage_file(self.path, d)


class Lockfile(FileMixin):
from dvc.schema import COMPILED_LOCKFILE_SCHEMA as SCHEMA
Expand Down Expand Up @@ -271,6 +290,22 @@ def dump(self, stage, **kwargs):
if modified:
self.repo.scm.track_file(self.relpath)

def remove_stage(self, stage):
if not self.exists():
return

with open(self.path) as f:
d = parse_stage_for_update(f.read(), self.path)
self.validate(d, self.path)

if stage.name not in d:
return

logger.debug("Removing '%s' from '%s'", stage.name, self.path)
del d[stage.name]

dump_stage_file(self.path, d)


class Dvcfile:
def __new__(cls, repo, path, **kwargs):
Expand Down
10 changes: 3 additions & 7 deletions dvc/repo/remove.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,11 @@


@locked
def remove(self, target, dvc_only=False):
from ..dvcfile import Dvcfile, is_valid_filename

def remove(self, target, outs=False):
path, name = parse_target(target)
stages = self.get_stages(path, name)
for stage in stages:
stage.remove_outs(force=True)

if path and is_valid_filename(path) and not dvc_only:
Dvcfile(self, path).remove()
for stage in stages:
stage.remove(remove_outs=outs, force=outs)

return stages
14 changes: 10 additions & 4 deletions dvc/repo/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
from funcy import concat, first

from dvc.exceptions import InvalidArgumentError
from dvc.stage.exceptions import DuplicateStageName, InvalidStageName
from dvc.stage.exceptions import (
DuplicateStageName,
InvalidStageName,
StageFileAlreadyExistsError,
)

from ..exceptions import OutputDuplicationError
from . import locked
Expand Down Expand Up @@ -74,10 +78,12 @@ def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):

dvcfile = Dvcfile(self, stage.path)
if dvcfile.exists():
if stage_name and stage_name in dvcfile.stages:
if kwargs.get("overwrite", True):
dvcfile.remove_stage(stage)
elif stage_cls != PipelineStage:
raise StageFileAlreadyExistsError(dvcfile.relpath)
elif stage_name and stage_name in dvcfile.stages:
raise DuplicateStageName(stage_name, dvcfile)
if stage_cls != PipelineStage:
dvcfile.remove_with_prompt(force=kwargs.get("overwrite", True))

try:
self.check_modified_graph([stage])
Expand Down
5 changes: 3 additions & 2 deletions dvc/stage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,12 +280,13 @@ def unprotect_outs(self):
out.unprotect()

@rwlocked(write=["outs"])
def remove(self, force=False, remove_outs=True):
def remove(self, force=False, remove_outs=True, purge=True):
if remove_outs:
self.remove_outs(ignore_remove=True, force=force)
else:
self.unprotect_outs()
self.dvcfile.remove()
if purge:
self.dvcfile.remove_stage(self)

@rwlocked(read=["deps"], write=["outs"])
def reproduce(self, interactive=False, **kwargs):
Expand Down
134 changes: 132 additions & 2 deletions tests/func/test_dvcfile.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import textwrap

import pytest

from dvc.dvcfile import PIPELINE_FILE, Dvcfile
from dvc.stage.exceptions import StageFileDoesNotExistError
from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK, Dvcfile
from dvc.stage.exceptions import (
StageFileDoesNotExistError,
StageFileFormatError,
)
from dvc.stage.loader import StageNotFound
from dvc.utils.stage import dump_stage_file


def test_run_load_one_for_multistage(tmp_dir, dvc):
Expand Down Expand Up @@ -160,3 +166,127 @@ def test_stage_collection(tmp_dir, dvc):
single_stage=True,
)
assert {s for s in dvc.stages} == {stage1, stage3, stage2}


def test_remove_stage(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar", name="copy-foo-bar")
stage2 = run_copy("bar", "foobar", name="copy-bar-foobar")

dvc_file = Dvcfile(dvc, PIPELINE_FILE)
assert dvc_file.exists()
assert {"copy-bar-foobar", "copy-foo-bar"} == set(
dvc_file._load()[0]["stages"].keys()
)

dvc_file.remove_stage(stage)

assert ["copy-bar-foobar"] == list(dvc_file._load()[0]["stages"].keys())

# sanity check
stage2.reload()

# re-check to see if it fails if there's no stage entry
dvc_file.remove_stage(stage)
dvc_file.remove(force=True)
# should not fail when there's no file at all.
dvc_file.remove_stage(stage)


def test_remove_stage_lockfile(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar", name="copy-foo-bar")
stage2 = run_copy("bar", "foobar", name="copy-bar-foobar")

dvc_file = Dvcfile(dvc, PIPELINE_FILE)
lock_file = dvc_file._lockfile
assert dvc_file.exists()
assert lock_file.exists()
assert {"copy-bar-foobar", "copy-foo-bar"} == set(lock_file.load().keys())
lock_file.remove_stage(stage)

assert ["copy-bar-foobar"] == list(lock_file.load().keys())

# sanity check
stage2.reload()

# re-check to see if it fails if there's no stage entry
lock_file.remove_stage(stage)
lock_file.remove()
# should not fail when there's no file at all.
lock_file.remove_stage(stage)


def test_remove_stage_dvcfiles(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar", single_stage=True)

dvc_file = Dvcfile(dvc, stage.path)
assert dvc_file.exists()
dvc_file.remove_stage(stage)
assert not dvc_file.exists()

# re-check to see if it fails if there's no stage entry
dvc_file.remove_stage(stage)
dvc_file.remove(force=True)

# should not fail when there's no file at all.
dvc_file.remove_stage(stage)


def test_remove_stage_on_lockfile_format_error(tmp_dir, dvc, run_copy):
tmp_dir.gen("foo", "foo")
stage = run_copy("foo", "bar", name="copy-foo-bar")
dvc_file = Dvcfile(dvc, stage.path)
lock_file = dvc_file._lockfile

data = dvc_file._load()[0]
lock_data = lock_file.load()
lock_data["gibberish"] = True
data["gibberish"] = True
dump_stage_file(lock_file.relpath, lock_data)
with pytest.raises(StageFileFormatError):
dvc_file.remove_stage(stage)

lock_file.remove()
dvc_file.dump(stage)

dump_stage_file(dvc_file.relpath, data)
with pytest.raises(StageFileFormatError):
dvc_file.remove_stage(stage)


def test_remove_stage_preserves_comment(tmp_dir, dvc, run_copy):
tmp_dir.gen(
"dvc.yaml",
textwrap.dedent(
"""\
stages:
generate-foo:
cmd: "echo foo > foo"
# This copies 'foo' text to 'foo' file.
outs:
- foo
copy-foo-bar:
cmd: "python copy.py foo bar"
deps:
- foo
outs:
- bar"""
),
)

dvc.reproduce(PIPELINE_FILE)

dvc_file = Dvcfile(dvc, PIPELINE_FILE)

assert dvc_file.exists()
assert (tmp_dir / PIPELINE_LOCK).exists()
assert (tmp_dir / "foo").exists()
assert (tmp_dir / "bar").exists()

dvc_file.remove_stage(dvc_file.stages["copy-foo-bar"])
assert (
"# This copies 'foo' text to 'foo' file."
in (tmp_dir / PIPELINE_FILE).read_text()
)
Loading