diff --git a/dvc/command/pipeline.py b/dvc/command/pipeline.py index a7c82dadda..ab70a77ea4 100644 --- a/dvc/command/pipeline.py +++ b/dvc/command/pipeline.py @@ -10,11 +10,10 @@ class CmdPipelineShow(CmdBase): def _show(self, target, commands, outs, locked): import networkx - from dvc import dvcfile from dvc.utils import parse_target path, name = parse_target(target) - stage = dvcfile.Dvcfile(self.repo, path).stages[name] + stage = self.repo.get_stage(path, name) G = self.repo.graph stages = networkx.dfs_postorder_nodes(G, stage) if locked: @@ -58,12 +57,11 @@ def _build_output_graph(G, target_stage): def _build_graph(self, target, commands=False, outs=False): import networkx - from dvc import dvcfile from dvc.repo.graph import get_pipeline from dvc.utils import parse_target path, name = parse_target(target) - target_stage = dvcfile.Dvcfile(self.repo, path).stages[name] + target_stage = self.repo.get_stage(path, name) G = get_pipeline(self.repo.pipelines, target_stage) nodes = set() diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 0a62f9437b..e9cf1f83a9 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -322,3 +322,15 @@ def __init__(self, path_info): class IsADirectoryError(DvcException): """Raised when a file operation is requested on a directory.""" + + +class NoOutputOrStageError(DvcException): + """ + Raised when the target is neither an output nor a stage name in dvc.yaml + """ + + def __init__(self, target, file): + super().__init__( + f"'{target}' " + f"does not exist as an output or a stage name in '{file}'" + ) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 145144189c..32030ea6a9 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -1,3 +1,4 @@ +import logging import os from contextlib import contextmanager from functools import wraps @@ -5,17 +6,25 @@ from funcy import cached_property, cat, first from dvc.config import Config +from dvc.dvcfile import PIPELINE_FILE, Dvcfile, is_valid_filename from dvc.exceptions import FileMissingError from dvc.exceptions import IsADirectoryError as DvcIsADirectoryError -from dvc.exceptions import NotDvcRepoError, OutputNotFoundError +from dvc.exceptions import ( + NoOutputOrStageError, + NotDvcRepoError, + OutputNotFoundError, +) from dvc.ignore import CleanTree from dvc.path_info import PathInfo from dvc.repo.tree import RepoTree from dvc.utils.fs import path_isin +from ..stage.exceptions import StageFileDoesNotExistError, StageNotFound from ..utils import parse_target from .graph import check_acyclic, get_pipeline, get_pipelines +logger = logging.getLogger(__name__) + def locked(f): @wraps(f) @@ -181,6 +190,25 @@ def _ignore(self): self.scm.ignore_list(flist) + def get_stage(self, path=None, name=None): + if not path: + path = PIPELINE_FILE + logger.debug("Assuming '%s' to be a stage inside '%s'", name, path) + + dvcfile = Dvcfile(self, path) + return dvcfile.stages[name] + + def get_stages(self, path=None, name=None): + if not path: + path = PIPELINE_FILE + logger.debug("Assuming '%s' to be a stage inside '%s'", name, path) + + if name: + return [self.get_stage(path, name)] + + dvcfile = Dvcfile(self, path) + return list(dvcfile.stages.values()) + def check_modified_graph(self, new_stages): """Generate graph including the new stage to check for errors""" # Building graph might be costly for the ones with many DVC-files, @@ -204,10 +232,9 @@ def _collect_inside(self, path, graph): stages = nx.dfs_postorder_nodes(graph) return [stage for stage in stages if path_isin(stage.path, path)] - def collect(self, target, with_deps=False, recursive=False, graph=None): - import networkx as nx - from ..dvcfile import Dvcfile - + def collect( + self, target=None, with_deps=False, recursive=False, graph=None + ): if not target: return list(graph) if graph else self.stages @@ -217,36 +244,81 @@ def collect(self, target, with_deps=False, recursive=False, graph=None): ) path, name = parse_target(target) - dvcfile = Dvcfile(self, path) - stages = list(dvcfile.stages.filter(name).values()) + stages = self.get_stages(path, name) if not with_deps: return stages res = set() for stage in stages: - pipeline = get_pipeline(get_pipelines(graph or self.graph), stage) - res.update(nx.dfs_postorder_nodes(pipeline, stage)) + res.update(self._collect_pipeline(stage, graph=graph)) return res - def collect_granular(self, target, *args, **kwargs): - from ..dvcfile import Dvcfile, is_valid_filename + def _collect_pipeline(self, stage, graph=None): + import networkx as nx + + pipeline = get_pipeline(get_pipelines(graph or self.graph), stage) + return nx.dfs_postorder_nodes(pipeline, stage) + + def _collect_from_default_dvcfile(self, target): + dvcfile = Dvcfile(self, PIPELINE_FILE) + if dvcfile.exists(): + return dvcfile.stages.get(target) + def collect_granular( + self, target=None, with_deps=False, recursive=False, graph=None + ): + """ + Priority is in the order of following in case of ambiguity: + - .dvc file or .yaml file + - dir if recursive and directory exists + - stage_name + - output file + """ if not target: return [(stage, None) for stage in self.stages] file, name = parse_target(target) - if is_valid_filename(file) and not kwargs.get("with_deps"): - # Optimization: do not collect the graph for a specific .dvc target - stages = Dvcfile(self, file).stages.filter(name) - return [(stage, None) for stage in stages.values()] + stages = [] - try: - (out,) = self.find_outs_by_path(file, strict=False) - filter_info = PathInfo(os.path.abspath(file)) - return [(out.stage, filter_info)] - except OutputNotFoundError: - stages = self.collect(target, *args, **kwargs) - return [(stage, None) for stage in stages] + # Optimization: do not collect the graph for a specific target + if not file: + # parsing is ambiguous when it does not have a colon + # or if it's not a dvcfile, as it can be a stage name + # in `dvc.yaml` or, an output in a stage. + logger.debug( + "Checking if stage '%s' is in '%s'", target, PIPELINE_FILE + ) + if not (recursive and os.path.isdir(target)): + stage = self._collect_from_default_dvcfile(target) + if stage: + stages = ( + self._collect_pipeline(stage) if with_deps else [stage] + ) + elif not with_deps and is_valid_filename(file): + stages = self.get_stages(file, name) + + if not stages: + if not (recursive and os.path.isdir(target)): + try: + (out,) = self.find_outs_by_path(target, strict=False) + filter_info = PathInfo(os.path.abspath(target)) + return [(out.stage, filter_info)] + except OutputNotFoundError: + pass + + try: + stages = self.collect(target, with_deps, recursive, graph) + except StageFileDoesNotExistError as exc: + # collect() might try to use `target` as a stage name + # and throw error that dvc.yaml does not exist, whereas it + # should say that both stage name and file does not exist. + if file and is_valid_filename(file): + raise + raise NoOutputOrStageError(target, exc.file) from exc + except StageNotFound as exc: + raise NoOutputOrStageError(target, exc.file) from exc + + return [(stage, None) for stage in stages] def used_cache( self, @@ -443,16 +515,13 @@ def plot_templates(self): return PlotTemplates(self.dvc_dir) def _collect_stages(self): - from dvc.dvcfile import Dvcfile, is_valid_filename - stages = [] outs = set() for root, dirs, files in self.tree.walk(self.root_dir): for file_name in filter(is_valid_filename, files): path = os.path.join(root, file_name) - stage_loader = Dvcfile(self, path).stages - stages.extend(stage_loader.values()) + stages.extend(self.get_stages(path)) outs.update( out.fspath for stage in stages diff --git a/dvc/repo/checkout.py b/dvc/repo/checkout.py index 9adda0e047..bcfe31223f 100644 --- a/dvc/repo/checkout.py +++ b/dvc/repo/checkout.py @@ -1,7 +1,11 @@ import logging import os -from dvc.exceptions import CheckoutError, CheckoutErrorSuggestGit +from dvc.exceptions import ( + CheckoutError, + CheckoutErrorSuggestGit, + NoOutputOrStageError, +) from dvc.progress import Tqdm from dvc.utils import relpath @@ -18,11 +22,11 @@ def _get_unused_links(repo): return repo.state.get_unused_links(used) -def _fspath_dir(path, root): +def _fspath_dir(path): if not os.path.exists(str(path)): return str(path) - path = relpath(path, root) + path = relpath(path) return os.path.join(path, "") if os.path.isdir(path) else path @@ -56,7 +60,7 @@ def _checkout( targets = [None] unused = _get_unused_links(self) - stats["deleted"] = [_fspath_dir(u, self.root_dir) for u in unused] + stats["deleted"] = [_fspath_dir(u) for u in unused] self.state.remove_links(unused) if isinstance(targets, str): @@ -70,7 +74,11 @@ def _checkout( target, with_deps=with_deps, recursive=recursive ) ) - except (StageFileDoesNotExistError, StageFileBadNameError) as exc: + except ( + StageFileDoesNotExistError, + StageFileBadNameError, + NoOutputOrStageError, + ) as exc: if not target: raise raise CheckoutErrorSuggestGit(target) from exc @@ -87,9 +95,7 @@ def _checkout( filter_info=filter_info, ) for key, items in result.items(): - stats[key].extend( - _fspath_dir(path, self.root_dir) for path in items - ) + stats[key].extend(_fspath_dir(path) for path in items) if stats.get("failed"): raise CheckoutError(stats["failed"], stats) diff --git a/dvc/repo/commit.py b/dvc/repo/commit.py index 4e63298153..e63ae59a42 100644 --- a/dvc/repo/commit.py +++ b/dvc/repo/commit.py @@ -46,3 +46,4 @@ def commit(self, target, with_deps=False, recursive=False, force=False): stage.commit() Dvcfile(self, stage.path).dump(stage) + return stages diff --git a/dvc/repo/lock.py b/dvc/repo/lock.py index b3d006950a..9b1f592683 100644 --- a/dvc/repo/lock.py +++ b/dvc/repo/lock.py @@ -3,13 +3,11 @@ @locked def lock(self, target, unlock=False): - from .. import dvcfile from dvc.utils import parse_target path, name = parse_target(target) - dvcfile = dvcfile.Dvcfile(self, path) - stage = dvcfile.stages[name] + stage = self.get_stage(path, name) stage.locked = False if unlock else True - dvcfile.dump(stage, update_pipeline=True) + stage.dvcfile.dump(stage, update_pipeline=True) return stage diff --git a/dvc/repo/remove.py b/dvc/repo/remove.py index 5c1f1023cf..d00f330099 100644 --- a/dvc/repo/remove.py +++ b/dvc/repo/remove.py @@ -8,15 +8,14 @@ @locked def remove(self, target, dvc_only=False): - from ..dvcfile import Dvcfile + from ..dvcfile import Dvcfile, is_valid_filename path, name = parse_target(target) - dvcfile = Dvcfile(self, path) - stages = list(dvcfile.stages.filter(name).values()) + stages = self.get_stages(path, name) for stage in stages: stage.remove_outs(force=True) - if not dvc_only: - dvcfile.remove() + if path and is_valid_filename(path) and not dvc_only: + Dvcfile(self, path).remove() return stages diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index de36f93c8d..e6cd902fae 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -61,7 +61,6 @@ def reproduce( all_pipelines=False, **kwargs ): - from ..dvcfile import Dvcfile from dvc.utils import parse_target if not target and not all_pipelines: @@ -81,8 +80,7 @@ def reproduce( if all_pipelines: pipelines = active_pipelines else: - dvcfile = Dvcfile(self, path) - stage = dvcfile.stages[name] + stage = self.get_stage(path, name) pipelines = [get_pipeline(active_pipelines, stage)] targets = [] diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py index 8a20381401..129fc4818f 100644 --- a/dvc/stage/__init__.py +++ b/dvc/stage/__init__.py @@ -144,14 +144,10 @@ def dvcfile(self, dvcfile): self._dvcfile = dvcfile def __repr__(self): - return "Stage: '{path}'".format( - path=self.path_in_repo if self.path else "No path" - ) + return f"Stage: '{self.addressing}'" def __str__(self): - return "stage: '{path}'".format( - path=self.relpath if self.path else "No path" - ) + return f"stage: '{self.addressing}'" @property def addressing(self): @@ -159,7 +155,7 @@ def addressing(self): Useful for alternative presentations where we don't need `Stage:` prefix. """ - return self.relpath + return self.relpath if self.path else "No path" def __hash__(self): return hash(self.path_in_repo) @@ -530,19 +526,13 @@ def __eq__(self, other): def __hash__(self): return hash((self.path_in_repo, self.name)) - def __repr__(self): - return "Stage: '{path}:{name}'".format( - path=self.relpath if self.path else "No path", name=self.name - ) - - def __str__(self): - return "stage: '{path}:{name}'".format( - path=self.relpath if self.path else "No path", name=self.name - ) - @property def addressing(self): - return super().addressing + ":" + self.name + from dvc.dvcfile import PIPELINE_FILE + + if self.path and self.relpath == PIPELINE_FILE: + return self.name + return f"{super().addressing}:{self.name}" def reload(self): return self.dvcfile.stages[self.name] diff --git a/dvc/stage/exceptions.py b/dvc/stage/exceptions.py index 4be365238f..6ad7c2f81e 100644 --- a/dvc/stage/exceptions.py +++ b/dvc/stage/exceptions.py @@ -15,15 +15,8 @@ class StageFileFormatError(DvcException): class StageFileDoesNotExistError(DvcException): def __init__(self, fname): - from dvc.dvcfile import DVC_FILE_SUFFIX, is_dvc_file - - msg = f"'{fname}' does not exist." - - sname = fname + DVC_FILE_SUFFIX - if is_dvc_file(sname): - msg += f" Do you mean '{sname}'?" - - super().__init__(msg) + self.file = fname + super().__init__(f"'{self.file}' does not exist.") class StageFileAlreadyExistsError(DvcException): @@ -87,8 +80,10 @@ def __init__(self, missing_files): class StageNotFound(KeyError, DvcException): def __init__(self, file, name): + self.file = file.relpath + self.name = name super().__init__( - f"Stage '{name}' not found inside '{file.relpath}' file" + f"Stage '{self.name}' not found inside '{self.file}' file" ) diff --git a/dvc/stage/loader.py b/dvc/stage/loader.py index d973f31a94..169d5d3039 100644 --- a/dvc/stage/loader.py +++ b/dvc/stage/loader.py @@ -30,16 +30,6 @@ def __init__(self, dvcfile, stages_data, lockfile_data=None): self.stages_data = stages_data or {} self.lockfile_data = lockfile_data or {} - def filter(self, item=None): - if not item: - return self - - if item not in self: - raise StageNotFound(self.dvcfile, item) - - data = {item: self.stages_data[item]} if item in self else {} - return self.__class__(self.dvcfile, data, self.lockfile_data) - @staticmethod def fill_from_lock(stage, lock_data): from .params import StageParams @@ -224,9 +214,6 @@ def __init__(self, dvcfile, stage_data, stage_text=None): self.stage_data = stage_data or {} self.stage_text = stage_text - def filter(self, item=None): - return self - def __getitem__(self, item): if item: logger.warning( diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index b614d6cbe6..95e1278768 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -395,7 +395,7 @@ def format_link(link): def parse_target(target, default=None): - from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK + from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK, is_valid_filename from dvc.exceptions import DvcException if not target: @@ -409,13 +409,19 @@ def parse_target(target, default=None): match.group("path"), match.group("name"), ) + if path: + if os.path.basename(path) == PIPELINE_LOCK: + raise DvcException( + "Did you mean: `{}`?".format( + target.replace(".lock", ".yaml", 1) + ) + ) + if not name: + ret = (target, None) + return ret if is_valid_filename(target) else ret[::-1] + if not path: path = default or PIPELINE_FILE logger.debug("Assuming file to be '%s'", path) - if os.path.basename(path) == PIPELINE_LOCK: - raise DvcException( - "Did you mean: `{}`?".format(target.replace(".lock", ".yaml", 1)) - ) - return path, name diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index 171f16c93d..d47b597391 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -7,22 +7,20 @@ import pytest from mock import patch -from dvc.dvcfile import DVC_FILE_SUFFIX, Dvcfile +from dvc.dvcfile import DVC_FILE_SUFFIX, PIPELINE_FILE, Dvcfile from dvc.exceptions import ( CheckoutError, CheckoutErrorSuggestGit, ConfirmRemoveError, DvcException, + NoOutputOrStageError, ) from dvc.main import main from dvc.remote import S3Remote from dvc.remote.local import LocalRemote from dvc.repo import Repo as DvcRepo from dvc.stage import Stage -from dvc.stage.exceptions import ( - StageFileBadNameError, - StageFileDoesNotExistError, -) +from dvc.stage.exceptions import StageFileDoesNotExistError from dvc.system import System from dvc.utils import relpath from dvc.utils.fs import walk_files @@ -403,17 +401,26 @@ class TestCheckoutSuggestGit(TestRepro): def test(self): try: - self.dvc.checkout(targets=["gitbranch"]) + self.dvc.checkout(targets="gitbranch") except DvcException as exc: self.assertIsInstance(exc, CheckoutErrorSuggestGit) - self.assertIsInstance(exc.__cause__, StageFileDoesNotExistError) + self.assertIsInstance(exc.__cause__, NoOutputOrStageError) + self.assertIsInstance( + exc.__cause__.__cause__, StageFileDoesNotExistError + ) + + try: + self.dvc.checkout(targets=self.FOO) + except DvcException as exc: + self.assertIsInstance(exc, CheckoutErrorSuggestGit) + self.assertIsInstance(exc.__cause__, NoOutputOrStageError) self.assertIsNone(exc.__cause__.__cause__) try: - self.dvc.checkout(targets=[self.FOO]) + self.dvc.checkout(targets="looks-like-dvcfile.dvc") except DvcException as exc: self.assertIsInstance(exc, CheckoutErrorSuggestGit) - self.assertIsInstance(exc.__cause__, StageFileBadNameError) + self.assertIsInstance(exc.__cause__, StageFileDoesNotExistError) self.assertIsNone(exc.__cause__.__cause__) @@ -770,9 +777,51 @@ def test_checkout_for_external_outputs(tmp_dir, dvc): assert stats == {**empty_checkout, "modified": [str(file_path)]} -def test_checkouts_for_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy): - from dvc.dvcfile import PIPELINE_FILE +def test_checkouts_with_different_addressing(tmp_dir, dvc, run_copy): + tmp_dir.gen({"foo": "foo", "lorem": "lorem"}) + run_copy("foo", "bar", name="copy-foo-bar") + run_copy("lorem", "ipsum", name="copy-lorem-ipsum") + + (tmp_dir / "bar").unlink() + (tmp_dir / "ipsum").unlink() + assert set(dvc.checkout(PIPELINE_FILE)["added"]) == {"bar", "ipsum"} + + (tmp_dir / "bar").unlink() + (tmp_dir / "ipsum").unlink() + assert set(dvc.checkout(":")["added"]) == {"bar", "ipsum"} + + (tmp_dir / "bar").unlink() + assert dvc.checkout("copy-foo-bar")["added"] == ["bar"] + (tmp_dir / "bar").unlink() + assert dvc.checkout("dvc.yaml:copy-foo-bar")["added"] == ["bar"] + + (tmp_dir / "bar").unlink() + assert dvc.checkout(":copy-foo-bar")["added"] == ["bar"] + + (tmp_dir / "bar").unlink() + (tmp_dir / "data").mkdir() + with (tmp_dir / "data").chdir(): + assert dvc.checkout(relpath(tmp_dir / "dvc.yaml") + ":copy-foo-bar")[ + "added" + ] == [relpath(tmp_dir / "bar")] + + (tmp_dir / "bar").unlink() + assert dvc.checkout("bar")["added"] == ["bar"] + + +def test_checkouts_on_same_stage_name_and_output_name(tmp_dir, dvc, run_copy): + tmp_dir.gen("foo", "foo") + run_copy("foo", "bar", name="copy-foo-bar") + run_copy("foo", "copy-foo-bar", name="make_collision") + + (tmp_dir / "bar").unlink() + (tmp_dir / "copy-foo-bar").unlink() + assert dvc.checkout("copy-foo-bar")["added"] == ["bar"] + assert dvc.checkout("./copy-foo-bar")["added"] == ["copy-foo-bar"] + + +def test_checkouts_for_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy): tmp_dir.gen("foo", "foo") stage1 = run_copy("foo", "bar", name="copy-foo-bar") tmp_dir.gen("lorem", "lorem") diff --git a/tests/func/test_commit.py b/tests/func/test_commit.py index 3b3872e941..870573e9f9 100644 --- a/tests/func/test_commit.py +++ b/tests/func/test_commit.py @@ -1,5 +1,6 @@ import pytest +from dvc.dvcfile import PIPELINE_FILE from dvc.stage.exceptions import StageCommitError from dvc.utils.stage import dump_stage_file, load_stage_file @@ -81,3 +82,16 @@ def test_commit_no_exec(tmp_dir, dvc): assert dvc.status(stage.path) dvc.commit(stage.path, force=True) assert dvc.status(stage.path) == {} + + +def test_commit_pipeline_stage(tmp_dir, dvc, run_copy): + tmp_dir.gen("foo", "foo") + stage = run_copy("foo", "bar", no_commit=True, name="copy-foo-bar") + assert dvc.status(stage.addressing) + assert dvc.commit(stage.addressing, force=True) == [stage] + assert not dvc.status(stage.addressing) + + # just to confirm different variants work + assert dvc.commit(f":{stage.addressing}") == [stage] + assert dvc.commit(f"{PIPELINE_FILE}:{stage.addressing}") == [stage] + assert dvc.commit(PIPELINE_FILE) == [stage] diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py index bbb201c296..fb5da6e751 100644 --- a/tests/func/test_data_cloud.py +++ b/tests/func/test_data_cloud.py @@ -932,3 +932,24 @@ def test_push_pull_all(tmp_dir, scm, dvc, setup_remote, key, expected): clean(["foo", "bar", "baz"], dvc) assert dvc.pull(**{key: True})["fetched"] == expected + + +def test_push_pull_fetch_pipeline_stages(tmp_dir, dvc, run_copy, setup_remote): + remote_path = setup_remote(dvc) + tmp_dir.dvc_gen("foo", "foo") + run_copy("foo", "bar", no_commit=True, name="copy-foo-bar") + + dvc.push("copy-foo-bar") + assert len(recurse_list_dir(remote_path)) == 1 + # pushing everything so as we can check pull/fetch only downloads + # from specified targets + dvc.push() + clean(["foo", "bar"], dvc) + + dvc.pull("copy-foo-bar") + assert (tmp_dir / "bar").exists() + assert len(recurse_list_dir(dvc.cache.local.cache_dir)) == 1 + clean(["bar"], dvc) + + dvc.fetch("copy-foo-bar") + assert len(recurse_list_dir(dvc.cache.local.cache_dir)) == 1 diff --git a/tests/func/test_dvcfile.py b/tests/func/test_dvcfile.py index 51e6e00cee..4ba82e5bd5 100644 --- a/tests/func/test_dvcfile.py +++ b/tests/func/test_dvcfile.py @@ -1,10 +1,7 @@ import pytest from dvc.dvcfile import PIPELINE_FILE, Dvcfile -from dvc.stage.exceptions import ( - StageFileDoesNotExistError, - StageNameUnspecified, -) +from dvc.stage.exceptions import StageFileDoesNotExistError from dvc.stage.loader import StageNotFound @@ -163,37 +160,3 @@ def test_stage_collection(tmp_dir, dvc): single_stage=True, ) assert {s for s in dvc.stages} == {stage1, stage3, stage2} - - -def test_stage_filter(tmp_dir, dvc, run_copy): - tmp_dir.gen("foo", "foo") - stage1 = run_copy("foo", "bar", name="copy-foo-bar") - stage2 = run_copy("bar", "bax", name="copy-bar-bax") - stage3 = run_copy("bax", "baz", name="copy-bax-baz") - stages = Dvcfile(dvc, PIPELINE_FILE).stages - assert set(stages.filter(None).values()) == { - stage1, - stage2, - stage3, - } - assert set(stages.filter("copy-bar-bax").values()) == {stage2} - assert stages.filter("copy-bar-bax").get("copy-bar-bax") == stage2 - with pytest.raises(StageNameUnspecified): - stages.get(None) - - with pytest.raises(StageNotFound): - assert stages["unknown"] - - with pytest.raises(StageNotFound): - assert stages.filter("unknown") - - -def test_stage_filter_in_singlestage_file(tmp_dir, dvc, run_copy): - tmp_dir.gen("foo", "foo") - stage = run_copy("foo", "bar", single_stage=True) - dvcfile = Dvcfile(dvc, stage.path) - assert set(dvcfile.stages.filter(None).values()) == {stage} - assert dvcfile.stages.filter(None).get(None) == stage - assert dvcfile.stages.filter("copy-bar-bax").get("copy-bar-bax") == stage - assert dvcfile.stages.filter("copy-bar-bax").get(None) == stage - assert set(dvcfile.stages.filter("unknown").values()) == {stage} diff --git a/tests/func/test_pipeline.py b/tests/func/test_pipeline.py index 765b45dfac..d52c8852b5 100644 --- a/tests/func/test_pipeline.py +++ b/tests/func/test_pipeline.py @@ -296,20 +296,20 @@ def test_pipeline_list_show_multistage(tmp_dir, dvc, run_copy, caplog): with caplog.at_level(logging.INFO, "dvc"): command._show("foobar.dvc", False, False, False) output = caplog.text.splitlines() - assert "dvc.yaml:copy-foo-bar" in output[0] + assert "copy-foo-bar" in output[0] assert "foobar.dvc" in output[1] caplog.clear() with caplog.at_level(logging.INFO, "dvc"): - command._show("dvc.yaml:copy-foo-bar", False, False, False) - assert "dvc.yaml:copy-foo-bar" in caplog.text + command._show("copy-foo-bar", False, False, False) + assert "copy-foo-bar" in caplog.text assert "foobar.dvc" not in caplog.text command = CmdPipelineList([]) caplog.clear() with caplog.at_level(logging.INFO, "dvc"): command.run() - assert "dvc.yaml:copy-foo-bar" in caplog.text + assert "copy-foo-bar" in caplog.text assert "foobar.dvc" in caplog.text assert "1 pipelines in total" @@ -320,13 +320,13 @@ def test_pipeline_ascii_multistage(tmp_dir, dvc, run_copy): run_copy("bar", "foobar", single_stage=True) command = CmdPipelineShow([]) nodes, edges, _ = command._build_graph("foobar.dvc") - assert set(nodes) == {"dvc.yaml:copy-foo-bar", "foobar.dvc"} + assert set(nodes) == {"copy-foo-bar", "foobar.dvc"} assert set(edges) == { - ("foobar.dvc", "dvc.yaml:copy-foo-bar"), + ("foobar.dvc", "copy-foo-bar"), } - nodes, *_ = command._build_graph("dvc.yaml:copy-foo-bar") - assert set(nodes) == {"dvc.yaml:copy-foo-bar"} + nodes, *_ = command._build_graph("copy-foo-bar") + assert set(nodes) == {"copy-foo-bar"} def test_pipeline_multi_outputs_stages(dvc): diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index 5fc55c1194..76138cf8d4 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -1,13 +1,24 @@ import os +from operator import itemgetter + +import pytest from dvc.cache import Cache +from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK +from dvc.exceptions import NoOutputOrStageError +from dvc.path_info import PathInfo from dvc.repo import Repo +from dvc.stage.exceptions import ( + StageFileDoesNotExistError, + StageNameUnspecified, + StageNotFound, +) from dvc.system import System +from dvc.utils import relpath +from dvc.utils.fs import remove def test_destroy(tmp_dir, dvc, run_copy): - from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK - dvc.config["cache"]["type"] = ["symlink"] dvc.cache = Cache(dvc) @@ -88,6 +99,12 @@ def collect_outs(*args, **kwargs): assert collect_outs("dvc.yaml:copy-foo-foobar", recursive=True) == { "foobar" } + assert collect_outs("copy-foo-foobar") == {"foobar"} + assert collect_outs("copy-foo-foobar", with_deps=True) == { + "foobar", + "foo", + } + assert collect_outs("copy-foo-foobar", recursive=True) == {"foobar"} run_copy("foobar", "baz", name="copy-foobar-baz") assert collect_outs("dvc.yaml") == {"foobar", "baz"} @@ -98,6 +115,26 @@ def collect_outs(*args, **kwargs): } +def test_collect_dir_recursive(tmp_dir, dvc, run_head): + tmp_dir.gen({"dir": {"foo": "foo"}}) + (stage1,) = dvc.add("dir", recursive=True) + with (tmp_dir / "dir").chdir(): + stage2 = run_head("foo", name="copy-foo-bar") + stage3 = run_head("foo-1", single_stage=True) + assert set(dvc.collect("dir", recursive=True)) == {stage1, stage2, stage3} + + +def test_collect_with_not_existing_output_or_stage_name( + tmp_dir, dvc, run_copy +): + with pytest.raises(StageFileDoesNotExistError): + dvc.collect("some_file") + tmp_dir.dvc_gen("foo", "foo") + run_copy("foo", "bar", name="copy-foo-bar") + with pytest.raises(StageNotFound): + dvc.collect("some_file") + + def test_stages(tmp_dir, dvc): def stages(): return {stage.relpath for stage in Repo(os.fspath(tmp_dir)).stages} @@ -113,3 +150,215 @@ def stages(): tmp_dir.gen(".dvcignore", "dir") assert stages() == {"file.dvc"} + + +@pytest.fixture +def stages(tmp_dir, run_copy): + stage1, stage2 = tmp_dir.dvc_gen({"foo": "foo", "lorem": "lorem"}) + return { + "foo-generate": stage1, + "lorem-generate": stage2, + "copy-foo-bar": run_copy("foo", "bar", single_stage=True), + "copy-bar-foobar": run_copy("bar", "foobar", name="copy-bar-foobar"), + "copy-lorem-ipsum": run_copy("lorem", "ipsum", name="lorem-ipsum"), + } + + +def test_collect_granular_with_no_target(tmp_dir, dvc, stages): + assert set(map(itemgetter(0), dvc.collect_granular())) == set( + stages.values() + ) + assert list(map(itemgetter(1), dvc.collect_granular())) == [None] * len( + stages + ) + + +def test_collect_granular_with_target(tmp_dir, dvc, stages): + assert dvc.collect_granular("bar.dvc") == [(stages["copy-foo-bar"], None)] + assert dvc.collect_granular(PIPELINE_FILE) == [ + (stages["copy-bar-foobar"], None), + (stages["copy-lorem-ipsum"], None), + ] + assert dvc.collect_granular(":") == [ + (stages["copy-bar-foobar"], None), + (stages["copy-lorem-ipsum"], None), + ] + assert dvc.collect_granular("copy-bar-foobar") == [ + (stages["copy-bar-foobar"], None) + ] + assert dvc.collect_granular(":copy-bar-foobar") == [ + (stages["copy-bar-foobar"], None) + ] + assert dvc.collect_granular("dvc.yaml:copy-bar-foobar") == [ + (stages["copy-bar-foobar"], None) + ] + + with (tmp_dir / dvc.DVC_DIR).chdir(): + assert dvc.collect_granular( + relpath(tmp_dir / PIPELINE_FILE) + ":copy-bar-foobar" + ) == [(stages["copy-bar-foobar"], None)] + + assert dvc.collect_granular("foobar") == [ + (stages["copy-bar-foobar"], PathInfo(tmp_dir / "foobar")) + ] + + +@pytest.mark.parametrize( + "target", + [ + "not_existing.dvc", + "not_existing.dvc:stage_name", + "not_existing/dvc.yaml", + "not_existing/dvc.yaml:stage_name", + ], +) +def test_collect_with_not_existing_dvcfile(tmp_dir, dvc, target): + with pytest.raises(StageFileDoesNotExistError): + dvc.collect_granular(target) + with pytest.raises(StageFileDoesNotExistError): + dvc.collect(target) + + +def test_collect_granular_with_not_existing_output_or_stage_name(tmp_dir, dvc): + with pytest.raises(NoOutputOrStageError): + dvc.collect_granular("some_file") + with pytest.raises(NoOutputOrStageError): + dvc.collect_granular("some_file", recursive=True) + + +def test_collect_granular_with_deps(tmp_dir, dvc, stages): + assert set( + map(itemgetter(0), dvc.collect_granular("bar.dvc", with_deps=True)) + ) == {stages["copy-foo-bar"], stages["foo-generate"]} + assert set( + map( + itemgetter(0), + dvc.collect_granular("copy-bar-foobar", with_deps=True), + ) + ) == { + stages["copy-bar-foobar"], + stages["copy-foo-bar"], + stages["foo-generate"], + } + assert set( + map( + itemgetter(0), dvc.collect_granular(PIPELINE_FILE, with_deps=True), + ) + ) == set(stages.values()) + + +def test_collect_granular_same_output_name_stage_name(tmp_dir, dvc, run_copy): + (stage1,) = tmp_dir.dvc_gen("foo", "foo") + (stage2,) = tmp_dir.dvc_gen("copy-foo-bar", "copy-foo-bar") + stage3 = run_copy("foo", "bar", name="copy-foo-bar") + + assert dvc.collect_granular("copy-foo-bar") == [(stage3, None)] + + coll = dvc.collect_granular("copy-foo-bar", with_deps=True) + assert set(map(itemgetter(0), coll)) == {stage3, stage1} + assert list(map(itemgetter(1), coll)) == [None] * 2 + + assert dvc.collect_granular("./copy-foo-bar") == [ + (stage2, PathInfo(tmp_dir / "copy-foo-bar")) + ] + assert dvc.collect_granular("./copy-foo-bar", with_deps=True) == [ + (stage2, PathInfo(tmp_dir / "copy-foo-bar")) + ] + + +def test_collect_granular_priority_on_collision(tmp_dir, dvc, run_copy): + tmp_dir.gen({"dir": {"foo": "foo"}, "foo": "foo"}) + (stage1,) = dvc.add("dir", recursive=True) + stage2 = run_copy("foo", "bar", name="dir") + + assert dvc.collect_granular("dir") == [(stage2, None)] + assert dvc.collect_granular("dir", recursive=True) == [(stage1, None)] + + remove(tmp_dir / "dir") + + assert dvc.collect_granular("dir") == [(stage2, None)] + assert dvc.collect_granular("dir", recursive=True) == [(stage2, None)] + + +def test_collect_granular_collision_output_dir_stage_name( + tmp_dir, dvc, run_copy +): + stage1, *_ = tmp_dir.dvc_gen({"dir": {"foo": "foo"}, "foo": "foo"}) + stage3 = run_copy("foo", "bar", name="dir") + + assert dvc.collect_granular("dir") == [(stage3, None)] + assert not dvc.collect_granular("dir", recursive=True) + assert dvc.collect_granular("./dir") == [ + (stage1, PathInfo(tmp_dir / "dir")) + ] + + +def test_collect_granular_not_existing_stage_name(tmp_dir, dvc, run_copy): + tmp_dir.dvc_gen("foo", "foo") + (stage,) = tmp_dir.dvc_gen("copy-foo-bar", "copy-foo-bar") + run_copy("foo", "bar", name="copy-foo-bar") + + assert dvc.collect_granular("copy-foo-bar.dvc:stage_name_not_needed") == [ + (stage, None) + ] + with pytest.raises(StageNotFound): + dvc.collect_granular("dvc.yaml:does-not-exist") + + +def test_get_stages(tmp_dir, dvc, run_copy): + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stages() + + tmp_dir.gen("foo", "foo") + stage1 = run_copy("foo", "bar", name="copy-foo-bar") + stage2 = run_copy("bar", "foobar", name="copy-bar-foobar") + + assert set(dvc.get_stages()) == {stage1, stage2} + assert set(dvc.get_stages(path=PIPELINE_FILE)) == {stage1, stage2} + assert set(dvc.get_stages(name="copy-bar-foobar")) == {stage2} + assert set(dvc.get_stages(path=PIPELINE_FILE, name="copy-bar-foobar")) == { + stage2 + } + + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stages(path=relpath(tmp_dir / ".." / PIPELINE_FILE)) + + with pytest.raises(StageNotFound): + dvc.get_stages(path=PIPELINE_FILE, name="copy") + + +def test_get_stages_old_dvcfile(tmp_dir, dvc): + (stage1,) = tmp_dir.dvc_gen("foo", "foo") + assert set(dvc.get_stages("foo.dvc")) == {stage1} + assert set(dvc.get_stages("foo.dvc", name="foo-generate")) == {stage1} + + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stages(path=relpath(tmp_dir / ".." / "foo.dvc")) + + +def test_get_stage(tmp_dir, dvc, run_copy): + tmp_dir.gen("foo", "foo") + stage1 = run_copy("foo", "bar", name="copy-foo-bar") + + with pytest.raises(StageNameUnspecified): + dvc.get_stage() + + with pytest.raises(StageNameUnspecified): + dvc.get_stage(path=PIPELINE_FILE) + + assert dvc.get_stage(path=PIPELINE_FILE, name="copy-foo-bar") == stage1 + assert dvc.get_stage(name="copy-foo-bar") == stage1 + + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stage(path="something.yaml", name="name") + + with pytest.raises(StageNotFound): + dvc.get_stage(name="random_name") + + +def test_get_stage_single_stage_dvcfile(tmp_dir, dvc): + (stage1,) = tmp_dir.dvc_gen("foo", "foo") + assert dvc.get_stage("foo.dvc") == stage1 + assert dvc.get_stage("foo.dvc", name="jpt") == stage1 + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stage(path="bar.dvc", name="name") diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index d5e5077384..63eaf82253 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -56,7 +56,7 @@ def _run(self, **kwargs): @staticmethod def _get_stage_target(stage): - return stage.path + return stage.addressing class TestRepro(SingleStageRun, TestDvc): diff --git a/tests/func/test_repro_multistage.py b/tests/func/test_repro_multistage.py index 63b3b87557..27c3ba92fc 100644 --- a/tests/func/test_repro_multistage.py +++ b/tests/func/test_repro_multistage.py @@ -32,7 +32,7 @@ def _run(self, **kwargs): @staticmethod def _get_stage_target(stage): - return stage.path + ":" + stage.name + return stage.addressing class TestReproFailMultiStage(MultiStageRun, test_repro.TestReproFail): @@ -312,15 +312,13 @@ def test_repro_when_cmd_changes(tmp_dir, dvc, run_copy): tmp_dir.gen("foo", "foo") stage = run_copy("foo", "bar", name="copy-file") - target = ":copy-file" + target = "copy-file" assert not dvc.reproduce(target) stage.cmd = " ".join(stage.cmd.split()) # change cmd spacing by two PipelineFile(dvc, PIPELINE_FILE)._dump_pipeline_file(stage) - assert dvc.status([target]) == { - PIPELINE_FILE + target: ["changed command"] - } + assert dvc.status([target]) == {target: ["changed command"]} assert dvc.reproduce(target)[0] == stage diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py index 284fe3b48c..2d7c9ac36b 100644 --- a/tests/func/test_stage.py +++ b/tests/func/test_stage.py @@ -8,7 +8,7 @@ from dvc.output.local import LocalOutput from dvc.remote.local import LocalRemote from dvc.repo import Repo -from dvc.stage import Stage +from dvc.stage import PipelineStage, Stage from dvc.stage.exceptions import StageFileFormatError from dvc.utils.stage import dump_stage_file, load_stage_file from tests.basic_env import TestDvc @@ -196,19 +196,39 @@ def test_parent_repo_collect_stages(tmp_dir, scm, dvc): assert subrepo_stages != [] -def test_stage_addressing(tmp_dir, dvc, run_copy): +def test_stage_strings_representation(tmp_dir, dvc, run_copy): tmp_dir.dvc_gen("foo", "foo") stage1 = run_copy("foo", "bar", single_stage=True) assert stage1.addressing == "bar.dvc" + assert repr(stage1) == "Stage: 'bar.dvc'" + assert str(stage1) == "stage: 'bar.dvc'" stage2 = run_copy("bar", "baz", name="copy-bar-baz") - assert stage2.addressing == "dvc.yaml:copy-bar-baz" + assert stage2.addressing == "copy-bar-baz" + assert repr(stage2) == "Stage: 'copy-bar-baz'" + assert str(stage2) == "stage: 'copy-bar-baz'" folder = tmp_dir / "dir" folder.mkdir() with folder.chdir(): - assert stage1.addressing == os.path.relpath(stage1.path) - assert ( - stage2.addressing - == os.path.relpath(stage2.path) + ":" + stage2.name - ) + rel_path = os.path.relpath(stage1.path) + assert stage1.addressing == rel_path + assert repr(stage1) == f"Stage: '{rel_path}'" + assert str(stage1) == f"stage: '{rel_path}'" + + rel_path = os.path.relpath(stage2.path) + assert stage2.addressing == f"{rel_path}:{stage2.name}" + assert repr(stage2) == f"Stage: '{rel_path}:{stage2.name}'" + assert str(stage2) == f"stage: '{rel_path}:{stage2.name}'" + + +def test_stage_on_no_path_string_repr(tmp_dir, dvc): + s = Stage(dvc) + assert s.addressing == "No path" + assert repr(s) == "Stage: 'No path'" + assert str(s) == "stage: 'No path'" + + p = PipelineStage(dvc, name="stage_name") + assert p.addressing == "No path:stage_name" + assert repr(p) == "Stage: 'No path:stage_name'" + assert str(p) == "stage: 'No path:stage_name'" diff --git a/tests/func/test_status.py b/tests/func/test_status.py index e94934f194..c79c361ada 100644 --- a/tests/func/test_status.py +++ b/tests/func/test_status.py @@ -73,12 +73,12 @@ def test_status_on_pipeline_stages(tmp_dir, dvc, run_copy): stage.cmd = " ".join(stage.cmd.split()) stage.dvcfile._dump_pipeline_file(stage) - assert dvc.status() == {"dvc.yaml:copy-foo-bar": ["changed command"]} + assert dvc.status("copy-foo-bar") == {"copy-foo-bar": ["changed command"]} # delete outputs (tmp_dir / "bar").unlink() assert dvc.status() == { - "dvc.yaml:copy-foo-bar": [ + "copy-foo-bar": [ {"changed outs": {"bar": "deleted"}}, "changed command", ] @@ -86,7 +86,7 @@ def test_status_on_pipeline_stages(tmp_dir, dvc, run_copy): (tmp_dir / "foo").unlink() assert dvc.status() == { "foo.dvc": [{"changed outs": {"foo": "deleted"}}], - "dvc.yaml:copy-foo-bar": [ + "copy-foo-bar": [ {"changed deps": {"foo": "deleted"}}, {"changed outs": {"bar": "deleted"}}, "changed command", diff --git a/tests/unit/repo/test_repo.py b/tests/unit/repo/test_repo.py index 581cb06a71..6395d47a82 100644 --- a/tests/unit/repo/test_repo.py +++ b/tests/unit/repo/test_repo.py @@ -87,6 +87,21 @@ def test_collect_optimization(tmp_dir, dvc, mocker): dvc.collect_granular(stage.path) +def test_collect_optimization_on_stage_name(tmp_dir, dvc, mocker, run_copy): + tmp_dir.dvc_gen("foo", "foo") + stage = run_copy("foo", "bar", name="copy-foo-bar") + # Forget cached stages and graph and error out on collection + dvc._reset() + mocker.patch( + "dvc.repo.Repo.stages", + property(raiser(Exception("Should not collect"))), + ) + + # Should read stage directly instead of collecting the whole graph + assert dvc.collect("copy-foo-bar") == [stage] + assert dvc.collect_granular("copy-foo-bar") == [(stage, None)] + + def test_skip_graph_checks(tmp_dir, dvc, mocker, run_copy): # See https://github.com/iterative/dvc/issues/2671 for more info mock_collect_graph = mocker.patch("dvc.repo.Repo._collect_graph") diff --git a/tests/unit/utils/test_utils.py b/tests/unit/utils/test_utils.py index 08b754df8a..1b8aa9c470 100644 --- a/tests/unit/utils/test_utils.py +++ b/tests/unit/utils/test_utils.py @@ -146,6 +146,9 @@ def test_resolve_output(inp, out, is_dir, expected, mocker): ["../models/stage.dvc", ("../models/stage.dvc", None), "def"], [":name", ("default", "name"), "default"], [":name", ("default", "name"), "default"], + ["something.dvc:name", ("something.dvc", "name"), None], + ["../something.dvc:name", ("../something.dvc", "name"), None], + ["file", (None, "file"), None], ], ) def test_parse_target(inp, out, default):