Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,8 @@ def stages(self):
return self.stage.collect_repo(onerror=error_handler)

def find_outs_by_path(self, path, outs=None, recursive=False, strict=True):
if not outs:
outs = [out for stage in self.stages for out in stage.outs]
# using `outs_graph` to ensure graph checks are run
outs = outs or self.outs_graph
Copy link
Collaborator Author

@skshetry skshetry Dec 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The disadvantage of it might be that, for example, dvc.api.open might start giving these unwanted errors, if their graphs are not correct.

Maybe, instead of giving these errors at all the times, we should only error out if n(outs) > 1 in the RepoTree?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But maybe it's not the tree that needs to worry about this.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, this change is unnecessary to solve #5027, but for #4010.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are lots of assumptions around the code about our outs/stages being in-check with the proper DAG, so we indeed need to check the graph. There were some discussions around whether or not some of the dag checks are really that necessary (e.g. overlapping outputs might be used to dvc checkout particular versions of datasets on demand), but so far there wasn't a good scenario that people were actively asking for.


abs_path = os.path.abspath(path)
path_info = PathInfo(abs_path)
Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def _collect_outs(
) -> Outputs:
outs = {
out
for stage in repo.stages
for stage in repo.graph # using `graph` to ensure graph checks run
for out in (stage.deps if deps else stage.outs)
}
return set(filter(output_filter, outs)) if output_filter else outs
Expand Down
47 changes: 46 additions & 1 deletion tests/func/metrics/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@

import pytest

from dvc.exceptions import NoMetricsFoundError, NoMetricsParsedError
from dvc.exceptions import (
NoMetricsFoundError,
NoMetricsParsedError,
OverlappingOutputPathsError,
)
from dvc.path_info import PathInfo
from dvc.utils.fs import remove
from dvc.utils.serialize import dump_yaml, modify_yaml
from tests.func.metrics.utils import _write_json


Expand Down Expand Up @@ -61,3 +68,41 @@ def test_show_malformed_metric(
def test_show_no_metrics_files(tmp_dir, dvc, caplog, show):
with pytest.raises(NoMetricsFoundError):
show(dvc)


@pytest.mark.parametrize("clear_before_run", [True, False])
@pytest.mark.parametrize("typ", ["metrics", "plots"])
def test_metrics_show_overlap(
tmp_dir, dvc, run_copy_metrics, clear_before_run, typ
):
data_dir = PathInfo("data")
(tmp_dir / data_dir).mkdir()

outs = {typ: [str(data_dir / "m1.yaml")]}
dump_yaml(data_dir / "m1_temp.yaml", {"a": {"b": {"c": 2, "d": 1}}})
run_copy_metrics(
str(data_dir / "m1_temp.yaml"),
str(data_dir / "m1.yaml"),
single_stage=False,
commit=f"add m1 {typ}",
name="cp-m1",
**outs,
)
with modify_yaml("dvc.yaml") as d:
# trying to make an output overlaps error
d["stages"]["corrupted-stage"] = {
"cmd": "mkdir data",
"outs": ["data"],
}

# running by clearing and not clearing stuffs
# so as it works even for optimized cases
if clear_before_run:
remove(data_dir)
remove(dvc.cache.local.cache_dir)

dvc._reset()

show = dvc.metrics.show if typ == "metrics" else dvc.plots.show
with pytest.raises(OverlappingOutputPathsError):
show()
11 changes: 11 additions & 0 deletions tests/unit/repo/test_repo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import os
import shutil

import pytest

from dvc.exceptions import OutputDuplicationError
from dvc.repo import NotDvcRepoError, Repo, locked
from dvc.utils.fs import remove

Expand Down Expand Up @@ -29,6 +31,15 @@ def test_find_outs_by_path(tmp_dir, dvc, path):
assert outs[0].path_info == stage.outs[0].path_info


def test_find_outs_by_path_does_graph_checks(tmp_dir, dvc):
tmp_dir.dvc_gen("foo", "foo")
shutil.copyfile("foo.dvc", "foo-2.dvc")

dvc._reset()
with pytest.raises(OutputDuplicationError):
dvc.find_outs_by_path("foo")


@pytest.mark.parametrize(
"path",
[os.path.join("dir", "subdir", "file"), os.path.join("dir", "subdir")],
Expand Down