From 92aa7b2df922f8c094126391bdcc7defe9834223 Mon Sep 17 00:00:00 2001 From: AlexandreKempf Date: Mon, 26 Feb 2024 16:37:57 +0100 Subject: [PATCH 1/8] fix plots nested configuration --- dvc/repo/plots/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index ee783d25d8..01091479c7 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -442,7 +442,12 @@ def _resolve_definitions( config_path = os.fspath(config_path) config_dir = fs.dirname(config_path) result: dict[str, dict] = {} - for plot_id, plot_props in definitions.items(): + + # sort by key length to process the most general paths first + # then override them with more specific ones. + for plot_id, plot_props in sorted( + definitions.items(), key=lambda item: len(item[0]) + ): if plot_props is None: plot_props = {} if _id_is_path(plot_props): @@ -513,7 +518,7 @@ def unpack_if_dir(fs, path, props: dict[str, str], onerror: Optional[Callable] = if "data" in unpacked: for subpath in unpacked["data"]: - result["data"].update({subpath: props}) + result["data"].update({subpath: {} | props}) # avoid shared reference else: result.update(unpacked) From 85746e977a71825c67520166be929d38aa963088 Mon Sep 17 00:00:00 2001 From: AlexandreKempf Date: Tue, 27 Feb 2024 12:21:56 +0100 Subject: [PATCH 2/8] add test --- tests/func/plots/test_collect.py | 60 ++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/func/plots/test_collect.py diff --git a/tests/func/plots/test_collect.py b/tests/func/plots/test_collect.py new file mode 100644 index 0000000000..01dee68812 --- /dev/null +++ b/tests/func/plots/test_collect.py @@ -0,0 +1,60 @@ +import pytest + +import dpath +from dvc.repo.plots import Plots + + +def test_plots_definitions_works_with_nested_plots(tmp_dir, scm, dvc): + data_v1 = [ + {"x": 1, "y": 0.1}, + {"x": 2, "y": 0.2}, + {"x": 3, "y": 0.3}, + ] + data_subdir_v1 = [ + {"x": 1, "y": 0.2, "z": 0.1}, + {"x": 2, "y": 0.3, "z": 0.2}, + {"x": 3, "y": 0.4, "z": 0.3}, + ] + + plots_dir = tmp_dir / "plots" + subdir = plots_dir / "subdir" + + plots_dir.mkdir() + subdir.mkdir() + + (plots_dir / "data_v1.json").dump_json(data_v1) + (subdir / "data_subdir_v1.json").dump_json(data_subdir_v1) + + plots_config = [ + { + "plots/subdir/": { + "x": "z", + } + }, + { + "plots": { + "x": "x", + } + }, + ] + + from dvc.utils.serialize import modify_yaml + + with modify_yaml("dvc.yaml") as dvcfile_content: + dvcfile_content["plots"] = plots_config + + scm.add( + [ + "plots/data_v1.json", + "plots/subdir/data_subdir_v1.json", + "dvc.yaml", + ] + ) + scm.commit("add data sources") + + plots = next(Plots(dvc).collect()) + + assert dpath.get(plots, "workspace/definitions/data/dvc.yaml/data") == { + "plots/data_v1.json": {"x": "x"}, + "plots/subdir/data_subdir_v1.json": {"x": "z"}, + } From a932a0ddb7778ccd05f96843da7bef245429ee8e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Feb 2024 11:22:07 +0000 Subject: [PATCH 3/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/func/plots/test_collect.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/func/plots/test_collect.py b/tests/func/plots/test_collect.py index 01dee68812..5273ff8432 100644 --- a/tests/func/plots/test_collect.py +++ b/tests/func/plots/test_collect.py @@ -1,6 +1,5 @@ -import pytest - import dpath + from dvc.repo.plots import Plots From 77a85d1821e36c8cf5ca1019e5cf7f7a42fce753 Mon Sep 17 00:00:00 2001 From: AlexandreKempf Date: Wed, 28 Feb 2024 10:00:17 +0100 Subject: [PATCH 4/8] add more extensive tests --- tests/func/plots/test_collect.py | 36 +++++++++++--------- tests/integration/plots/conftest.py | 48 +++++++++++++++++++++++++-- tests/integration/plots/test_plots.py | 18 ++++++++++ 3 files changed, 83 insertions(+), 19 deletions(-) diff --git a/tests/func/plots/test_collect.py b/tests/func/plots/test_collect.py index 5273ff8432..d6e751a0af 100644 --- a/tests/func/plots/test_collect.py +++ b/tests/func/plots/test_collect.py @@ -3,36 +3,36 @@ from dvc.repo.plots import Plots -def test_plots_definitions_works_with_nested_plots(tmp_dir, scm, dvc): - data_v1 = [ +def test_subdir_config_not_overwritten_by_parents(tmp_dir, scm, dvc): + plot_data = [ {"x": 1, "y": 0.1}, {"x": 2, "y": 0.2}, {"x": 3, "y": 0.3}, ] - data_subdir_v1 = [ + subdir_plot_data = [ {"x": 1, "y": 0.2, "z": 0.1}, {"x": 2, "y": 0.3, "z": 0.2}, {"x": 3, "y": 0.4, "z": 0.3}, ] - plots_dir = tmp_dir / "plots" - subdir = plots_dir / "subdir" + (tmp_dir / "plots").mkdir() + (tmp_dir / "plots" / "subdir").mkdir() - plots_dir.mkdir() - subdir.mkdir() - - (plots_dir / "data_v1.json").dump_json(data_v1) - (subdir / "data_subdir_v1.json").dump_json(data_subdir_v1) + (tmp_dir / "plots" / "plot.json").dump_json(plot_data) + (tmp_dir / "plots" / "subdir" / "plot.json").dump_json(subdir_plot_data) plots_config = [ { "plots/subdir/": { "x": "z", + "y": "x", } }, + {"plots": {"x": "x", "y": "y"}}, { - "plots": { - "x": "x", + "subdir axis defined by filename": { + "x": {"plots/subdir/plot.json": "x"}, + "y": {"plots/subdir/plot.json": "y"}, } }, ] @@ -44,8 +44,8 @@ def test_plots_definitions_works_with_nested_plots(tmp_dir, scm, dvc): scm.add( [ - "plots/data_v1.json", - "plots/subdir/data_subdir_v1.json", + "plots/plot.json", + "plots/subdir/plot.json", "dvc.yaml", ] ) @@ -54,6 +54,10 @@ def test_plots_definitions_works_with_nested_plots(tmp_dir, scm, dvc): plots = next(Plots(dvc).collect()) assert dpath.get(plots, "workspace/definitions/data/dvc.yaml/data") == { - "plots/data_v1.json": {"x": "x"}, - "plots/subdir/data_subdir_v1.json": {"x": "z"}, + "plots/plot.json": {"x": "x", "y": "y"}, + "plots/subdir/plot.json": {"x": "z", "y": "x"}, + "subdir axis defined by filename": { + "x": {"plots/subdir/plot.json": "x"}, + "y": {"plots/subdir/plot.json": "y"}, + }, } diff --git a/tests/integration/plots/conftest.py b/tests/integration/plots/conftest.py index fd5deb58b4..8199d80da9 100644 --- a/tests/integration/plots/conftest.py +++ b/tests/integration/plots/conftest.py @@ -86,15 +86,27 @@ def make(): @pytest.fixture def repo_with_config_plots(tmp_dir, scm, dvc, run_copy_metrics): def make(): + # test subdir functionality + linear_subdirA_v1 = [ + {"x": 1, "y": 0.2}, + {"x": 2, "y": 0.3}, + {"x": 3, "y": 0.4}, + ] + # test subdir default values for x = step + linear_subdirB_v1 = [ + {"step": 1, "y": 0.2}, + {"step": 2, "y": 0.3}, + {"step": 3, "y": 0.4}, + ] linear_train_v1 = [ {"x": 1, "y": 0.1}, {"x": 2, "y": 0.2}, {"x": 3, "y": 0.3}, ] linear_test_v1 = [ - {"x": 1, "y": 0.2}, - {"x": 2, "y": 0.3}, - {"x": 3, "y": 0.4}, + {"x": 1, "y": 0.3}, + {"x": 2, "y": 0.4}, + {"x": 3, "y": 0.5}, ] confusion_train_v1 = [ @@ -110,6 +122,10 @@ def make(): {"actual": 1, "predicted": 0}, ] + (tmp_dir / "subdirA").mkdir() + (tmp_dir / "subdirB").mkdir() + (tmp_dir / "subdirA" / "linear_subdir_src.json").dump_json(linear_subdirA_v1) + (tmp_dir / "subdirB" / "linear_subdir_src.json").dump_json(linear_subdirB_v1) (tmp_dir / "linear_train_src.json").dump_json(linear_train_v1) (tmp_dir / "linear_test_src.json").dump_json(linear_test_v1) (tmp_dir / "confusion_train_src.json").dump_json(confusion_train_v1) @@ -117,6 +133,8 @@ def make(): scm.add( [ + "subdirA/linear_subdir_src.json", + "subdirB/linear_subdir_src.json", "linear_train_src.json", "linear_test_src.json", "confusion_train_src.json", @@ -125,6 +143,20 @@ def make(): ) scm.commit("add data sources") + run_copy_metrics( + "subdirA/linear_subdir_src.json", + "subdirA/linear_subdir.json", + name="linear_subdir", + outs=["subdirA/linear_subdir.json"], + commit="linear_subdir", + ) + run_copy_metrics( + "subdirB/linear_subdir_src.json", + "subdirB/linear_subdir.json", + name="linear_subdir", + outs=["subdirB/linear_subdir.json"], + commit="linear_subdir", + ) run_copy_metrics( "linear_train_src.json", "linear_train.json", @@ -154,6 +186,14 @@ def make(): commit="confusion_test", ) plots_config = [ + { + "subdirA": { + "x": "x", + "y": "y", + "title": "subdir plots with x and y defined", + } + }, + {"subdirB": {"title": "subdir plots with default x and y"}}, { "linear_train_vs_test": { "x": "x", @@ -182,6 +222,8 @@ def make(): scm.commit("commit dvc files") yield { "data": { + "subdirA/linear_subdir.json": linear_subdirA_v1, + "subdirB/linear_subdir.json": linear_subdirB_v1, "linear_train.json": linear_train_v1, "linear_test.json": linear_test_v1, "confusion_train.json": confusion_train_v1, diff --git a/tests/integration/plots/test_plots.py b/tests/integration/plots/test_plots.py index e3d5f189d9..d53c5a467b 100644 --- a/tests/integration/plots/test_plots.py +++ b/tests/integration/plots/test_plots.py @@ -440,6 +440,8 @@ def test_repo_with_config_plots(tmp_dir, capsys, repo_with_config_plots): html_result = extract_vega_specs( html_path, [ + "subdirA/linear_subdir.json", + "subdirB/linear_subdir.json", "linear_train_vs_test", "confusion_train_vs_test", ], @@ -466,6 +468,22 @@ def test_repo_with_config_plots(tmp_dir, capsys, repo_with_config_plots): ] == ble ) + assert ( + html_result["subdirA/linear_subdir.json"]["layer"][1]["encoding"]["x"]["field"] + == "x" + ) + assert ( + html_result["subdirA/linear_subdir.json"]["layer"][1]["encoding"]["y"]["field"] + == "y" + ) + assert ( + html_result["subdirB/linear_subdir.json"]["layer"][1]["encoding"]["x"]["field"] + == "step" + ) + assert ( + html_result["subdirB/linear_subdir.json"]["layer"][1]["encoding"]["y"]["field"] + == "y" + ) @pytest.mark.vscode From b0c409e5b8f92981a74b544f115b72a693c05d73 Mon Sep 17 00:00:00 2001 From: AlexandreKempf Date: Wed, 28 Feb 2024 10:54:14 +0100 Subject: [PATCH 5/8] implement logic for closest parent detection --- dvc/repo/plots/__init__.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index 9387853236..1a30190d8e 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -449,11 +449,10 @@ def _resolve_definitions( config_dir = fs.dirname(config_path) result: dict[str, dict] = {} - # sort by key length to process the most general paths first - # then override them with more specific ones. - for plot_id, plot_props in sorted( - definitions.items(), key=lambda item: len(item[0]) - ): + plot_ids_parents = [ + _normpath(fs.join(config_dir, plot_id)) for plot_id in definitions + ] + for plot_id, plot_props in definitions.items(): if plot_props is None: plot_props = {} if _id_is_path(plot_props): @@ -462,6 +461,12 @@ def _resolve_definitions( unpacked = unpack_if_dir( fs, data_path, props=plot_props | props, onerror=onerror ) + # use config for parent directory with most specific definition + unpacked["data"] = { + k: v + for k, v in unpacked["data"].items() + if _closest_parent(fs, k, plot_ids_parents) == data_path + } dpath.merge(result, unpacked) elif _matches(targets, config_path, plot_id): adjusted_props = _adjust_sources(fs, plot_props, config_dir) @@ -470,6 +475,15 @@ def _resolve_definitions( return result +def _closest_parent(fs, path, parents): + best_result = "" + for parent in parents: + common_path = fs.commonpath([path, parent]) + if len(common_path) > len(best_result): + best_result = common_path + return best_result + + def _collect_pipeline_files(repo, targets: list[str], props, onerror=None): result: dict[str, dict] = {} top_plots = repo.index._plots From cdfc948b5158a4ee0ec33c89580fbc2bf9a5f313 Mon Sep 17 00:00:00 2001 From: AlexandreKempf Date: Wed, 28 Feb 2024 10:56:21 +0100 Subject: [PATCH 6/8] fix test --- tests/integration/plots/conftest.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/integration/plots/conftest.py b/tests/integration/plots/conftest.py index 8199d80da9..5ff1689a6a 100644 --- a/tests/integration/plots/conftest.py +++ b/tests/integration/plots/conftest.py @@ -87,13 +87,13 @@ def make(): def repo_with_config_plots(tmp_dir, scm, dvc, run_copy_metrics): def make(): # test subdir functionality - linear_subdirA_v1 = [ + linear_subdir_a_v1 = [ {"x": 1, "y": 0.2}, {"x": 2, "y": 0.3}, {"x": 3, "y": 0.4}, ] # test subdir default values for x = step - linear_subdirB_v1 = [ + linear_subdir_b_v1 = [ {"step": 1, "y": 0.2}, {"step": 2, "y": 0.3}, {"step": 3, "y": 0.4}, @@ -124,8 +124,8 @@ def make(): (tmp_dir / "subdirA").mkdir() (tmp_dir / "subdirB").mkdir() - (tmp_dir / "subdirA" / "linear_subdir_src.json").dump_json(linear_subdirA_v1) - (tmp_dir / "subdirB" / "linear_subdir_src.json").dump_json(linear_subdirB_v1) + (tmp_dir / "subdirA" / "linear_subdir_src.json").dump_json(linear_subdir_a_v1) + (tmp_dir / "subdirB" / "linear_subdir_src.json").dump_json(linear_subdir_b_v1) (tmp_dir / "linear_train_src.json").dump_json(linear_train_v1) (tmp_dir / "linear_test_src.json").dump_json(linear_test_v1) (tmp_dir / "confusion_train_src.json").dump_json(confusion_train_v1) @@ -222,8 +222,8 @@ def make(): scm.commit("commit dvc files") yield { "data": { - "subdirA/linear_subdir.json": linear_subdirA_v1, - "subdirB/linear_subdir.json": linear_subdirB_v1, + "subdirA/linear_subdir.json": linear_subdir_a_v1, + "subdirB/linear_subdir.json": linear_subdir_b_v1, "linear_train.json": linear_train_v1, "linear_test.json": linear_test_v1, "confusion_train.json": confusion_train_v1, From ff5636952cbf938b4f8bc1ed2bac4258e2ceac12 Mon Sep 17 00:00:00 2001 From: AlexandreKempf Date: Wed, 28 Feb 2024 10:58:30 +0100 Subject: [PATCH 7/8] use explicit code to fix copy bug --- dvc/repo/plots/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index 1a30190d8e..fe526d2891 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -538,7 +538,7 @@ def unpack_if_dir(fs, path, props: dict[str, str], onerror: Optional[Callable] = if "data" in unpacked: for subpath in unpacked["data"]: - result["data"].update({subpath: {} | props}) # avoid shared reference + result["data"].update({subpath: props.copy()}) else: result.update(unpacked) From b8882321976df9cf0272c95c746e6ec3f87266f2 Mon Sep 17 00:00:00 2001 From: AlexandreKempf Date: Wed, 28 Feb 2024 15:28:50 +0100 Subject: [PATCH 8/8] add subdir test to integration tests --- tests/integration/plots/conftest.py | 55 +++++++++++++++++++---------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/tests/integration/plots/conftest.py b/tests/integration/plots/conftest.py index 5ff1689a6a..a85a700f41 100644 --- a/tests/integration/plots/conftest.py +++ b/tests/integration/plots/conftest.py @@ -122,10 +122,8 @@ def make(): {"actual": 1, "predicted": 0}, ] - (tmp_dir / "subdirA").mkdir() - (tmp_dir / "subdirB").mkdir() - (tmp_dir / "subdirA" / "linear_subdir_src.json").dump_json(linear_subdir_a_v1) - (tmp_dir / "subdirB" / "linear_subdir_src.json").dump_json(linear_subdir_b_v1) + (tmp_dir / "linear_subdir_a_src.json").dump_json(linear_subdir_a_v1) + (tmp_dir / "linear_subdir_b_src.json").dump_json(linear_subdir_b_v1) (tmp_dir / "linear_train_src.json").dump_json(linear_train_v1) (tmp_dir / "linear_test_src.json").dump_json(linear_test_v1) (tmp_dir / "confusion_train_src.json").dump_json(confusion_train_v1) @@ -133,8 +131,8 @@ def make(): scm.add( [ - "subdirA/linear_subdir_src.json", - "subdirB/linear_subdir_src.json", + "linear_subdir_a_src.json", + "linear_subdir_b_src.json", "linear_train_src.json", "linear_test_src.json", "confusion_train_src.json", @@ -143,15 +141,18 @@ def make(): ) scm.commit("add data sources") + (tmp_dir / "subdirA").mkdir() + (tmp_dir / "subdirB").mkdir() + run_copy_metrics( - "subdirA/linear_subdir_src.json", + "linear_subdir_a_src.json", "subdirA/linear_subdir.json", name="linear_subdir", outs=["subdirA/linear_subdir.json"], commit="linear_subdir", ) run_copy_metrics( - "subdirB/linear_subdir_src.json", + "linear_subdir_b_src.json", "subdirB/linear_subdir.json", name="linear_subdir", outs=["subdirB/linear_subdir.json"], @@ -185,15 +186,16 @@ def make(): outs=["confusion_test.json"], commit="confusion_test", ) - plots_config = [ - { - "subdirA": { - "x": "x", - "y": "y", - "title": "subdir plots with x and y defined", - } - }, - {"subdirB": {"title": "subdir plots with default x and y"}}, + + subdir_a_config = { + "x": "x", + "y": "y", + "title": "subdir plots with x and y defined", + } + + subdir_b_config = {"title": "subdir plots with default x and y"} + + other_plots_config = [ { "linear_train_vs_test": { "x": "x", @@ -216,10 +218,19 @@ def make(): from dvc.utils.serialize import modify_yaml with modify_yaml("dvc.yaml") as dvcfile_content: - dvcfile_content["plots"] = plots_config + dvcfile_content["plots"] = [ + {"subdirA": subdir_a_config}, + {"subdirB": subdir_b_config}, + *other_plots_config, + ] scm.add(["dvc.yaml", "dvc.lock"]) scm.commit("commit dvc files") + + # remove generate .gitignore that are considered as plot otherwise + (tmp_dir / "subdirA" / ".gitignore").unlink() + (tmp_dir / "subdirB" / ".gitignore").unlink() + yield { "data": { "subdirA/linear_subdir.json": linear_subdir_a_v1, @@ -229,7 +240,13 @@ def make(): "confusion_train.json": confusion_train_v1, "confusion_test.json": confusion_test_v1, }, - "configs": {"dvc.yaml": plots_config}, + "configs": { + "dvc.yaml": [ + {"subdirA/linear_subdir.json": subdir_a_config}, + {"subdirB/linear_subdir.json": subdir_b_config}, + *other_plots_config, + ] + }, } return make