diff --git a/dvc/command/experiments/show.py b/dvc/command/experiments/show.py index 4f27419f0f..03cc123610 100644 --- a/dvc/command/experiments/show.py +++ b/dvc/command/experiments/show.py @@ -3,10 +3,12 @@ from collections import Counter, OrderedDict, defaultdict from datetime import date, datetime from fnmatch import fnmatch +from pathlib import Path from typing import TYPE_CHECKING, Dict, Iterable, Optional from funcy import lmap +from dvc.command import completion from dvc.command.base import CmdBase, append_doc_link from dvc.command.metrics import DEFAULT_PRECISION from dvc.exceptions import DvcException, InvalidArgumentError @@ -382,6 +384,7 @@ def show_experiments( no_timestamp=False, csv=False, markdown=False, + html=False, **kwargs, ): from funcy.seqs import flatten as flatten_list @@ -429,7 +432,7 @@ def show_experiments( kwargs.get("iso"), ) - if no_timestamp: + if no_timestamp or html: td.drop("Created") for col in ("State", "Executor"): @@ -466,9 +469,21 @@ def show_experiments( } ) - if kwargs.get("only_changed", False): + if kwargs.get("only_changed", False) or html: td.drop_duplicates("cols") + html_args = {} + if html: + td.dropna("rows", how="all") + td.column("Experiment")[:] = [ + # remove tree characters + str(x).encode("ascii", "ignore").strip().decode() + for x in td.column("Experiment") + ] + out = kwargs.get("out") or "dvc_plots" + html_args["output_path"] = (Path.cwd() / out).resolve() + html_args["color_by"] = kwargs.get("sort_by") or "Experiment" + td.render( pager=pager, borders=True, @@ -477,8 +492,13 @@ def show_experiments( row_styles=row_styles, csv=csv, markdown=markdown, + html=html, + **html_args, ) + if html and kwargs.get("open"): + return ui.open_browser(Path(out) / "index.html") + def _normalize_headers(names, count): return [ @@ -544,6 +564,9 @@ def run(self): csv=self.args.csv, markdown=self.args.markdown, only_changed=self.args.only_changed, + html=self.args.html, + out=self.args.out, + open=self.args.open, ) return 0 @@ 
-693,4 +716,23 @@ def add_parser(experiments_subparsers, parent_parser): "across the selected experiments." ), ) + experiments_show_parser.add_argument( + "--html", + action="store_true", + default=False, + help="Generate a parallel coordinates plot from the tabulated output.", + ) + experiments_show_parser.add_argument( + "-o", + "--out", + default=None, + help="Destination folder to save the HTML to", + metavar="", + ).complete = completion.DIR + experiments_show_parser.add_argument( + "--open", + action="store_true", + default=False, + help="Open the HTML directly in the browser.", + ) experiments_show_parser.set_defaults(func=CmdExperimentsShow) diff --git a/dvc/compare.py b/dvc/compare.py index 0bf8c54da0..3fd782b223 100644 --- a/dvc/compare.py +++ b/dvc/compare.py @@ -153,6 +153,18 @@ def to_csv(self) -> str: writer.writerow(row) return buff.getvalue() + def to_parallel_coordinates(self, output_path, color_by): + from dvc.render.html import write + from dvc.render.plotly import ParallelCoordinatesRenderer + + index_path = write( + output_path, + renderers=[ + ParallelCoordinatesRenderer(self, color_by, self._fill_value) + ], + ) + return index_path.as_uri() + def add_column(self, name: str) -> None: self._columns[name] = Column([self._fill_value] * len(self)) self._keys.append(name) @@ -173,6 +185,14 @@ def render(self, **kwargs: Any): if kwargs.pop("csv", False): ui.write(self.to_csv(), end="") + + elif kwargs.pop("html", False): + ui.write( + self.to_parallel_coordinates( + kwargs["output_path"], kwargs.get("color_by") + ) + ) + else: ui.table(self, headers=self.keys(), **kwargs) diff --git a/dvc/render/plotly.py b/dvc/render/plotly.py new file mode 100644 index 0000000000..8092e18faf --- /dev/null +++ b/dvc/render/plotly.py @@ -0,0 +1,92 @@ +import json +from collections import defaultdict +from typing import TYPE_CHECKING, Any, Dict, Optional + +from dvc.render.base import Renderer + +if TYPE_CHECKING: + from dvc.compare import TabularData + + +class 
ParallelCoordinatesRenderer(Renderer): + TYPE = "plotly" + + DIV = """ +
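<div id = "{id}"> + <script type = "text/javascript"> + var plotly_data = {partial}; + Plotly.newPlot("{id}", plotly_data); + </script> + </div>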
+ """ + + SCRIPTS = """ + + """ + + # pylint: disable=W0231 + def __init__( + self, + tabular_data: "TabularData", + color_by: Optional[str] = None, + fill_value: str = "", + ): + self.tabular_data = tabular_data + self.color_by = color_by + self.filename = "experiments" + self.fill_value = fill_value + + def partial_html(self, **kwargs): + return self.as_json() + + def as_json(self, **kwargs) -> str: + tabular_dict = defaultdict(list) + for row in self.tabular_data.as_dict(): + for col_name, value in row.items(): + tabular_dict[col_name].append(str(value)) + + trace: Dict[str, Any] = {"type": "parcoords", "dimensions": []} + for label, values in tabular_dict.items(): + is_categorical = False + try: + float_values = [ + float(x) if x != self.fill_value else None for x in values + ] + except ValueError: + is_categorical = True + + if is_categorical: + non_missing = [x for x in values if x != self.fill_value] + unique_values = sorted(set(non_missing)) + unique_values.append(self.fill_value) + + dummy_values = [unique_values.index(x) for x in values] + + values = [ + x if x != self.fill_value else "Missing" for x in values + ] + trace["dimensions"].append( + { + "label": label, + "values": dummy_values, + "tickvals": dummy_values, + "ticktext": values, + } + ) + else: + trace["dimensions"].append( + {"label": label, "values": float_values} + ) + + if label == self.color_by: + trace["line"] = { + "color": dummy_values if is_categorical else float_values, + "showscale": True, + "colorbar": {"title": self.color_by}, + } + if is_categorical: + trace["line"]["colorbar"]["tickmode"] = "array" + trace["line"]["colorbar"]["tickvals"] = dummy_values + trace["line"]["colorbar"]["ticktext"] = values + + return json.dumps({"data": [trace], "layout": {}}) diff --git a/tests/func/experiments/test_show.py b/tests/func/experiments/test_show.py index c5e314f624..6edf4334c0 100644 --- a/tests/func/experiments/test_show.py +++ b/tests/func/experiments/test_show.py @@ -597,3 +597,78 @@ 
def test_show_only_changed(tmp_dir, dvc, scm, capsys): cap = capsys.readouterr() assert "bar" not in cap.out + + +def test_show_parallel_coordinates(tmp_dir, dvc, scm, mocker): + from dvc.command.experiments import show + + webbroser_open = mocker.patch("webbrowser.open") + show_experiments = mocker.spy(show, "show_experiments") + + tmp_dir.gen("copy.py", COPY_SCRIPT) + params_file = tmp_dir / "params.yaml" + params_data = { + "foo": 1, + "bar": 1, + } + (tmp_dir / params_file).dump(params_data) + + dvc.run( + cmd="python copy.py params.yaml metrics.yaml", + metrics_no_cache=["metrics.yaml"], + params=["foo", "bar"], + name="copy-file", + deps=["copy.py"], + ) + scm.add( + [ + "dvc.yaml", + "dvc.lock", + "copy.py", + "params.yaml", + "metrics.yaml", + ".gitignore", + ] + ) + scm.commit("init") + + dvc.experiments.run(params=["foo=2"]) + + assert main(["exp", "show", "--html"]) == 0 + kwargs = show_experiments.call_args[1] + + html_text = (tmp_dir / "dvc_plots" / "index.html").read_text() + assert all(rev in html_text for rev in ["workspace", "master", "[exp-"]) + + assert ( + '{"label": "metrics.yaml:foo", "values": [2.0, 1.0, 2.0]}' in html_text + ) + assert ( + '{"label": "params.yaml:foo", "values": [2.0, 1.0, 2.0]}' in html_text + ) + assert '"line": {"color": [2, 1, 0]' in html_text + assert '"label": "metrics.yaml:bar"' not in html_text + + assert ( + main(["exp", "show", "--html", "--sort-by", "metrics.yaml:foo"]) == 0 + ) + kwargs = show_experiments.call_args[1] + + html_text = (tmp_dir / "dvc_plots" / "index.html").read_text() + assert '"line": {"color": [2.0, 1.0, 2.0]' in html_text + + assert main(["exp", "show", "--html", "--out", "experiments"]) == 0 + kwargs = show_experiments.call_args[1] + + assert kwargs["out"] == "experiments" + assert (tmp_dir / "experiments" / "index.html").exists() + + assert main(["exp", "show", "--html", "--open"]) == 0 + + webbroser_open.assert_called() + + params_data = {"foo": 1, "bar": 1, "foobar": 2} + (tmp_dir / 
params_file).dump(params_data) + assert main(["exp", "show", "--html"]) == 0 + html_text = (tmp_dir / "dvc_plots" / "index.html").read_text() + assert '{"label": "foobar", "values": [2.0, null, null]}' in html_text diff --git a/tests/unit/command/test_experiments.py b/tests/unit/command/test_experiments.py index d4fd65a5dc..1257910343 100644 --- a/tests/unit/command/test_experiments.py +++ b/tests/unit/command/test_experiments.py @@ -646,6 +646,7 @@ def test_experiments_init_config(dvc, scm, mocker): assert isinstance(cmd, CmdExperimentsInit) assert cmd.run() == 0 + m.assert_called_once_with( ANY(Repo), name="train", @@ -758,3 +759,37 @@ def test_experiments_init_extra_args(extra_args, expected_kw, mocker): def test_experiments_init_type_invalid_choice(): with pytest.raises(DvcParserError): parse_args(["exp", "init", "--type=invalid", "cmd"]) + + +def test_show_experiments_html(tmp_dir, mocker): + all_experiments = { + "workspace": { + "baseline": { + "data": { + "timestamp": None, + "params": {"params.yaml": {"data": {"foo": 1}}}, + "queued": False, + "running": False, + "executor": None, + "metrics": { + "scores.json": {"data": {"bar": 0.9544670443829399}} + }, + } + } + }, + } + experiments_table = mocker.patch( + "dvc.command.experiments.show.experiments_table" + ) + td = experiments_table.return_value + + show_experiments(all_experiments, html=True) + + td.dropna.assert_called_with("rows", how="all") + + render_kwargs = td.render.call_args[1] + + for arg in ["html", "output_path", "color_by"]: + assert arg in render_kwargs + assert render_kwargs["output_path"] == tmp_dir / "dvc_plots" + assert render_kwargs["color_by"] == "Experiment" diff --git a/tests/unit/render/test_parallel_coordinates.py b/tests/unit/render/test_parallel_coordinates.py new file mode 100644 index 0000000000..8eb6a06cbc --- /dev/null +++ b/tests/unit/render/test_parallel_coordinates.py @@ -0,0 +1,156 @@ +import json + +from dvc.compare import TabularData +from dvc.render.html import write 
+from dvc.render.plotly import ParallelCoordinatesRenderer + +# pylint: disable=W1514 + + +def expected_format(result): + assert "data" in result + assert "layout" in result + assert isinstance(result["data"], list) + assert result["data"][0]["type"] == "parcoords" + assert isinstance(result["data"][0]["dimensions"], list) + return True + + +def test_scalar_columns(): + td = TabularData(["col-1", "col-2", "col-3"]) + td.extend([["0.1", "1", ""], ["2", "0.2", "0"]]) + renderer = ParallelCoordinatesRenderer(td) + + result = json.loads(renderer.as_json()) + + assert expected_format(result) + + assert result["data"][0]["dimensions"][0] == { + "label": "col-1", + "values": [0.1, 2.0], + } + assert result["data"][0]["dimensions"][1] == { + "label": "col-2", + "values": [1.0, 0.2], + } + assert result["data"][0]["dimensions"][2] == { + "label": "col-3", + "values": [None, 0], + } + + +def test_categorical_columns(): + td = TabularData(["col-1", "col-2"]) + td.extend([["foo", ""], ["bar", "foobar"], ["foo", ""]]) + renderer = ParallelCoordinatesRenderer(td) + + result = json.loads(renderer.as_json()) + + assert expected_format(result) + + assert result["data"][0]["dimensions"][0] == { + "label": "col-1", + "values": [1, 0, 1], + "tickvals": [1, 0, 1], + "ticktext": ["foo", "bar", "foo"], + } + assert result["data"][0]["dimensions"][1] == { + "label": "col-2", + "values": [1, 0, 1], + "tickvals": [1, 0, 1], + "ticktext": ["Missing", "foobar", "Missing"], + } + + +def test_mixed_columns(): + td = TabularData(["categorical", "scalar"]) + td.extend([["foo", "0.1"], ["bar", "2"]]) + renderer = ParallelCoordinatesRenderer(td) + + result = json.loads(renderer.as_json()) + + assert expected_format(result) + + assert result["data"][0]["dimensions"][0] == { + "label": "categorical", + "values": [1, 0], + "tickvals": [1, 0], + "ticktext": ["foo", "bar"], + } + assert result["data"][0]["dimensions"][1] == { + "label": "scalar", + "values": [0.1, 2.0], + } + + +def 
test_color_by_scalar(): + td = TabularData(["categorical", "scalar"]) + td.extend([["foo", "0.1"], ["bar", "2"]]) + renderer = ParallelCoordinatesRenderer(td, color_by="scalar") + + result = json.loads(renderer.as_json()) + + assert expected_format(result) + assert result["data"][0]["line"] == { + "color": [0.1, 2.0], + "showscale": True, + "colorbar": {"title": "scalar"}, + } + + +def test_color_by_categorical(): + td = TabularData(["categorical", "scalar"]) + td.extend([["foo", "0.1"], ["bar", "2"]]) + renderer = ParallelCoordinatesRenderer(td, color_by="categorical") + + result = json.loads(renderer.as_json()) + + assert expected_format(result) + assert result["data"][0]["line"] == { + "color": [1, 0], + "showscale": True, + "colorbar": { + "title": "categorical", + "tickmode": "array", + "tickvals": [1, 0], + "ticktext": ["foo", "bar"], + }, + } + + +def test_write_parallel_coordinates(tmp_dir): + td = TabularData(["categorical", "scalar"]) + td.extend([["foo", "0.1"], ["bar", "2"]]) + + renderer = ParallelCoordinatesRenderer(td) + html_path = write(tmp_dir, renderers=[renderer]) + + html_text = html_path.read_text() + + assert ParallelCoordinatesRenderer.SCRIPTS in html_text + + div = ParallelCoordinatesRenderer.DIV.format( + id="plot_experiments", partial=renderer.as_json() + ) + assert div in html_text + + +def test_fill_value(): + td = TabularData(["categorical", "scalar"]) + td.extend([["foo", "-"], ["-", "2"]]) + renderer = ParallelCoordinatesRenderer(td, fill_value="-") + + result = json.loads(renderer.as_json()) + + assert expected_format(result) + + assert result["data"][0]["dimensions"][0] == { + "label": "categorical", + "values": [0, 1], + "tickvals": [0, 1], + "ticktext": ["foo", "Missing"], + } + assert result["data"][0]["dimensions"][1] == { + "label": "scalar", + "values": [None, 2.0], + } diff --git a/tests/unit/test_tabular_data.py b/tests/unit/test_tabular_data.py index a8439d2815..1963e1edb0 100644 --- a/tests/unit/test_tabular_data.py +++ 
b/tests/unit/test_tabular_data.py @@ -264,3 +264,21 @@ def test_drop_duplicates_invalid_axis(): with pytest.raises(ValueError, match="Invalid 'axis' value foo."): td.drop_duplicates("foo") + + +def test_to_parallel_coordinates(tmp_dir, mocker): + (tmp_dir / "foo").mkdir() + td = TabularData(["categorical", "scalar"]) + td.extend([["foo", "0.1"], ["bar", "2"]]) + + write = mocker.patch("dvc.render.html.write") + renderer_class = mocker.patch( + "dvc.render.plotly.ParallelCoordinatesRenderer" + ) + renderer = renderer_class.return_value + + td.render(html=True, output_path="foo") + + renderer_class.assert_called_with(td, None, td._fill_value) + + write.assert_called_with("foo", renderers=[renderer])