def drop_duplicates(self, axis: str = "rows"):
    """Drop repeated rows or non-varying columns from the table, in place.

    Args:
        axis: ``"rows"`` removes every row that is equal to an earlier
            row, keeping the first occurrence. ``"cols"`` removes every
            column whose cells, ignoring fill values, collapse to exactly
            one distinct value once converted to ``str``. Note that a
            column holding one real value plus fill values therefore
            counts as non-varying and is dropped, while a column made
            only of fill values is kept.

    Raises:
        ValueError: if ``axis`` is neither ``"rows"`` nor ``"cols"``.
    """
    if axis not in ["rows", "cols"]:
        # The trailing space keeps the two sentences apart once the
        # adjacent literals are concatenated.
        raise ValueError(
            f"Invalid 'axis' value {axis}. "
            "Choose one of ['rows', 'cols']"
        )

    if axis == "cols":
        cols_to_drop: List[str] = []
        # Keys and columns are positionally aligned, so pair them up
        # instead of indexing ``self.keys()`` on every iteration.
        for name, col in zip(self.keys(), self.columns):
            # Compare string forms because cells such as Text are not
            # hashable and cannot be placed in a set directly.
            unique_vals = {str(x) for x in col if x != self._fill_value}
            if len(unique_vals) == 1:
                cols_to_drop.append(name)
        self.drop(*cols_to_drop)
        return

    # axis == "rows"
    # Rows may contain unhashable cells, so earlier rows are remembered in
    # a list and compared by equality rather than stored in a set.
    seen_rows = []
    # Row indices are plain ints, so a set gives constant-time membership
    # checks while the columns are rebuilt below.
    rows_to_drop: Set[int] = set()
    for n_row, row in enumerate(self):
        tuple_row = tuple(row)
        if tuple_row in seen_rows:
            rows_to_drop.add(n_row)
        else:
            seen_rows.append(tuple_row)

    for name in self.keys():
        self._columns[name] = Column(
            [
                x
                for n, x in enumerate(self._columns[name])
                if n not in rows_to_drop
            ]
        )
@pytest.mark.parametrize(
    "axis,expected",
    [
        (
            "rows",
            [
                ["foo", "", ""],
                ["foo", "foo", ""],
                ["foo", "bar", "foobar"],
            ],
        ),
        ("cols", [[""], ["foo"], ["foo"], ["bar"]]),
    ],
)
def test_drop_duplicates(axis, expected):
    """Dropping duplicates along ``axis`` leaves exactly ``expected``."""
    ragged_rows = [
        ["foo"],
        ["foo", "foo"],
        ["foo", "foo"],
        ["foo", "bar", "foobar"],
    ]
    table = TabularData(["col-1", "col-2", "col-3"])
    table.extend(ragged_rows)

    # Pin the starting contents so a failure below can only come from
    # drop_duplicates itself.
    rows_before = list(table)
    assert rows_before == [
        ["foo", "", ""],
        ["foo", "foo", ""],
        ["foo", "foo", ""],
        ["foo", "bar", "foobar"],
    ]

    table.drop_duplicates(axis)

    rows_after = list(table)
    assert rows_after == expected


def test_dropna_invalid_axis():
    """dropna rejects an axis other than the supported ones."""
    table = TabularData(["col-1", "col-2", "col-3"])

    with pytest.raises(
        ValueError,
        match="Invalid 'axis' value foo.",
    ):
        table.dropna("foo")


def test_drop_duplicates_invalid_axis():
    """drop_duplicates rejects an axis other than the supported ones."""
    table = TabularData(["col-1", "col-2", "col-3"])

    with pytest.raises(
        ValueError,
        match="Invalid 'axis' value foo.",
    ):
        table.drop_duplicates("foo")