diff --git a/dvc/command/experiments/show.py b/dvc/command/experiments/show.py index 3ed78b5a4a..c189065499 100644 --- a/dvc/command/experiments/show.py +++ b/dvc/command/experiments/show.py @@ -473,7 +473,7 @@ def show_experiments( ) if kwargs.get("only_changed", False) or html: - td.drop_duplicates("cols") + td.drop_duplicates("cols", ignore_empty=False) html_args = {} if html: diff --git a/dvc/compare.py b/dvc/compare.py index 3fd782b223..59deb1ff7f 100644 --- a/dvc/compare.py +++ b/dvc/compare.py @@ -249,7 +249,7 @@ def dropna(self, axis: str = "rows", how="any"): else: self.drop(*to_drop) - def drop_duplicates(self, axis: str = "rows"): + def drop_duplicates(self, axis: str = "rows", ignore_empty: bool = True): if axis not in ["rows", "cols"]: raise ValueError( f"Invalid 'axis' value {axis}." @@ -260,7 +260,9 @@ def drop_duplicates(self, axis: str = "rows"): cols_to_drop: List[str] = [] for n_col, col in enumerate(self.columns): # Cast to str because Text is not hashable error - unique_vals = {str(x) for x in col if x != self._fill_value} + unique_vals = {str(x) for x in col} + if ignore_empty and self._fill_value in unique_vals: + unique_vals -= {self._fill_value} if len(unique_vals) == 1: cols_to_drop.append(self.keys()[n_col]) self.drop(*cols_to_drop) diff --git a/tests/unit/test_tabular_data.py b/tests/unit/test_tabular_data.py index 1963e1edb0..93984b9159 100644 --- a/tests/unit/test_tabular_data.py +++ b/tests/unit/test_tabular_data.py @@ -221,37 +221,97 @@ def test_dropna(axis, how, data, expected): @pytest.mark.parametrize( - "axis,expected", + "axis,expected,ignore_empty", [ ( "rows", [ - ["foo", "", ""], - ["foo", "foo", ""], + ["foo", "-", "-"], + ["foo", "foo", "-"], ["foo", "bar", "foobar"], ], + True, + ), + ("cols", [["-"], ["foo"], ["foo"], ["bar"]], True), + ( + "cols", + [ + ["-", "-"], + ["foo", "-"], + ["foo", "-"], + ["bar", "foobar"], + ], + False, ), - ("cols", [[""], ["foo"], ["foo"], ["bar"]]), ], ) -def test_drop_duplicates(axis, expected): - td = TabularData(["col-1", "col-2", "col-3"]) +def test_drop_duplicates(axis, expected, ignore_empty): + td = TabularData(["col-1", "col-2", "col-3"], fill_value="-") td.extend( [["foo"], ["foo", "foo"], ["foo", "foo"], ["foo", "bar", "foobar"]] ) assert list(td) == [ - ["foo", "", ""], - ["foo", "foo", ""], - ["foo", "foo", ""], + ["foo", "-", "-"], + ["foo", "foo", "-"], + ["foo", "foo", "-"], ["foo", "bar", "foobar"], ] - td.drop_duplicates(axis) + td.drop_duplicates(axis, ignore_empty=ignore_empty) assert list(td) == expected +def test_drop_duplicates_ignore_empty(): + td = TabularData(["col-1", "col-2", "col-3"], fill_value="-") + td.extend( + [["foo"], ["foo", "foo"], ["foo", "foo"], ["foo", "bar", "foobar"]] + ) + + assert list(td) == [ + ["foo", "-", "-"], + ["foo", "foo", "-"], + ["foo", "foo", "-"], + ["foo", "bar", "foobar"], + ] + + td.drop_duplicates("cols", ignore_empty=False) + + assert list(td) == [ + ["-", "-"], + ["foo", "-"], + ["foo", "-"], + ["bar", "foobar"], + ] + + +def test_drop_duplicates_rich_text(): + from dvc.ui import ui + + td = TabularData(["col-1", "col-2", "col-3"], fill_value="-") + + td.extend( + [ + ["foo", None, ui.rich_text("-")], + ["foo", "foo"], + ["foo", "foo"], + ["foo", "bar", "foobar"], + ] + ) + + assert list(td) == [ + ["foo", "-", ui.rich_text("-")], + ["foo", "foo", "-"], + ["foo", "foo", "-"], + ["foo", "bar", "foobar"], + ] + + td.drop_duplicates("cols") + + assert list(td) == [["-"], ["foo"], ["foo"], ["bar"]] + + def test_dropna_invalid_axis(): td = TabularData(["col-1", "col-2", "col-3"])