Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dvc/command/experiments/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def show_experiments(
)

if kwargs.get("only_changed", False) or html:
td.drop_duplicates("cols")
td.drop_duplicates("cols", ignore_empty=False)

html_args = {}
if html:
Expand Down
6 changes: 4 additions & 2 deletions dvc/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def dropna(self, axis: str = "rows", how="any"):
else:
self.drop(*to_drop)

def drop_duplicates(self, axis: str = "rows"):
def drop_duplicates(self, axis: str = "rows", ignore_empty: bool = True):
if axis not in ["rows", "cols"]:
raise ValueError(
f"Invalid 'axis' value {axis}."
Expand All @@ -260,7 +260,9 @@ def drop_duplicates(self, axis: str = "rows"):
cols_to_drop: List[str] = []
for n_col, col in enumerate(self.columns):
# Cast to str because Text is not hashable
unique_vals = {str(x) for x in col if x != self._fill_value}
unique_vals = {str(x) for x in col}
if ignore_empty and self._fill_value in unique_vals:
unique_vals -= {self._fill_value}
if len(unique_vals) == 1:
cols_to_drop.append(self.keys()[n_col])
self.drop(*cols_to_drop)
Expand Down
80 changes: 70 additions & 10 deletions tests/unit/test_tabular_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,37 +221,97 @@ def test_dropna(axis, how, data, expected):


@pytest.mark.parametrize(
"axis,expected",
"axis,expected,ignore_empty",
[
(
"rows",
[
["foo", "", ""],
["foo", "foo", ""],
["foo", "-", "-"],
["foo", "foo", "-"],
["foo", "bar", "foobar"],
],
True,
),
("cols", [["-"], ["foo"], ["foo"], ["bar"]], True),
(
"cols",
[
["-", "-"],
["foo", "-"],
["foo", "-"],
["bar", "foobar"],
],
False,
),
("cols", [[""], ["foo"], ["foo"], ["bar"]]),
],
)
def test_drop_duplicates(axis, expected):
td = TabularData(["col-1", "col-2", "col-3"])
def test_drop_duplicates(axis, expected, ignore_empty):
td = TabularData(["col-1", "col-2", "col-3"], fill_value="-")
td.extend(
[["foo"], ["foo", "foo"], ["foo", "foo"], ["foo", "bar", "foobar"]]
)

assert list(td) == [
["foo", "", ""],
["foo", "foo", ""],
["foo", "foo", ""],
["foo", "-", "-"],
["foo", "foo", "-"],
["foo", "foo", "-"],
["foo", "bar", "foobar"],
]

td.drop_duplicates(axis)
td.drop_duplicates(axis, ignore_empty=ignore_empty)

assert list(td) == expected


def test_drop_duplicates_ignore_empty():
    """Check column de-duplication when fill-value cells are not ignored.

    With ``ignore_empty=False`` only ``col-1`` (all ``"foo"``) is expected
    to be dropped; ``col-2`` and ``col-3`` keep their ``"-"`` padding cells
    and survive.
    """
    header = ["col-1", "col-2", "col-3"]
    ragged_rows = [
        ["foo"],
        ["foo", "foo"],
        ["foo", "foo"],
        ["foo", "bar", "foobar"],
    ]

    table = TabularData(header, fill_value="-")
    table.extend(ragged_rows)

    # Rows shorter than the header come back padded with the fill value.
    padded_rows = [
        ["foo", "-", "-"],
        ["foo", "foo", "-"],
        ["foo", "foo", "-"],
        ["foo", "bar", "foobar"],
    ]
    assert list(table) == padded_rows

    table.drop_duplicates("cols", ignore_empty=False)

    # Only the constant first column is gone.
    remaining_rows = [
        ["-", "-"],
        ["foo", "-"],
        ["foo", "-"],
        ["bar", "foobar"],
    ]
    assert list(table) == remaining_rows


def test_drop_duplicates_rich_text():
    """Check column de-duplication when a cell holds a rich-text fill marker.

    The first row supplies ``None`` and ``ui.rich_text("-")`` explicitly;
    the remaining short rows are padded by the table. After
    ``drop_duplicates("cols")`` with default arguments, only ``col-2`` is
    expected to remain.
    """
    from dvc.ui import ui

    header = ["col-1", "col-2", "col-3"]
    table = TabularData(header, fill_value="-")

    source_rows = [
        ["foo", None, ui.rich_text("-")],
        ["foo", "foo"],
        ["foo", "foo"],
        ["foo", "bar", "foobar"],
    ]
    table.extend(source_rows)

    # ``None`` is reported as the fill value; the rich-text cell is kept.
    # A fresh rich-text object is built here so the comparison relies on
    # equality rather than identity.
    stored_rows = [
        ["foo", "-", ui.rich_text("-")],
        ["foo", "foo", "-"],
        ["foo", "foo", "-"],
        ["foo", "bar", "foobar"],
    ]
    assert list(table) == stored_rows

    table.drop_duplicates("cols")

    surviving_rows = [["-"], ["foo"], ["foo"], ["bar"]]
    assert list(table) == surviving_rows


def test_dropna_invalid_axis():
td = TabularData(["col-1", "col-2", "col-3"])

Expand Down