Skip to content

Commit

Permalink
fix(python): don't allow duplicate columns in read_csv arg (#5908)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 26, 2022
1 parent 3dccb4a commit 0826920
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 3 deletions.
4 changes: 2 additions & 2 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion py-polars/polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,17 @@ def handle_projection_columns(
columns = None
elif not is_str_sequence(columns):
raise ValueError(
"columns arg should contain a list of all integers or all strings"
"'columns' arg should contain a list of all integers or all strings"
" values."
)
if columns and len(set(columns)) != len(columns):
raise ValueError(
f"'columns' arg should only have unique values. Got '{columns}'."
)
if projection and len(set(projection)) != len(projection):
raise ValueError(
f"'columns' arg should only have unique values. Got '{projection}'."
)
return projection, columns # type: ignore[return-value]


Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,3 +301,13 @@ def test_epoch_time_type() -> None:
pl.ComputeError, match="Cannot compute timestamp of a series with dtype 'Time'"
):
pl.Series([time(0, 0, 1)]).dt.epoch("s")


def test_duplicate_columns_arg_csv() -> None:
    """Check that ``read_csv`` rejects a ``columns`` argument with duplicates.

    A small two-column frame is written to an in-memory CSV buffer, which is
    then read back with a ``columns`` selection that repeats an entry. Both
    selection styles are exercised: column names and integer indices.

    Raises
    ------
    AssertionError
        Raised by pytest if ``read_csv`` does not raise a ``ValueError``
        with the expected message for any of the selections.
    """
    f = io.BytesIO()
    pl.DataFrame({"x": [1, 2, 3], "y": ["a", "b", "c"]}).write_csv(f)

    duplicated_selections = (
        ["x", "x", "y"],  # duplicated column name
        # Duplicated integer index; presumably validated through the
        # separate 'projection' uniqueness check, which reports the same
        # "'columns' arg ..." message.
        [0, 0, 1],
    )
    for columns in duplicated_selections:
        # Rewind before every attempt so each read starts from the beginning
        # of the buffer regardless of what a previous attempt consumed.
        f.seek(0)
        with pytest.raises(
            ValueError, match=r"'columns' arg should only have unique values"
        ):
            pl.read_csv(f, columns=columns)

0 comments on commit 0826920

Please sign in to comment.