Skip to content

Commit

Permalink
Merge pull request #3485 from pycaret/check_duplicate_cols
Browse files Browse the repository at this point in the history
Check duplicate cols
  • Loading branch information
Yard1 committed Apr 25, 2023
2 parents 87f398b + d8c4027 commit 4610ee8
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 0 deletions.
4 changes: 4 additions & 0 deletions pycaret/internal/preprocess/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ def _prepare_dataset(self, X, y=None):
# Make copy to not overwrite mutable arguments
X = to_df(deepcopy(X))

# No duplicate column names are allowed
if len(set(X.columns)) != len(X.columns):
raise ValueError("Duplicate column names found in X.")

# Prepare target column
if isinstance(y, (list, tuple, np.ndarray, pd.Series)):
if not isinstance(y, pd.Series):
Expand Down
8 changes: 8 additions & 0 deletions tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ def test_assign_index(index):
assert pc.dataset.index[0] != 0


def test_duplicate_columns():
    """Check that setup refuses a dataset whose column names are not unique."""
    juice = pycaret.datasets.get_data("juice")

    # Renaming "Purchase" to "Id" leaves the frame with two columns named "Id".
    renaming = {"Purchase": "Id"}
    juice_with_duplicates = juice.rename(columns=renaming)

    expected_message = ".*Duplicate column names found in X.*"
    with pytest.raises(ValueError, match=expected_message):
        pycaret.classification.setup(juice_with_duplicates)


def test_duplicate_indices():
"""Assert that an error is raised when there are duplicate indices."""
data = pycaret.datasets.get_data("juice")
Expand Down

0 comments on commit 4610ee8

Please sign in to comment.