Skip to content

Commit

Permalink
ENH: Raise ParserError instead of IndexError when specifying an incor…
Browse files Browse the repository at this point in the history
…rect number of columns with index_col for the read_csv C parser. (pandas-dev#48774)

* Raise ParserError instead of IndexError when specifying an incorrect number of columns with index_col for the read_csv C parser.

* Move whatsnew entry

* Cleanup after moving whatsnew

* Cleanup after moving whatsnew (pt. 2)
  • Loading branch information
DriesSchaumont committed Sep 27, 2022
1 parent dd846e9 commit 0e93caf
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 1 deletion.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.6.0.rst
Expand Up @@ -115,7 +115,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor

Other API changes
^^^^^^^^^^^^^^^^^
-
- :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the C parser.
-

.. ---------------------------------------------------------------------------
Expand Down
8 changes: 8 additions & 0 deletions pandas/io/parsers/c_parser_wrapper.py
Expand Up @@ -33,6 +33,7 @@

from pandas.io.parsers.base_parser import (
ParserBase,
ParserError,
is_index_col,
)

Expand Down Expand Up @@ -270,6 +271,13 @@ def read(
# implicit index, no index names
arrays = []

if self.index_col and self._reader.leading_cols != len(self.index_col):
raise ParserError(
"Could not construct index. Requested to use "
f"{len(self.index_col)} number of columns, but "
f"{self._reader.leading_cols} left to parse."
)

for i in range(self._reader.leading_cols):
if self.index_col is None:
values = data.pop(i)
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/parser/common/test_read_errors.py
Expand Up @@ -292,6 +292,18 @@ def test_conflict_on_bad_line(all_parsers, error_bad_lines, warn_bad_lines):
parser.read_csv(StringIO(data), on_bad_lines="error", **kwds)


def test_bad_header_uniform_error(all_parsers):
    # The first line of ``data`` holds a single field while the rows after it
    # hold four; combined with ``index_col=0`` every engine is expected to
    # raise ``ParserError``, though the message differs for the C engine.
    parser = all_parsers
    data = "+++123456789...\ncol1,col2,col3,col4\n1,2,3,4\n"
    msg = "Expected 2 fields in line 2, saw 4"
    if parser.engine == "c":
        # The parentheses are required: adjacent string literals are only
        # concatenated inside a single expression. Without them the second
        # literal is a separate no-op statement and ``msg`` is truncated.
        msg = (
            "Could not construct index. Requested to use 1 "
            "number of columns, but 3 left to parse."
        )

    with pytest.raises(ParserError, match=msg):
        parser.read_csv(StringIO(data), index_col=0, on_bad_lines="error")


def test_on_bad_lines_warn_correct_formatting(all_parsers, capsys):
# see gh-15925
parser = all_parsers
Expand Down

0 comments on commit 0e93caf

Please sign in to comment.