Skip to content

Commit

Permalink
Backport PR #54881 on branch 2.1.x (REGR: read_csv raising when dtype…
Browse files Browse the repository at this point in the history
…s is specified with usecols) (#54926)
  • Loading branch information
phofl committed Sep 1, 2023
1 parent 9c8800d commit cf32a23
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ including other versions of pandas.

Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed regression in :func:`read_csv` when ``usecols`` is given and ``dtype`` is a dict for ``engine="python"`` (:issue:`54868`)
- Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
- Fixed regression when comparing a :class:`Series` with ``datetime64`` dtype with ``None`` (:issue:`54870`)

Expand Down
8 changes: 4 additions & 4 deletions pandas/io/parsers/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1176,17 +1176,17 @@ def _set_no_thousand_columns(self) -> set[int]:
)
if self.columns and self.dtype:
assert self._col_indices is not None
for i in self._col_indices:
for i, col in zip(self._col_indices, self.columns):
if not isinstance(self.dtype, dict) and not is_numeric_dtype(
self.dtype
):
no_thousands_columns.add(i)
if (
isinstance(self.dtype, dict)
and self.columns[i] in self.dtype
and col in self.dtype
and (
not is_numeric_dtype(self.dtype[self.columns[i]])
or is_bool_dtype(self.dtype[self.columns[i]])
not is_numeric_dtype(self.dtype[col])
or is_bool_dtype(self.dtype[col])
)
):
no_thousands_columns.add(i)
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/io/parser/dtypes/test_dtypes_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,3 +558,20 @@ def test_string_inference(all_parsers):
columns=pd.Index(["a", "b"], dtype=dtype),
)
tm.assert_frame_equal(result, expected)


def test_dtypes_with_usecols(all_parsers):
    """Read a subset of columns via ``usecols`` with a dict ``dtype`` (GH#54868).

    Column ``b`` is skipped by ``usecols`` while ``dtype`` requests ``object``
    for column ``a`` only; the parsed frame is compared against the expected
    two-column frame.
    """
    csv_text = """a,b,c
1,2,3
4,5,6"""

    result = all_parsers.read_csv(
        StringIO(csv_text),
        usecols=["a", "c"],
        dtype={"a": object},
    )

    # The object-typed column is expected to hold ints under the pyarrow
    # engine and strings under every other engine.
    col_a = [1, 4] if all_parsers.engine == "pyarrow" else ["1", "4"]
    expected = DataFrame(
        {
            "a": pd.Series(col_a, dtype=object),
            "c": [3, 6],
        }
    )
    tm.assert_frame_equal(result, expected)

0 comments on commit cf32a23

Please sign in to comment.