From d1c54010f7558252199f3f9590986e2aed1dc44f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 30 Aug 2022 13:18:07 +0100 Subject: [PATCH] Backport PR #48246: REGR: iloc not possible for sparse DataFrame --- doc/source/whatsnew/v1.4.4.rst | 2 +- pandas/core/internals/managers.py | 17 +++++++++++++++-- pandas/tests/indexing/test_loc.py | 8 ++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index e03e6cd41ebd3..fc7a7c915bde5 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -32,7 +32,7 @@ Fixed regressions - Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`) - Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`) - Fixed regression in :meth:`DataFrame.eval` creating a copy when updating inplace (:issue:`47449`) -- +- Fixed regression where getting a row using :meth:`DataFrame.iloc` with :class:`SparseDtype` would raise (:issue:`46406`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3084bcea49f05..9f4c799941afd 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1085,11 +1085,20 @@ def fast_xs(self, loc: int) -> SingleBlockManager: dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) n = len(self) - if isinstance(dtype, ExtensionDtype): + + # GH#46406 + immutable_ea = isinstance(dtype, SparseDtype) + + if isinstance(dtype, ExtensionDtype) and not immutable_ea: cls = dtype.construct_array_type() result = cls._empty((n,), dtype=dtype) else: - result = np.empty(n, dtype=dtype) + # error: Argument "dtype" to "empty" has incompatible type + # "Union[Type[object], dtype[Any], ExtensionDtype, None]"; expected + # "None" + result = np.empty( + n, dtype=object if immutable_ea else dtype # type: ignore[arg-type] + ) result = ensure_wrapped_if_datetimelike(result) for blk in self.blocks: @@ -1098,6 +1107,10 @@ def fast_xs(self, loc: int) -> SingleBlockManager: for i, rl in enumerate(blk.mgr_locs): result[rl] = blk.iget((i, loc)) + if immutable_ea: + dtype = cast(ExtensionDtype, dtype) + result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) + block = new_block(result, placement=slice(0, len(result)), ndim=1) return SingleBlockManager(block, self.axes[0]) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index cf7db65015fa7..4e5571c7087e7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1341,6 +1341,14 @@ def test_loc_getitem_sparse_series(self): expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("indexer", ["loc", "iloc"]) + def test_getitem_single_row_sparse_df(self, indexer): + # GH#46406 + df = DataFrame([[1.0, 0.0, 1.5], [0.0, 2.0, 0.0]], dtype=SparseDtype(float)) + result = getattr(df, indexer)[0] + expected = Series([1.0, 0.0, 1.5], dtype=SparseDtype(float), name=0) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) def test_loc_getitem_iterable(self, float_frame, key_type): idx = key_type(["A", "B", "C"])