Skip to content

Commit

Permalink
Backport PR #52470 on branch 2.0.x (BUG: describe not returning Arrow…
Browse files Browse the repository at this point in the history
…Dtype) (#52495)

Backport PR #52470: BUG: describe not returning ArrowDtype
  • Loading branch information
mroeschke committed Apr 6, 2023
1 parent 31d1de3 commit 00a825f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Fixed regressions
Bug fixes
~~~~~~~~~
- Fixed bug in :func:`merge` when merging with ``ArrowDtype`` on one side and a NumPy dtype on the other side (:issue:`52406`)
- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)

.. ---------------------------------------------------------------------------
.. _whatsnew_201.other:
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/methods/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
is_timedelta64_dtype,
)

import pandas as pd
from pandas.core.arrays.arrow.dtype import ArrowDtype
from pandas.core.arrays.floating import Float64Dtype
from pandas.core.reshape.concat import concat

from pandas.io.formats.format import format_percentiles
Expand Down Expand Up @@ -230,7 +231,12 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
# GH#48340 - always return float on non-complex numeric data
dtype: DtypeObj | None
if is_extension_array_dtype(series):
dtype = pd.Float64Dtype()
if isinstance(series.dtype, ArrowDtype):
import pyarrow as pa

dtype = ArrowDtype(pa.float64())
else:
dtype = Float64Dtype()
elif is_numeric_dtype(series) and not is_complex_dtype(series):
dtype = np.dtype("float")
else:
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2343,3 +2343,16 @@ def test_setitem_boolean_replace_with_mask_segfault():
expected = arr.copy()
arr[np.zeros((N,), dtype=np.bool_)] = False
assert arr._data == expected._data


@pytest.mark.parametrize("pa_type", tm.ALL_INT_PYARROW_DTYPES + tm.FLOAT_PYARROW_DTYPES)
def test_describe_numeric_data(pa_type):
    """Check that describe() on Arrow-backed numeric data returns pyarrow float64.

    A three-element Series of the parametrized Arrow integer or float type is
    summarized, and the result is compared with the expected statistics held
    in an ``ArrowDtype(pa.float64())`` Series.
    """
    # GH 52470
    stat_labels = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
    stat_values = [3, 2, 1, 1, 1.5, 2.0, 2.5, 3]
    expected = pd.Series(
        stat_values,
        dtype=ArrowDtype(pa.float64()),
        index=stat_labels,
    )

    arrow_series = pd.Series([1, 2, 3], dtype=ArrowDtype(pa_type))
    tm.assert_series_equal(arrow_series.describe(), expected)

0 comments on commit 00a825f

Please sign in to comment.