Skip to content

Commit

Permalink
REF: Use default_index or preserve original Index type for empty-li…
Browse files Browse the repository at this point in the history
…ke results (#59035)

* Use more default_index for empty cases

* fix tests

* Update number

* Address typing
  • Loading branch information
mroeschke committed Jun 18, 2024
1 parent f9f12de commit 849016c
Show file tree
Hide file tree
Showing 13 changed files with 35 additions and 23 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -503,8 +503,8 @@ Timezones

Numeric
^^^^^^^
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` and a list-like ``q`` produced an empty result (:issue:`59035`)
- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)
-

Conversion
^^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -13078,7 +13078,7 @@ def quantile(

if len(data.columns) == 0:
# GH#23925 _get_numeric_data may have dropped all columns
cols = Index([], name=self.columns.name)
cols = self.columns[:0]

dtype = np.float64
if axis == 1:
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@
Index,
MultiIndex,
PeriodIndex,
RangeIndex,
default_index,
ensure_index,
)
Expand Down Expand Up @@ -1852,7 +1851,7 @@ def _drop_labels_or_levels(self, keys, axis: AxisInt = 0):
else:
# Drop the last level of Index by replacing with
# a RangeIndex
dropped.columns = RangeIndex(dropped.columns.size)
dropped.columns = default_index(dropped.columns.size)

# Handle dropping index labels
if labels_to_drop:
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ class providing the base-class of operations.
from pandas.core.indexes.api import (
Index,
MultiIndex,
RangeIndex,
default_index,
)
from pandas.core.internals.blocks import ensure_block_shape
Expand Down Expand Up @@ -1264,7 +1263,7 @@ def _set_result_index_ordered(
if self._grouper.has_dropped_na:
# Add back in any missing rows due to dropna - index here is integral
# with values referring to the row of the input so can use RangeIndex
result = result.reindex(RangeIndex(len(index)), axis=0)
result = result.reindex(default_index(len(index)), axis=0)
result = result.set_axis(index, axis=0)

return result
Expand Down Expand Up @@ -1334,7 +1333,7 @@ def _wrap_aggregated_output(
# enforced in __init__
result = self._insert_inaxis_grouper(result, qs=qs)
result = result._consolidate()
result.index = RangeIndex(len(result))
result.index = default_index(len(result))

else:
index = self._grouper.result_index
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pandas.core.indexes.api import (
Index,
MultiIndex,
default_index,
)
from pandas.core.series import Series

Expand Down Expand Up @@ -901,7 +902,7 @@ def is_in_obj(gpr) -> bool:
if len(groupings) == 0 and len(obj):
raise ValueError("No group keys passed!")
if len(groupings) == 0:
groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp)))
groupings.append(Grouping(default_index(0), np.array([], dtype=np.intp)))

# create the internals grouper
grouper = ops.BaseGrouper(group_axis, groupings, sort=sort, dropna=dropna)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def _get_combined_index(
# TODO: handle index names!
indexes = _get_distinct_objs(indexes)
if len(indexes) == 0:
index = Index([])
index: Index = default_index(0)
elif len(indexes) == 1:
index = indexes[0]
elif intersect:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def blklocs(self) -> npt.NDArray[np.intp]:
def make_empty(self, axes=None) -> Self:
"""return an empty BlockManager with the items axis of len 0"""
if axes is None:
axes = [Index([])] + self.axes[1:]
axes = [default_index(0)] + self.axes[1:]

# preserve dtype if possible
if self.ndim == 1:
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/methods/selectn.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
)
from pandas.core.dtypes.dtypes import BaseMaskedDtype

from pandas.core.indexes.api import default_index

if TYPE_CHECKING:
from pandas._typing import (
DtypeObj,
Expand All @@ -38,6 +40,7 @@

from pandas import (
DataFrame,
Index,
Series,
)
else:
Expand Down Expand Up @@ -199,8 +202,6 @@ def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> No
self.columns = columns

def compute(self, method: str) -> DataFrame:
from pandas.core.api import Index

n = self.n
frame = self.obj
columns = self.columns
Expand All @@ -227,7 +228,7 @@ def get_indexer(current_indexer: Index, other_indexer: Index) -> Index:
original_index = frame.index
cur_frame = frame = frame.reset_index(drop=True)
cur_n = n
indexer = Index([], dtype=np.int64)
indexer: Index = default_index(0)

for i, column in enumerate(columns):
# For each column we apply method to cur_frame[column].
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from pandas.core.indexes.api import (
Index,
MultiIndex,
RangeIndex,
default_index,
)
from pandas.core.reshape.concat import concat
from pandas.core.series import Series
Expand Down Expand Up @@ -1047,7 +1047,7 @@ def stack_reshape(
if data.ndim == 1:
data.name = 0
else:
data.columns = RangeIndex(len(data.columns))
data.columns = default_index(len(data.columns))
buf.append(data)

if len(buf) > 0 and not frame.empty:
Expand Down
13 changes: 11 additions & 2 deletions pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,14 +710,14 @@ def test_quantile_empty_no_columns(self, interp_method):
result = df.quantile(
0.5, numeric_only=True, interpolation=interpolation, method=method
)
expected = Series([], index=[], name=0.5, dtype=np.float64)
expected = Series([], name=0.5, dtype=np.float64)
expected.index.name = "captain tightpants"
tm.assert_series_equal(result, expected)

result = df.quantile(
[0.5], numeric_only=True, interpolation=interpolation, method=method
)
expected = DataFrame([], index=[0.5], columns=[])
expected = DataFrame([], index=[0.5])
expected.columns.name = "captain tightpants"
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -926,3 +926,12 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
)
tm.assert_series_equal(result, expected)


def test_multi_quantile_numeric_only_retains_columns():
# GH#59035: the frame's only column holds strings, so numeric_only=True
# leaves no columns to compute quantiles over (the empty-result path).
df = DataFrame(list("abc"))
result = df.quantile([0.5, 0.7], numeric_only=True)
# Expected frame is indexed by the requested quantiles and is built without
# a columns argument, so its columns are whatever DataFrame creates by default.
expected = DataFrame(index=[0.5, 0.7])
# check_index_type / check_column_type are switched on explicitly so the
# comparison also covers the types of the row index and of the columns,
# not only their (empty) contents.
tm.assert_frame_equal(
result, expected, check_index_type=True, check_column_type=True
)
8 changes: 6 additions & 2 deletions pandas/tests/generic/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,19 @@ def test_get_numeric_data(self, frame_or_series):
if isinstance(o, DataFrame):
# preserve columns dtype
expected.columns = o.columns[:0]
# https://github.com/pandas-dev/pandas/issues/50862
tm.assert_equal(result.reset_index(drop=True), expected)
tm.assert_equal(result, expected)

# get the bool data
arr = np.array([True, True, False, True])
o = construct(frame_or_series, n, value=arr, **kwargs)
result = o._get_numeric_data()
tm.assert_equal(result, o)

def test_get_bool_data_empty_preserve_index(self):
# Round-trip an empty bool-dtype Series through _get_bool_data and require
# the result to equal the input; check_index_type=True makes the comparison
# cover the type of the (empty) index as well.
expected = Series([], dtype="bool")
result = expected._get_bool_data()
tm.assert_series_equal(result, expected, check_index_type=True)

def test_nonzero(self, frame_or_series):
# GH 4633
# look at the boolean/nonzero behavior for objects
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1558,7 +1558,7 @@ def test_ensure_index_uint64(self):

def test_get_combined_index(self):
# Combine an empty list of indexes and compare against the expected empty index.
result = _get_combined_index([])
# NOTE(review): two consecutive assignments to `expected` — this looks like the
# removed and the added line of a diff hunk shown together. As written, only the
# later value (RangeIndex(0)) reaches the assertion; confirm against the real file.
expected = Index([])
expected = RangeIndex(0)
tm.assert_index_equal(result, expected)


Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/series/methods/test_get_numeric_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from pandas import (
Index,
Series,
date_range,
)
Expand All @@ -19,7 +18,7 @@ def test_get_numeric_data_preserve_dtype(self):

obj = Series([1, "2", 3.0])
result = obj._get_numeric_data()
expected = Series([], dtype=object, index=Index([], dtype=object))
expected = Series([], dtype=object)
tm.assert_series_equal(result, expected)

obj = Series([True, False, True])
Expand All @@ -28,5 +27,5 @@ def test_get_numeric_data_preserve_dtype(self):

obj = Series(date_range("20130101", periods=3))
result = obj._get_numeric_data()
expected = Series([], dtype="M8[ns]", index=Index([], dtype=object))
expected = Series([], dtype="M8[ns]")
tm.assert_series_equal(result, expected)

0 comments on commit 849016c

Please sign in to comment.