Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Change numeric_only default to True #46096

Merged
merged 23 commits into from
Mar 18, 2022
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d98e262
BUG: Change numeric_only default to True
NumberPiOso Feb 21, 2022
0db8e52
Add future warning numeric_only in DataFrame.quantile
NumberPiOso Feb 23, 2022
c427795
Add deprecations.quantile_datetime_timedelta_colums in whatsnew
NumberPiOso Feb 23, 2022
fcf37a8
Add stacklevel to warning in DataFrame.quantile
NumberPiOso Feb 23, 2022
7e1d286
Modify test to expect warning in test_quantile.py
NumberPiOso Feb 23, 2022
a5e6def
Ignore FutureWarning at test_quantile.py
NumberPiOso Feb 23, 2022
108bb45
Correct documentation numeric_only in DataFrame.quantile
NumberPiOso Feb 24, 2022
93c5f65
DEPR: Specify nodefault for numeric_only default
NumberPiOso Mar 1, 2022
0f37c30
DEPR: Update whatsnew #7308
NumberPiOso Mar 2, 2022
a324dc9
DEPR: Correct frame.quantile tests to specify numeric_only
NumberPiOso Mar 2, 2022
82db984
DEPR: Correct message error frame.quantile
NumberPiOso Mar 2, 2022
9e9b7a9
DEPR: Remove warning filtering DataFrame.quantile
NumberPiOso Mar 3, 2022
67f2cfa
DEPR: Update whatsnew doc about numeric_only attribute
NumberPiOso Mar 3, 2022
42e7df4
DEPR: Update Examples in docs frame.quantile
NumberPiOso Mar 3, 2022
d0a1221
DEPR: Correct test finalize DataFrame.quantile
NumberPiOso Mar 3, 2022
377cc54
Revert "DEPR: Correct frame.quantile tests to specify numeric_only"
NumberPiOso Mar 8, 2022
b460aa2
DEPR: Update tests of quantile with non num cols
NumberPiOso Mar 8, 2022
7194d13
DEPR: Raise warning frame.quantile with numeric_only
NumberPiOso Mar 8, 2022
f5f7a3e
DEPR: Update doctests quantile, numeric only
NumberPiOso Mar 8, 2022
16e5fc2
DEPR: Correct test_numeric_only_default_false_warning
NumberPiOso Mar 10, 2022
b88b196
DEPR: Add non numeric test to numeric_only warning
NumberPiOso Mar 10, 2022
db68bf2
DEPR: correct_test_produces_warning in frame.quantile
NumberPiOso Mar 10, 2022
4438118
Merge remote-tracking branch 'upstream/main' into ench-quantile-numer…
NumberPiOso Mar 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ Other Deprecations
- Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`)
- Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`)
- Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`)
- Deprecated the default value of ``numeric_only`` in :meth:`DataFrame.quantile`; in a future version ``numeric_only`` will default to ``False``, and datetime/timedelta columns will be included in the result (:issue:`7308`)
-

.. ---------------------------------------------------------------------------
Expand Down
14 changes: 13 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
is_integer_dtype,
is_iterator,
is_list_like,
is_numeric_dtype,
is_object_dtype,
is_scalar,
is_sequence,
Expand Down Expand Up @@ -10558,7 +10559,7 @@ def quantile(
self,
q=0.5,
axis: Axis = 0,
numeric_only: bool = True,
numeric_only: bool | lib.NoDefault = no_default,
interpolation: str = "linear",
):
"""
Expand Down Expand Up @@ -10628,6 +10629,17 @@ def quantile(
"""
validate_percentile(q)
axis = self._get_axis_number(axis)
# Deprecation shim for GH 7308: warn only when the caller left
# ``numeric_only`` unset AND the frame holds at least one non-numeric
# column, then fall back to numeric_only=True for this call.
any_not_numeric = any(not is_numeric_dtype(x) for x in self.dtypes)
if numeric_only is no_default and any_not_numeric:
    warnings.warn(
        # Adjacent string literals are concatenated verbatim, so each
        # piece must carry its own trailing space.
        "In future versions of pandas, numeric_only will be set to "
        "False by default, and the datetime/timedelta columns will "
        "be considered in the results. To not consider these columns "
        "specify numeric_only=True.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    # An explicitly passed numeric_only value never reaches this branch,
    # so it is left as the caller supplied it.
    numeric_only = True

if not is_list_like(q):
# BlockManager.quantile expects listlike, so we wrap and unwrap here
Expand Down
48 changes: 35 additions & 13 deletions pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,28 @@


class TestDataFrameQuantile:
@pytest.mark.parametrize(
"non_num_col",
[
pd.date_range("2014-01-01", periods=3, freq="m"),
["a", "b", "c"],
[DataFrame, Series, Timestamp],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are you testing with this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The actual quantile method must ignore the non-numeric columns and produce a warning. I am using three different non-numeric columns: a date column, a string column and a column of objects.

],
)
def test_numeric_only_default_false_warning(self, non_num_col):
# GH #7308
df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]})
NumberPiOso marked this conversation as resolved.
Show resolved Hide resolved
df["C"] = non_num_col

expected = Series(
[2.0, 3.0],
index=["A", "B"],
name=0.5,
)
with tm.assert_produces_warning(FutureWarning, match="numeric_only"):
result = df.quantile(0.5)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
"df,expected",
[
Expand Down Expand Up @@ -43,21 +65,21 @@ def test_quantile(self, datetime_frame):
from numpy import percentile

df = datetime_frame
q = df.quantile(0.1, axis=0)
q = df.quantile(0.1, axis=0, numeric_only=True)
assert q["A"] == percentile(df["A"], 10)
tm.assert_index_equal(q.index, df.columns)

q = df.quantile(0.9, axis=1)
q = df.quantile(0.9, axis=1, numeric_only=True)
assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90)
tm.assert_index_equal(q.index, df.index)

# test degenerate case
q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0)
q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0, numeric_only=True)
assert np.isnan(q["x"]) and np.isnan(q["y"])

# non-numeric exclusion
df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]})
rs = df.quantile(0.5)
rs = df.quantile(0.5, numeric_only=True)
with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
xp = df.median().rename(0.5)
tm.assert_series_equal(rs, xp)
Expand All @@ -78,7 +100,7 @@ def test_quantile(self, datetime_frame):
# so that we exclude non-numeric along the same axis
# See GH #7312
df = DataFrame([[1, 2, 3], ["a", "b", 4]])
result = df.quantile(0.5, axis=1)
result = df.quantile(0.5, axis=1, numeric_only=True)
expected = Series([3.0, 4.0], index=[0, 1], name=0.5)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -107,7 +129,7 @@ def test_quantile_axis_mixed(self):
"D": ["foo", "bar", "baz"],
}
)
result = df.quantile(0.5, axis=1)
result = df.quantile(0.5, axis=1, numeric_only=True)
expected = Series([1.5, 2.5, 3.5], name=0.5)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -206,7 +228,7 @@ def test_quantile_interpolation_datetime(self, datetime_frame):

# interpolation = linear (default case)
df = datetime_frame
q = df.quantile(0.1, axis=0, interpolation="linear")
q = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear")
assert q["A"] == np.percentile(df["A"], 10)

def test_quantile_interpolation_int(self, int_frame):
Expand Down Expand Up @@ -249,7 +271,7 @@ def test_quantile_datetime(self):
df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]})

# exclude datetime
result = df.quantile(0.5)
result = df.quantile(0.5, numeric_only=True)
expected = Series([2.5], index=["b"])

# datetime
Expand Down Expand Up @@ -285,11 +307,11 @@ def test_quantile_datetime(self):
tm.assert_frame_equal(result, expected)

# empty when numeric_only=True
result = df[["a", "c"]].quantile(0.5)
result = df[["a", "c"]].quantile(0.5, numeric_only=True)
expected = Series([], index=[], dtype=np.float64, name=0.5)
tm.assert_series_equal(result, expected)

result = df[["a", "c"]].quantile([0.5])
result = df[["a", "c"]].quantile([0.5], numeric_only=True)
expected = DataFrame(index=[0.5])
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -567,12 +589,12 @@ def test_quantile_empty_no_columns(self):
# GH#23925 _get_numeric_data may drop all columns
df = DataFrame(pd.date_range("1/1/18", periods=5))
df.columns.name = "captain tightpants"
result = df.quantile(0.5)
result = df.quantile(0.5, numeric_only=True)
expected = Series([], index=[], name=0.5, dtype=np.float64)
expected.index.name = "captain tightpants"
tm.assert_series_equal(result, expected)

result = df.quantile([0.5])
result = df.quantile([0.5], numeric_only=True)
expected = DataFrame([], index=[0.5], columns=[])
expected.columns.name = "captain tightpants"
tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -763,7 +785,7 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
"c": pd.to_datetime(["2011", "2012"]),
}
)
result = df[["a", "c"]].quantile(0.5, axis=axis)
result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True)
expected = Series(
expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
)
Expand Down
18 changes: 15 additions & 3 deletions pandas/tests/generic/test_finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,14 +248,26 @@
marks=not_implemented_mark,
),
pytest.param(
(pd.DataFrame, frame_data, operator.methodcaller("quantile")),
(
pd.DataFrame,
frame_data,
operator.methodcaller("quantile", numeric_only=True),
),
marks=not_implemented_mark,
),
pytest.param(
(pd.DataFrame, frame_data, operator.methodcaller("quantile", q=[0.25, 0.75])),
(
pd.DataFrame,
frame_data,
operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True),
),
),
pytest.param(
(pd.DataFrame, frame_data, operator.methodcaller("quantile")),
(
pd.DataFrame,
frame_data,
operator.methodcaller("quantile", numeric_only=True),
),
marks=not_implemented_mark,
),
(
Expand Down