Skip to content

Commit

Permalink
ENH: Allow parameter min_periods in DataFrame.corrwith() (#58231)
Browse files Browse the repository at this point in the history
* Testing

* Testing

* enhance test case

* add test

* testing

* add

* add test

* enhance

* add

* add

* add

* add

* add

* add

* enhance

* enhance

* enhance

* Update doc/source/whatsnew/v3.0.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* test

* Update test_cov_corr.py

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
  • Loading branch information
Khor Chean Wei and mroeschke committed Apr 30, 2024
1 parent 66cfd80 commit 0f9adf8
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Other enhancements
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as an optional argument, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
- :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11132,6 +11132,7 @@ def corrwith(
drop: bool = False,
method: CorrelationMethod = "pearson",
numeric_only: bool = False,
min_periods: int | None = None,
) -> Series:
"""
Compute pairwise correlation.
Expand Down Expand Up @@ -11162,6 +11163,9 @@ def corrwith(
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.
min_periods : int, optional
Minimum number of observations needed to have a valid result.
.. versionadded:: 1.5.0
.. versionchanged:: 2.0.0
Expand Down Expand Up @@ -11205,7 +11209,10 @@ def corrwith(
this = self._get_numeric_data() if numeric_only else self

if isinstance(other, Series):
return this.apply(lambda x: other.corr(x, method=method), axis=axis)
return this.apply(
lambda x: other.corr(x, method=method, min_periods=min_periods),
axis=axis,
)

if numeric_only:
other = other._get_numeric_data()
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/frame/methods/test_cov_corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,3 +461,28 @@ def test_corrwith_spearman_with_tied_data(self):
result = df_bool.corrwith(ser_bool)
expected = Series([0.57735, 0.57735], index=["A", "B"])
tm.assert_series_equal(result, expected)

def test_corrwith_min_periods_method(self):
    """Check ``min_periods`` together with a non-default ``method``.

    Two thresholds are exercised against the same data: one that every
    column satisfies, and one that column "A" (which holds a NaN) cannot
    satisfy, so the test fails if ``min_periods`` is silently ignored.
    """
    # GH#9490
    # The test is skipped when scipy cannot be imported.
    pytest.importorskip("scipy")
    df1 = DataFrame(
        {
            "A": [1, np.nan, 7, 8],
            "B": [False, True, True, False],
            "C": [10, 4, 9, 3],
        }
    )
    df2 = df1[["B", "C"]]

    # Threshold below the number of paired observations in every column:
    # all three correlations are computed.
    result = (df1 + 1).corrwith(df2.B, method="spearman", min_periods=2)
    expected = Series([0.0, 1.0, 0.0], index=["A", "B", "C"])
    tm.assert_series_equal(result, expected)

    # "A" has only three non-NaN observations, so requiring four leaves it
    # without a valid result; "B" and "C" still have four each.
    result = (df1 + 1).corrwith(df2.B, method="spearman", min_periods=4)
    expected = Series([np.nan, 1.0, 0.0], index=["A", "B", "C"])
    tm.assert_series_equal(result, expected)

def test_corrwith_min_periods_boolean(self):
    """Check ``min_periods`` with boolean data and the default method.

    Covers both sides of the threshold: enough observations (a correlation
    is returned) and too few observations (NaN is returned).
    """
    # GH#9490
    df_bool = DataFrame(
        {"A": [True, True, False, False], "B": [True, False, False, True]}
    )
    ser_bool = Series([True, True, False, True])

    # Four paired observations satisfy a threshold of three.
    result = df_bool.corrwith(ser_bool, min_periods=3)
    expected = Series([0.57735, 0.57735], index=["A", "B"])
    tm.assert_series_equal(result, expected)

    # Four paired observations cannot satisfy a threshold of five, so no
    # column has a valid result.
    result = df_bool.corrwith(ser_bool, min_periods=5)
    expected = Series([float("nan"), float("nan")], index=["A", "B"])
    tm.assert_series_equal(result, expected)
2 changes: 2 additions & 0 deletions pandas/tests/groupby/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ def test_frame_consistency(groupby_func):
exclude_expected = {"numeric_only"}
elif groupby_func in ("quantile",):
exclude_expected = {"method", "axis"}
elif groupby_func in ["corrwith"]:
exclude_expected = {"min_periods"}
if groupby_func not in ["pct_change", "size"]:
exclude_expected |= {"axis"}

Expand Down

0 comments on commit 0f9adf8

Please sign in to comment.