From 6725e37684a24afeaea2757fb3512c58d37cef86 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Tue, 6 Feb 2024 10:38:56 -0700 Subject: [PATCH] DOC: fix PR02 errors in docstring for pandas.core.groupby.DataFrameGroupBy.corrwith (#57243) * DOC: fix PR02 errors in docstring for pandas.core.groupby.DataFrameGroupBy.corrwith * updated code example for groupby(...).corrwith --- ci/code_checks.sh | 1 - pandas/core/frame.py | 8 +++-- pandas/core/groupby/generic.py | 55 +++++++++++++++++++++++++++++++++- 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f8f63de2c3cda..0645918354c29 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -151,7 +151,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.SeriesGroupBy.rolling\ pandas.core.groupby.DataFrameGroupBy.hist\ pandas.core.groupby.DataFrameGroupBy.plot\ - pandas.core.groupby.DataFrameGroupBy.corrwith\ pandas.core.groupby.SeriesGroupBy.plot # There should be no backslash in the final line, please keep this comment in the last ignored function RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 614e8ff0232f5..239000bf50e01 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11060,8 +11060,10 @@ def corrwith( -------- >>> index = ["a", "b", "c", "d", "e"] >>> columns = ["one", "two", "three", "four"] - >>> df1 = pd.DataFrame(np.arange(20).reshape(5, 4), index=index, columns=columns) - >>> df2 = pd.DataFrame(np.arange(16).reshape(4, 4), index=index[:4], columns=columns) + >>> df1 = pd.DataFrame(np.arange(20).reshape(5, 4), + ... index=index, columns=columns) + >>> df2 = pd.DataFrame(np.arange(16).reshape(4, 4), + ... 
index=index[:4], columns=columns) >>> df1.corrwith(df2) one 1.0 two 1.0 @@ -11076,7 +11078,7 @@ def corrwith( d 1.0 e NaN dtype: float64 - """ # noqa: E501 + """ axis = self._get_axis_number(axis) this = self._get_numeric_data() if numeric_only else self diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 32810d57a436e..db626752e9eff 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2491,7 +2491,6 @@ def dtypes(self) -> Series: lambda df: df.dtypes, self._selected_obj ) - @doc(DataFrame.corrwith.__doc__) def corrwith( self, other: DataFrame | Series, @@ -2499,6 +2498,60 @@ def corrwith( method: CorrelationMethod = "pearson", numeric_only: bool = False, ) -> DataFrame: + """ + Compute pairwise correlation. + + Pairwise correlation is computed between rows or columns of + DataFrame with rows or columns of Series or DataFrame. DataFrames + are first aligned along both axes before computing the + correlations. + + Parameters + ---------- + other : DataFrame, Series + Object with which to compute correlations. + drop : bool, default False + Drop missing indices from result. + method : {'pearson', 'kendall', 'spearman'} or callable + Method of correlation: + + * pearson : standard correlation coefficient + * kendall : Kendall Tau correlation coefficient + * spearman : Spearman rank correlation + * callable: callable with input two 1d ndarrays + and returning a float. + + numeric_only : bool, default False + Include only `float`, `int` or `boolean` data. + + .. versionadded:: 1.5.0 + + .. versionchanged:: 2.0.0 + The default value of ``numeric_only`` is now ``False``. + + Returns + ------- + DataFrame + Pairwise correlations. + + See Also + -------- + DataFrame.corr : Compute pairwise correlation of columns. + + Examples + -------- + >>> df1 = pd.DataFrame({"Day": [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... "Data": [6, 6, 8, 5, 4, 2, 7, 3, 9]}) + >>> df2 = pd.DataFrame({"Day": [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... 
"Data": [5, 3, 8, 3, 1, 1, 2, 3, 6]}) + + >>> df1.groupby("Day").corrwith(df2) + Data Day + Day + 1 0.917663 NaN + 2 0.755929 NaN + 3 0.576557 NaN + """ result = self._op_via_apply( "corrwith", other=other,