1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -210,6 +210,7 @@ Other enhancements
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now support positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
- :meth:`Series.corr`, :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith` with ``method="kendall"`` and ``method="spearman"`` now work with ordered categorical data types (:issue:`60306`)
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
- :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`)
- :meth:`Series.rank` and :meth:`DataFrame.rank` with numpy-nullable dtypes preserve ``NA`` values and return ``UInt64`` dtype where appropriate instead of casting ``NA`` to ``NaN`` with ``float64`` dtype (:issue:`62043`)
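Illustrative usage (not part of the diff): a minimal sketch of the behavior the new whatsnew entry describes, assuming this change is applied and SciPy is installed (pandas delegates method="spearman"/"kendall" to scipy.stats).

import pandas as pd

ser_cat = pd.Series(
    pd.Categorical(
        ["low", "med", "high"], categories=["low", "med", "high"], ordered=True
    )
)
ser_num = pd.Series([1.0, 2.0, 3.0])

# Ordered categories are ranked by their category order, so a perfectly
# monotone pairing gives a Spearman correlation of 1.0; previously this
# combination of dtypes was not supported.
print(ser_cat.corr(ser_num, method="spearman"))  # 1.0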
40 changes: 40 additions & 0 deletions pandas/core/frame.py
@@ -115,6 +115,7 @@
from pandas.core.dtypes.dtypes import (
ArrowDtype,
BaseMaskedDtype,
CategoricalDtype,
ExtensionDtype,
)
from pandas.core.dtypes.generic import (
@@ -11680,6 +11681,10 @@ def corr(
data = self._get_numeric_data() if numeric_only else self
cols = data.columns
idx = cols.copy()

if method in ("spearman", "kendall"):
data = data._transform_ord_cat_cols_to_coded_cols()

mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)

if method == "pearson":
@@ -11969,6 +11974,8 @@ def corrwith(
correl = num / dom

elif method in ["kendall", "spearman"] or callable(method):
left = left._transform_ord_cat_cols_to_coded_cols()
right = right._transform_ord_cat_cols_to_coded_cols()

def c(x):
return nanops.nancorr(x[0], x[1], method=method)
@@ -12000,6 +12007,39 @@ def c(x):

return correl

def _transform_ord_cat_cols_to_coded_cols(self) -> DataFrame:
"""
any ordered categorical columns are transformed to the respective
categorical codes while other columns remain untouched
"""
categ = self.select_dtypes("category")
if len(categ.columns) == 0:
return self

data = self.copy(deep=False)
cols_convert = categ.loc[:, categ.agg(lambda x: x.cat.ordered)].columns.unique()
single_cols = [col for col in cols_convert if isinstance(data[col], Series)]
duplicated_cols = [
col for col in cols_convert if isinstance(data[col], DataFrame)
]

if not single_cols and not duplicated_cols:
return self

if single_cols:
data[single_cols] = data[single_cols].apply(
lambda x: x.cat.codes.replace(-1, np.nan)
)

if duplicated_cols:
data[duplicated_cols] = data[duplicated_cols].apply(
lambda x: x.cat.codes.replace(-1, np.nan)
if isinstance(x.dtype, CategoricalDtype) and bool(x.dtype.ordered)
else x
)

return data

# ----------------------------------------------------------------------
# ndarray-like stats methods

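Illustrative sketch (not part of the diff) of the per-column mapping that _transform_ord_cat_cols_to_coded_cols applies to ordered categorical columns: the category codes are used, with the missing-value sentinel -1 replaced by NaN so that nancorr can drop those pairs.

import numpy as np
import pandas as pd

col = pd.Series(
    pd.Categorical(
        ["m", "h", None, "low"], categories=["low", "m", "h"], ordered=True
    )
)
# col.cat.codes is [1, 2, -1, 0]; replacing -1 with NaN marks the missing
# entry, mirroring the helper above.
print(col.cat.codes.replace(-1, np.nan).tolist())  # [1.0, 2.0, nan, 0.0]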
6 changes: 6 additions & 0 deletions pandas/core/series.py
@@ -2685,6 +2685,12 @@ def corr(
if len(this) == 0:
return np.nan

if method in ("spearman", "kendall"):
if this.dtype == "category" and this.cat.ordered:
this = this.cat.codes.replace(-1, np.nan)
if other.dtype == "category" and other.cat.ordered:
other = other.cat.codes.replace(-1, np.nan)

this_values = this.to_numpy(dtype=float, na_value=np.nan, copy=False)
other_values = other.to_numpy(dtype=float, na_value=np.nan, copy=False)

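Quick equivalence check (not part of the diff; requires SciPy): with this change, Series.corr on an ordered categorical should match scipy.stats applied directly to the category codes, mirroring what the new tests assert.

import numpy as np
import pandas as pd
from scipy import stats

cat = pd.Series(
    pd.Categorical(["h", "low", "m"], categories=["low", "m", "h"], ordered=True)
)
num = pd.Series([2.5, 1.0, 3.0])

result = cat.corr(num, method="kendall")  # uses the new code path
expected = stats.kendalltau(cat.cat.codes, num)[0]
print(np.isclose(result, expected))  # True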
89 changes: 89 additions & 0 deletions pandas/tests/frame/methods/test_cov_corr.py
@@ -1,3 +1,5 @@
from itertools import combinations

import numpy as np
import pytest

@@ -252,6 +254,46 @@ def test_corr_numeric_only(self, meth, numeric_only):
with pytest.raises(ValueError, match="could not convert string to float"):
df.corr(meth, numeric_only=numeric_only)

@pytest.mark.parametrize("method", ["kendall", "spearman"])
@td.skip_if_no("scipy")
def test_corr_rank_ordered_categorical(
self,
method,
):
df = DataFrame(
{
"ord_cat": Series(
pd.Categorical(
["low", "m", "h", "vh"],
categories=["low", "m", "h", "vh"],
ordered=True,
)
),
"ord_cat_none": Series(
pd.Categorical(
["low", "m", "h", None],
categories=["low", "m", "h"],
ordered=True,
)
),
"ord_int": Series([0, 1, 2, 3]),
"ord_float": Series([2.0, 3.0, 4.5, 6.5]),
"ord_float_nan": Series([2.0, 3.0, 4.5, np.nan]),
"ord_cat_shuff": Series(
pd.Categorical(
["m", "h", "vh", "low"],
categories=["low", "m", "h", "vh"],
ordered=True,
)
),
"ord_int_shuff": Series([2, 3, 0, 1]),
}
)
corr_calc = df.corr(method=method)
for col1, col2 in combinations(df.columns, r=2):
corr_expected = df[col1].corr(df[col2], method=method)
tm.assert_almost_equal(corr_calc[col1][col2], corr_expected)


class TestDataFrameCorrWith:
@pytest.mark.parametrize(
@@ -493,3 +535,50 @@ def test_cov_with_missing_values(self):
result2 = df.dropna().cov()
tm.assert_frame_equal(result1, expected)
tm.assert_frame_equal(result2, expected)

@pytest.mark.parametrize("method", ["kendall", "spearman"])
def test_corr_rank_ordered_categorical(
self,
method,
):
pytest.importorskip("scipy")
df1 = DataFrame(
{
"a": Series(
pd.Categorical(
["low", "m", "h", "vh"],
categories=["low", "m", "h", "vh"],
ordered=True,
)
),
"b": Series(
pd.Categorical(
["low", "m", "h", None],
categories=["low", "m", "h"],
ordered=True,
)
),
"c": Series([0, 1, 2, 3]),
"d": Series([2.0, 3.0, 4.5, 6.5]),
}
)

df2 = DataFrame(
{
"a": Series([2.0, 3.0, 4.5, np.nan]),
"b": Series(
pd.Categorical(
["m", "h", "vh", "low"],
categories=["low", "m", "h", "vh"],
ordered=True,
)
),
"c": Series([2, 3, 0, 1]),
"d": Series([2.0, 3.0, 4.5, 6.5]),
}
)

corr_calc = df1.corrwith(df2, method=method)
for col in df1.columns:
corr_expected = df1[col].corr(df2[col], method=method)
tm.assert_almost_equal(corr_calc.get(col), corr_expected)
74 changes: 74 additions & 0 deletions pandas/tests/series/methods/test_cov_corr.py
@@ -184,3 +184,77 @@ def test_corr_callable_method(self, datetime_series):
df = pd.DataFrame([s1, s2])
expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)

@pytest.mark.parametrize("method", ["kendall", "spearman"])
def test_corr_rank_ordered_categorical(
Review comment (Member): This test is pretty long, to the point where it's unclear what its intent is. Maybe it's worth breaking it up into a few tests? Or adding parameterization?

self,
method,
):
stats = pytest.importorskip("scipy.stats")
method_scipy_func = {"kendall": stats.kendalltau, "spearman": stats.spearmanr}
ser_ord_cat = Series(
pd.Categorical(
["low", "med", "high", "very_high"],
categories=["low", "med", "high", "very_high"],
ordered=True,
)
)
ser_ord_cat_codes = ser_ord_cat.cat.codes.replace(-1, np.nan)
ser_ord_int = Series([0, 1, 2, 3])
ser_ord_float = Series([2.0, 3.0, 4.5, 6.5])

corr_calc = ser_ord_cat.corr(ser_ord_int, method=method)
corr_expected = method_scipy_func[method](
ser_ord_cat_codes, ser_ord_int, nan_policy="omit"
)[0]
tm.assert_almost_equal(corr_calc, corr_expected)

corr_calc = ser_ord_cat.corr(ser_ord_float, method=method)
corr_expected = method_scipy_func[method](
ser_ord_cat_codes, ser_ord_float, nan_policy="omit"
)[0]
tm.assert_almost_equal(corr_calc, corr_expected)

corr_calc = ser_ord_cat.corr(ser_ord_cat, method=method)
corr_expected = method_scipy_func[method](
ser_ord_cat_codes, ser_ord_cat_codes, nan_policy="omit"
)[0]
tm.assert_almost_equal(corr_calc, corr_expected)

ser_ord_cat_shuff = Series(
pd.Categorical(
["high", "low", "very_high", "med"],
categories=["low", "med", "high", "very_high"],
ordered=True,
)
)
ser_ord_cat_shuff_codes = ser_ord_cat_shuff.cat.codes.replace(-1, np.nan)

corr_calc = ser_ord_cat_shuff.corr(ser_ord_cat, method=method)
corr_expected = method_scipy_func[method](
ser_ord_cat_shuff_codes, ser_ord_cat_codes, nan_policy="omit"
)[0]
tm.assert_almost_equal(corr_calc, corr_expected)

corr_calc = ser_ord_cat_shuff.corr(ser_ord_cat_shuff, method=method)
corr_expected = method_scipy_func[method](
ser_ord_cat_shuff_codes, ser_ord_cat_shuff_codes, nan_policy="omit"
)[0]
tm.assert_almost_equal(corr_calc, corr_expected)

ser_ord_cat_with_nan = Series(
pd.Categorical(
["h", "low", "vh", None, "m"],
categories=["low", "m", "h", "vh"],
ordered=True,
)
)
ser_ord_cat_shuff_with_nan_codes = ser_ord_cat_with_nan.cat.codes.replace(
-1, np.nan
)
ser_ord_int = Series([2, 0, 1, 3, None])
corr_calc = ser_ord_cat_with_nan.corr(ser_ord_int, method=method)
corr_expected = method_scipy_func[method](
ser_ord_cat_shuff_with_nan_codes, ser_ord_int, nan_policy="omit"
)[0]
tm.assert_almost_equal(corr_calc, corr_expected)