From f848f10177538e2a7cb39a6be81e95b1930ce786 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Wed, 27 Mar 2024 17:48:16 +0000 Subject: [PATCH] Backport PR #57758: BUG: DataFrame Interchange Protocol errors on Boolean columns --- doc/source/whatsnew/v2.2.2.rst | 1 + pandas/core/interchange/utils.py | 3 +++ pandas/tests/interchange/test_impl.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index 54084abab7817..2a48403d9a318 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -22,6 +22,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the column's type was nullable boolean (:issue:`55332`) - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) diff --git a/pandas/core/interchange/utils.py b/pandas/core/interchange/utils.py index 2a19dd5046aa3..fd1c7c9639242 100644 --- a/pandas/core/interchange/utils.py +++ b/pandas/core/interchange/utils.py @@ -144,6 +144,9 @@ def dtype_to_arrow_c_fmt(dtype: DtypeObj) -> str: elif isinstance(dtype, DatetimeTZDtype): return ArrowCTypes.TIMESTAMP.format(resolution=dtype.unit[0], tz=dtype.tz) + elif isinstance(dtype, pd.BooleanDtype): + return ArrowCTypes.BOOL + raise NotImplementedError( f"Conversion of {dtype} to Arrow C format string is not implemented." ) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 1ccada9116d4c..25418b8bb2b37 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -470,6 +470,7 @@ def test_non_str_names_w_duplicates(): ), ([1.0, 2.25, None], "Float32", "float32"), ([1.0, 2.25, None], "Float32[pyarrow]", "float32"), + ([True, False, None], "boolean", "bool"), ([True, False, None], "boolean[pyarrow]", "bool"), (["much ado", "about", None], "string[pyarrow_numpy]", "large_string"), (["much ado", "about", None], "string[pyarrow]", "large_string"), @@ -532,6 +533,7 @@ def test_pandas_nullable_with_missing_values( ), ([1.0, 2.25, 5.0], "Float32", "float32"), ([1.0, 2.25, 5.0], "Float32[pyarrow]", "float32"), + ([True, False, False], "boolean", "bool"), ([True, False, False], "boolean[pyarrow]", "bool"), (["much ado", "about", "nothing"], "string[pyarrow_numpy]", "large_string"), (["much ado", "about", "nothing"], "string[pyarrow]", "large_string"),