diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f7039021ff276..5fef55347fda8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -556,7 +556,8 @@ I/O - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`) +- Bug in :meth:`read_excel` raising ``ValueError`` when reading a column of boolean values with ``dtype="boolean"``. (:issue:`58159`) - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`) Period ^^^^^^ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index a326925545045..74c0cd7719c13 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -329,15 +329,21 @@ def _from_sequence_of_strings( copy: bool = False, true_values: list[str] | None = None, false_values: list[str] | None = None, + none_values: list[str] | None = None, ) -> BooleanArray: true_values_union = cls._TRUE_VALUES.union(true_values or []) false_values_union = cls._FALSE_VALUES.union(false_values or []) - def map_string(s) -> bool: + if none_values is None: + none_values = [] + + def map_string(s) -> bool | None: if s in true_values_union: return True elif s in false_values_union: return False + elif s in none_values: + return None else: raise ValueError(f"{s} cannot be cast to bool") diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 7e91d9e262748..e7473aabdff87 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -745,11 +745,13 @@ def _cast_types(self, values: ArrayLike, cast_type: 
DtypeObj, column) -> ArrayLi if isinstance(cast_type, BooleanDtype): # error: Unexpected keyword argument "true_values" for # "_from_sequence_of_strings" of "ExtensionArray" + values_str = [str(val) for val in values] return array_type._from_sequence_of_strings( # type: ignore[call-arg] - values, + values_str, dtype=cast_type, true_values=self.true_values, false_values=self.false_values, + none_values=self.na_values, ) else: return array_type._from_sequence_of_strings(values, dtype=cast_type) diff --git a/pandas/tests/io/data/excel/test_none_type.xlsx b/pandas/tests/io/data/excel/test_none_type.xlsx new file mode 100644 index 0000000000000..38aaf72ddfc8f Binary files /dev/null and b/pandas/tests/io/data/excel/test_none_type.xlsx differ diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 6d6c3ad6b77a7..5ce78b1c90e76 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -164,6 +164,36 @@ def xfail_datetimes_with_pyxlsb(engine, request): class TestReaders: + @pytest.mark.parametrize("col", [[True, None, False], [True], [True, False]]) + def test_read_excel_type_check(self, col, tmp_path): + # GH 58159: round-trip via tmp_path; never overwrite checked-in fixtures + df = DataFrame({"bool_column": col}, dtype="boolean") + f_path = tmp_path / "test_boolean_types.xlsx" + + df.to_excel(f_path, index=False) + df2 = pd.read_excel(f_path, dtype={"bool_column": "boolean"}, engine="openpyxl") + tm.assert_frame_equal(df, df2) + + def test_pass_none_type(self, datapath): + # GH 58159 + f_path = datapath("io", "data", "excel", "test_none_type.xlsx") + + with pd.ExcelFile(f_path) as excel: + parsed = pd.read_excel( + excel, + sheet_name="Sheet1", + keep_default_na=True, 
na_values=["nan", "None", "abcd"], + dtype="boolean", + engine="openpyxl", + ) + expected = DataFrame( + {"Test": [True, None, False, None, False, None, True]}, + dtype="boolean", + ) + + tm.assert_frame_equal(parsed, expected) + @pytest.fixture(autouse=True) def cd_and_set_engine(self, engine, datapath, monkeypatch): """