diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 99415b6fc6ec8..5dbfa21715842 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -635,6 +635,18 @@ def is_string_dtype(arr_or_dtype) -> bool: >>> is_string_dtype(pd.Series([1, 2], dtype=object)) False """ + # Handle Categorical consistently whether passed as array or dtype + if hasattr(arr_or_dtype, "dtype") and isinstance( + _get_dtype(arr_or_dtype), CategoricalDtype + ): + return is_all_strings(arr_or_dtype) + elif isinstance(arr_or_dtype, CategoricalDtype): + # For CategoricalDtype, check if categories are strings + # Handle case where categories is None + if arr_or_dtype.categories is None: + return False + return arr_or_dtype.categories.inferred_type == "string" + if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O": return is_all_strings(arr_or_dtype) @@ -1900,6 +1912,9 @@ def is_all_strings(value: ArrayLike) -> bool: np.asarray(value), skipna=False ) elif isinstance(dtype, CategoricalDtype): + # Handle case where categories is None + if dtype.categories is None: + return False return dtype.categories.inferred_type == "string" return dtype == "string" diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 62831f941b535..2af351b66cf5d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -180,7 +180,11 @@ def equals(self, other: Any) -> bool: should_try = other.inferred_type in inferable elif isinstance(other.dtype, CategoricalDtype): other = cast("CategoricalIndex", other) - should_try = other.categories.inferred_type in inferable + # Handle case where categories is None + if other.categories is not None: + should_try = other.categories.inferred_type in inferable + else: + should_try = False if should_try: try: diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index cd5050cab8ad5..d69f810b92860 100644 --- 
a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -337,6 +337,23 @@ def test_is_string_dtype_nullable(nullable_string_dtype): assert com.is_string_dtype(pd.array(["a", "b"], dtype=nullable_string_dtype)) +def test_is_string_dtype_categorical(): + # GH#XXXXX - is_string_dtype should be consistent for Categorical array and dtype + # A dtype whose categories are unset (None) is never a string dtype + unset_dtype = pd.CategoricalDtype() + assert not com.is_string_dtype(unset_dtype) + + # Test with string categories + cat_string_series = pd.Categorical(["A", "B", "C"], categories=["A", "B", "C"]) + assert com.is_string_dtype(cat_string_series) + assert com.is_string_dtype(cat_string_series.dtype) + + # Test with non-string categories + cat_int_series = pd.Categorical([1, 2, 3], categories=[1, 2, 3]) + assert not com.is_string_dtype(cat_int_series) + assert not com.is_string_dtype(cat_int_series.dtype) + + integer_dtypes: list = [] diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index f93105498ac79..b599be5d042fe 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -168,7 +168,7 @@ def test_query_duplicate_column_name(self, engine, parser): } ).rename(columns={"B": "A"}) - res = df.query('C == 1', engine=engine, parser=parser) + res = df.query("C == 1", engine=engine, parser=parser) expect = DataFrame( [[1, 1, 1]], @@ -1411,7 +1411,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self): def test_expr_with_column_name_with_backtick(self): # GH 59285 df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)}) - result = df.query("`a``b` < 2") # noqa + result = df.query("`a``b` < 2") # Note: Formatting checks may wrongly consider the above ``inline code``. expected = df[df["a`b"] < 2] tm.assert_frame_equal(result, expected)