diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 448ceffdaa1eb..024ce8dd7f96b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1187,6 +1187,7 @@ Other ^^^^^ - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`) +- Bug in :func:`assert_frame_equal` failing when comparing two :class:`DataFrame` objects containing ``pd.NA`` that differ only in dtype with ``check_dtype=False`` (:issue:`61473`) - Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`) - Bug in :func:`eval` where method calls on binary operations like ``(x + y).dropna()`` would raise ``AttributeError: 'BinOp' object has no attribute 'value'`` (:issue:`61175`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index c8f3bb6bd77d2..644c217caf5dc 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -882,6 +882,9 @@ def assert_series_equal( Second Series to compare. check_dtype : bool, default True Whether to check the Series dtype is identical. + If False, the dtypes are not compared; values of Series whose + dtypes differ (e.g. ``Int32`` vs ``object``) are compared + element-wise as object arrays. check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical.
@@ -1025,6 +1028,21 @@ def assert_series_equal( pass else: assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") + + if not check_dtype and (left.dtype != right.dtype): + # GH#61473: dtypes differ but check_dtype=False; compare values as objects + lv, rv = left._values, right._values + lv = np.asarray(lv, dtype=object) + rv = np.asarray(rv, dtype=object) + assert_numpy_array_equal( + lv, + rv, + check_dtype=check_dtype, + obj=str(obj), + index_values=left.index, + ) + check_exact = False + + if check_exact: left_values = left._values right_values = right._values diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index b599be5d042fe..b31e8529b238b 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -160,21 +160,13 @@ def test_query_empty_string(self): df.query("") def test_query_duplicate_column_name(self, engine, parser): - df = DataFrame( - { - "A": range(3), - "B": range(3), - "C": range(3) - } - ).rename(columns={"B": "A"}) + df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename( + columns={"B": "A"} + ) res = df.query("C == 1", engine=engine, parser=parser) - expect = DataFrame( - [[1, 1, 1]], - columns=["A", "A", "C"], - index=[1] - ) + expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1]) tm.assert_frame_equal(res, expect) @@ -1140,9 +1132,7 @@ def test_query_with_nested_special_character(self, parser, engine): [">=", operator.ge], ], ) - def test_query_lex_compare_strings( - self, parser, engine, op, func - ): + def test_query_lex_compare_strings(self, parser, engine, op, func): a = Series(np.random.default_rng(2).choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py index 8711365a19214..3622ed0c7f4aa 100644 --- a/pandas/tests/util/test_assert_frame_equal.py +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -413,3 
+413,31 @@ def test_datetimelike_compat_deprecated():
         tm.assert_series_equal(df["a"], df["a"], check_datetimelike_compat=True)
     with tm.assert_produces_warning(Pandas4Warning, match=msg):
         tm.assert_series_equal(df["a"], df["a"], check_datetimelike_compat=False)
+
+
+def test_assert_frame_equal_na_object_vs_int32_check_dtype_false():
+    # GH#61473: all-NA columns compare equal across Int32 and object dtypes
+    df1 = DataFrame({"a": pd.Series([pd.NA], dtype="Int32")})
+    df2 = DataFrame({"a": pd.Series([pd.NA], dtype="object")})
+    tm.assert_frame_equal(df1, df2, check_dtype=False)
+
+
+def test_assert_frame_equal_object_vs_int32_check_dtype_false():
+    # GH#61473: NA mixed with equal values compares equal across dtypes
+    df1 = DataFrame({"a": pd.Series([pd.NA, 0], dtype="Int32")})
+    df2 = DataFrame({"a": pd.Series([pd.NA, 0], dtype="object")})
+    tm.assert_frame_equal(df1, df2, check_dtype=False)
+
+
+def test_assert_frame_not_equal_object_vs_int32_check_dtype_false():
+    # GH#61473: a differing value must still be reported when dtypes are ignored
+    df1 = DataFrame({"a": pd.Series([pd.NA, 0], dtype="Int32")})
+    df2 = DataFrame({"a": pd.Series([pd.NA, 1], dtype="object")})
+    msg = r"""DataFrame\.iloc\[:, 0\] \(column name="a"\) are different
+
+DataFrame\.iloc\[:, 0\] \(column name="a"\) values are different \(50\.0 %\)
+\[index\]: \[0, 1\]
+\[left\]:  \[<NA>, 0\]
+\[right\]: \[<NA>, 1\]"""
+    with pytest.raises(AssertionError, match=msg):
+        tm.assert_frame_equal(df1, df2, check_dtype=False)