Skip to content

Commit

Permalink
ENH: Allow absolute precision in assert_almost_equal (#13357)
Browse files Browse the repository at this point in the history
This commit adds a new keyword argument, `check_low_values`, which allows
the approximate comparison of numeric values based on absolute decimal
places instead of their ratio. This is particularly useful when comparing values close to zero:

    # This fails because the ratio (0.1001 / 0.1) is compared against 1
    assert_almost_equal(0.1, 0.1001, check_less_precise=True)

    # This will work as intuitively expected
    assert_almost_equal(
        0.1, 0.1001,
        check_less_precise=True,
        check_low_values=True
    )
  • Loading branch information
Joao Veiga committed Dec 30, 2019
1 parent db062da commit 0910110
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 9 deletions.
22 changes: 14 additions & 8 deletions pandas/_libs/testing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ cpdef assert_dict_equal(a, b, bint compare_keys=True):

cpdef assert_almost_equal(a, b,
check_less_precise=False,
check_low_values=False,
bint check_dtype=True,
obj=None, lobj=None, robj=None):
"""
Expand All @@ -77,18 +78,21 @@ cpdef assert_almost_equal(a, b,
Specify comparison precision.
5 digits (False) or 3 digits (True) after decimal points are
compared. If an integer, then this will be the number of decimal
points to compare
points to compare.
check_low_values : bool, default False
Use absolute comparison precision, instead of relative. This is
particularly useful when comparing values between -1 and 1.
check_dtype: bool, default True
check dtype if both a and b are np.ndarray
check dtype if both a and b are np.ndarray.
obj : str, default None
Specify object name being compared, internally used to show
appropriate assertion message
appropriate assertion message.
lobj : str, default None
Specify left object name being compared, internally used to show
appropriate assertion message
appropriate assertion message.
robj : str, default None
Specify right object name being compared, internally used to show
appropriate assertion message
appropriate assertion message.
"""
cdef:
int decimal
Expand All @@ -103,6 +107,7 @@ cpdef assert_almost_equal(a, b,
robj = b

assert isinstance(check_less_precise, (int, bool))
assert isinstance(check_low_values, bool)

if isinstance(a, dict) or isinstance(b, dict):
return assert_dict_equal(a, b)
Expand Down Expand Up @@ -162,7 +167,8 @@ cpdef assert_almost_equal(a, b,
for i in range(len(a)):
try:
assert_almost_equal(a[i], b[i],
check_less_precise=check_less_precise)
check_less_precise=check_less_precise,
check_low_values=check_low_values)
except AssertionError:
is_unequal = True
diff += 1
Expand Down Expand Up @@ -203,8 +209,8 @@ cpdef assert_almost_equal(a, b,

fa, fb = a, b

# case for zero
if abs(fa) < 1e-5:
# case for zero, or explicit low value comparison
if abs(fa) < 1e-5 or check_low_values:
if not decimal_almost_equal(fa, fb, decimal):
assert False, (f'(very low values) expected {fb:.5f} '
f'but got {fa:.5f}, with decimal {decimal}')
Expand Down
29 changes: 29 additions & 0 deletions pandas/tests/util/test_assert_almost_equal.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,35 @@ def test_assert_almost_equal_numbers(a, b):
_assert_almost_equal_both(a, b)


@pytest.mark.parametrize(
    "a,b",
    [
        # identical values
        (1.1, 1.1),
        # difference well below three decimal places
        (1.1, 1.100001),
        # check_less_precise=True allows 1.1 vs 1.1001
        (1.1, 1.1001),
    ],
)
def test_assert_almost_equal_numbers_less_precise(a, b):
    """Check that each pair is accepted when ``check_less_precise=True``.

    The comparison is delegated to ``_assert_almost_equal_both``, which is
    defined outside this block (presumably it checks both argument
    orders -- confirm against the helper).
    """
    _assert_almost_equal_both(
        a,
        b,
        check_less_precise=True,
    )


@pytest.mark.parametrize(
    "a,b",
    [
        # pairs that already pass with check_less_precise=True alone
        (1.1, 1.1),
        (1.1, 1.100001),
        (1.1, 1.1001),
        # check_low_values allows for 0.1 vs 0.1001
        (0.1, 0.1001),
        # example taken from #13357
        (0.000011, 0.000012),
    ],
)
def test_assert_almost_equal_numbers_low_values(a, b):
    """Check that each pair is accepted with both precision flags enabled.

    ``check_less_precise=True`` and ``check_low_values=True`` are forwarded
    to ``_assert_almost_equal_both``, which is defined outside this block
    (presumably it checks both argument orders -- confirm against the
    helper).
    """
    _assert_almost_equal_both(
        a,
        b,
        check_less_precise=True,
        check_low_values=True,
    )


@pytest.mark.parametrize("a,b", [(1.1, 1), (1.1, True), (1, 2), (1.0001, np.int16(1))])
def test_assert_not_almost_equal_numbers(a, b):
_assert_not_almost_equal_both(a, b)
Expand Down
38 changes: 37 additions & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def assert_almost_equal(
right,
check_dtype: Union[bool, str] = "equiv",
check_less_precise: Union[bool, int] = False,
check_low_values: bool = False,
**kwargs,
):
"""
Expand All @@ -305,6 +306,9 @@ def assert_almost_equal(
they are equivalent within the specified precision. Otherwise, we
compare the **ratio** of the second number to the first number and
check whether it is equivalent to 1 within the specified precision.
check_low_values : bool, default False
Use absolute comparison precision, instead of relative. This is
particularly useful when comparing values between -1 and 1.
"""
if isinstance(left, pd.Index):
assert_index_equal(
Expand All @@ -313,6 +317,7 @@ def assert_almost_equal(
check_exact=False,
exact=check_dtype,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
**kwargs,
)

Expand All @@ -323,6 +328,7 @@ def assert_almost_equal(
check_exact=False,
check_dtype=check_dtype,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
**kwargs,
)

Expand All @@ -333,6 +339,7 @@ def assert_almost_equal(
check_exact=False,
check_dtype=check_dtype,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
**kwargs,
)

Expand All @@ -356,6 +363,7 @@ def assert_almost_equal(
right,
check_dtype=check_dtype,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
**kwargs,
)

Expand Down Expand Up @@ -570,6 +578,7 @@ def assert_index_equal(
exact: Union[bool, str] = "equiv",
check_names: bool = True,
check_less_precise: Union[bool, int] = False,
check_low_values: bool = False,
check_exact: bool = True,
check_categorical: bool = True,
obj: str = "Index",
Expand All @@ -591,6 +600,9 @@ def assert_index_equal(
Specify comparison precision. Only used when check_exact is False.
5 digits (False) or 3 digits (True) after decimal points are compared.
If int, then specify the digits to compare.
check_low_values : bool, default False
Use absolute comparison precision, instead of relative. This is
particularly useful when comparing values between -1 and 1.
check_exact : bool, default True
Whether to compare number exactly.
check_categorical : bool, default True
Expand Down Expand Up @@ -660,6 +672,7 @@ def _get_ilevel_values(index, level):
exact=exact,
check_names=check_names,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
check_exact=check_exact,
obj=lobj,
)
Expand All @@ -677,6 +690,7 @@ def _get_ilevel_values(index, level):
left.values,
right.values,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
check_dtype=exact,
obj=obj,
lobj=left,
Expand Down Expand Up @@ -989,7 +1003,12 @@ def _raise(left, right, err_msg):


def assert_extension_array_equal(
left, right, check_dtype=True, check_less_precise=False, check_exact=False
left,
right,
check_dtype=True,
check_less_precise=False,
check_low_values=False,
check_exact=False,
):
"""Check that left and right ExtensionArrays are equal.
Expand All @@ -1003,6 +1022,9 @@ def assert_extension_array_equal(
Specify comparison precision. Only used when check_exact is False.
5 digits (False) or 3 digits (True) after decimal points are compared.
If int, then specify the digits to compare.
check_low_values : bool, default False
Use absolute comparison precision, instead of relative. This is
particularly useful when comparing values between -1 and 1.
check_exact : bool, default False
Whether to compare number exactly.
Expand Down Expand Up @@ -1036,6 +1058,7 @@ def assert_extension_array_equal(
right_valid,
check_dtype=check_dtype,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
obj="ExtensionArray",
)

Expand All @@ -1048,6 +1071,7 @@ def assert_series_equal(
check_index_type="equiv",
check_series_type=True,
check_less_precise=False,
check_low_values=False,
check_names=True,
check_exact=False,
check_datetimelike_compat=False,
Expand Down Expand Up @@ -1078,6 +1102,9 @@ def assert_series_equal(
they are equivalent within the specified precision. Otherwise, we
compare the **ratio** of the second number to the first number and
check whether it is equivalent to 1 within the specified precision.
check_low_values : bool, default False
Use absolute comparison precision, instead of relative. This is
particularly useful when comparing values between -1 and 1.
check_names : bool, default True
Whether to check the Series and Index names attribute.
check_exact : bool, default False
Expand Down Expand Up @@ -1114,6 +1141,7 @@ def assert_series_equal(
exact=check_index_type,
check_names=check_names,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
check_exact=check_exact,
check_categorical=check_categorical,
obj=f"{obj}.index",
Expand Down Expand Up @@ -1178,6 +1206,7 @@ def assert_series_equal(
left._internal_get_values(),
right._internal_get_values(),
check_less_precise=check_less_precise,
check_low_values=check_low_values,
check_dtype=check_dtype,
obj=str(obj),
)
Expand All @@ -1200,6 +1229,7 @@ def assert_frame_equal(
check_column_type="equiv",
check_frame_type=True,
check_less_precise=False,
check_low_values=False,
check_names=True,
by_blocks=False,
check_exact=False,
Expand Down Expand Up @@ -1243,6 +1273,9 @@ def assert_frame_equal(
they are equivalent within the specified precision. Otherwise, we
compare the **ratio** of the second number to the first number and
check whether it is equivalent to 1 within the specified precision.
check_low_values : bool, default False
Use absolute comparison precision, instead of relative. This is
particularly useful when comparing values between -1 and 1.
check_names : bool, default True
Whether to check that the `names` attribute for both the `index`
and `column` attributes of the DataFrame is identical.
Expand Down Expand Up @@ -1321,6 +1354,7 @@ def assert_frame_equal(
exact=check_index_type,
check_names=check_names,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
check_exact=check_exact,
check_categorical=check_categorical,
obj=f"{obj}.index",
Expand All @@ -1333,6 +1367,7 @@ def assert_frame_equal(
exact=check_column_type,
check_names=check_names,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
check_exact=check_exact,
check_categorical=check_categorical,
obj=f"{obj}.columns",
Expand Down Expand Up @@ -1361,6 +1396,7 @@ def assert_frame_equal(
check_dtype=check_dtype,
check_index_type=check_index_type,
check_less_precise=check_less_precise,
check_low_values=check_low_values,
check_exact=check_exact,
check_names=check_names,
check_datetimelike_compat=check_datetimelike_compat,
Expand Down

0 comments on commit 0910110

Please sign in to comment.