ENH: Allow relative and/or absolute precision in assert_almost_equal

This commit makes `assert_almost_equal` accept both relative and absolute precision when comparing numbers, through two new keyword arguments: `rtol` and `atol`, respectively. Under the hood, `_libs.testing.assert_almost_equal` now calls `math.isclose` instead of an adaptation of [numpy.testing.assert_almost_equal](https://docs.scipy.org/doc/numpy-1.17.0/reference/generated/numpy.testing.assert_almost_equal.html).
pandas-dev · Jan 2, 2020 · 0f8ccd2 · 0f8ccd2
1 parent 8806ed7
commit 0f8ccd2
Show file tree

Hide file tree

Showing 7 changed files with 257 additions and 75 deletions.
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -226,8 +226,9 @@ Other enhancements
 - Added new writer for exporting Stata dta files in version 118, ``StataWriter118``.  This format supports exporting strings containing Unicode characters (:issue:`23573`)
 - :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)
 - The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30296`)
-
-
+- :meth:`util.testing.assert_almost_equal` now accepts both relative and absolute
+  precision through the ``rtol`` and ``atol`` parameters, thus deprecating the
+  ``check_less_precise`` parameter (:issue:`13357`).
 
 Build Changes
 ^^^^^^^^^^^^^

diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx
@@ -1,3 +1,5 @@
+import math
+
 import numpy as np
 
 from pandas.core.dtypes.missing import isna, array_equivalent
@@ -38,13 +40,6 @@ cdef bint is_dictlike(obj):
     return hasattr(obj, 'keys') and hasattr(obj, '__getitem__')
 
 
-cdef bint decimal_almost_equal(double desired, double actual, int decimal):
-    # Code from
-    # http://docs.scipy.org/doc/numpy/reference/generated
-    # /numpy.testing.assert_almost_equal.html
-    return abs(desired - actual) < (0.5 * 10.0 ** -decimal)
-
-
 cpdef assert_dict_equal(a, b, bint compare_keys=True):
     assert is_dictlike(a) and is_dictlike(b), (
         "Cannot compare dict objects, one or both is not dict-like"
@@ -63,7 +58,7 @@ cpdef assert_dict_equal(a, b, bint compare_keys=True):
 
 
 cpdef assert_almost_equal(a, b,
-                          check_less_precise=False,
+                          rtol=0.5e-5, atol=0.5e-5,
                           bint check_dtype=True,
                           obj=None, lobj=None, robj=None):
     """
@@ -73,25 +68,23 @@ cpdef assert_almost_equal(a, b,
     ----------
     a : object
     b : object
-    check_less_precise : bool or int, default False
-        Specify comparison precision.
-        5 digits (False) or 3 digits (True) after decimal points are
-        compared. If an integer, then this will be the number of decimal
-        points to compare
+    rtol : float, default 0.5e-5
+        Relative tolerance.
+    atol : float, default 0.5e-5
+        Absolute tolerance.
     check_dtype: bool, default True
-        check dtype if both a and b are np.ndarray
+        check dtype if both a and b are np.ndarray.
     obj : str, default None
         Specify object name being compared, internally used to show
-        appropriate assertion message
+        appropriate assertion message.
     lobj : str, default None
         Specify left object name being compared, internally used to show
-        appropriate assertion message
+        appropriate assertion message.
     robj : str, default None
         Specify right object name being compared, internally used to show
-        appropriate assertion message
+        appropriate assertion message.
     """
     cdef:
-        int decimal
         double diff = 0.0
         Py_ssize_t i, na, nb
         double fa, fb
@@ -102,8 +95,6 @@ cpdef assert_almost_equal(a, b,
     if robj is None:
         robj = b
 
-    assert isinstance(check_less_precise, (int, bool))
-
     if isinstance(a, dict) or isinstance(b, dict):
         return assert_dict_equal(a, b)
 
@@ -161,8 +152,7 @@ cpdef assert_almost_equal(a, b,
 
         for i in range(len(a)):
             try:
-                assert_almost_equal(a[i], b[i],
-                                    check_less_precise=check_less_precise)
+                assert_almost_equal(a[i], b[i], rtol=rtol, atol=atol)
             except AssertionError:
                 is_unequal = True
                 diff += 1
@@ -194,24 +184,11 @@ cpdef assert_almost_equal(a, b,
             # inf comparison
             return True
 
-        if check_less_precise is True:
-            decimal = 3
-        elif check_less_precise is False:
-            decimal = 5
-        else:
-            decimal = check_less_precise
-
         fa, fb = a, b
 
-        # case for zero
-        if abs(fa) < 1e-5:
-            if not decimal_almost_equal(fa, fb, decimal):
-                assert False, (f'(very low values) expected {fb:.5f} '
-                               f'but got {fa:.5f}, with decimal {decimal}')
-        else:
-            if not decimal_almost_equal(1, fb / fa, decimal):
-                assert False, (f'expected {fb:.5f} but got {fa:.5f}, '
-                               f'with decimal {decimal}')
+        if not math.isclose(fa, fb, rel_tol=rtol, abs_tol=atol):
+            assert False, (f"expected {fb:.5f} but got {fa:.5f}, "
+                           f"with rtol={rtol}, atol={atol}")
         return True
 
     raise AssertionError(f"{a} != {b}")
diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py
@@ -211,15 +211,15 @@ def test_conversion_float(self):
 
         rs = self.dtc.convert(Timestamp("2012-1-1 01:02:03", tz="UTC"), None, None)
         xp = converter.dates.date2num(Timestamp("2012-1-1 01:02:03", tz="UTC"))
-        tm.assert_almost_equal(rs, xp, decimals)
+        tm.assert_almost_equal(rs, xp, check_less_precise=decimals)
 
         rs = self.dtc.convert(
             Timestamp("2012-1-1 09:02:03", tz="Asia/Hong_Kong"), None, None
         )
-        tm.assert_almost_equal(rs, xp, decimals)
+        tm.assert_almost_equal(rs, xp, check_less_precise=decimals)
 
         rs = self.dtc.convert(datetime(2012, 1, 1, 1, 2, 3), None, None)
-        tm.assert_almost_equal(rs, xp, decimals)
+        tm.assert_almost_equal(rs, xp, check_less_precise=decimals)
 
     def test_conversion_outofbounds_datetime(self):
         # 2579

diff --git a/pandas/tests/util/test_assert_almost_equal.py b/pandas/tests/util/test_assert_almost_equal.py
@@ -80,11 +80,62 @@ def test_assert_almost_equal_numbers(a, b):
     _assert_almost_equal_both(a, b)
 
 
-@pytest.mark.parametrize("a,b", [(1.1, 1), (1.1, True), (1, 2), (1.0001, np.int16(1))])
+@pytest.mark.parametrize(
+    "a,b",
+    [
+        (1.1, 1),
+        (1.1, True),
+        (1, 2),
+        (1.0001, np.int16(1)),
+        # The following two examples are not "almost equal" due to tol.
+        (0.1, 0.1001),
+        (0.0011, 0.0012),
+    ],
+)
 def test_assert_not_almost_equal_numbers(a, b):
     _assert_not_almost_equal_both(a, b)
 
 
+@pytest.mark.parametrize(
+    "a,b",
+    [
+        (1.1, 1.1),
+        (1.1, 1.100001),
+        (1.1, 1.1001),
+        (0.000001, 0.000005),
+        (1000.0, 1000.0005),
+        # Testing this example, as per #13357
+        (0.000011, 0.000012),
+    ],
+)
+def test_assert_almost_equal_numbers_atol(a, b):
+    # Equivalent to the deprecated check_less_precise=True
+    _assert_almost_equal_both(a, b, atol=1e-3)
+
+
+@pytest.mark.parametrize("a,b", [(1.1, 1.11), (0.1, 0.101), (0.000011, 0.001012)])
+def test_assert_not_almost_equal_numbers_atol(a, b):
+    _assert_not_almost_equal_both(a, b, atol=1e-3)
+
+
+@pytest.mark.parametrize(
+    "a,b",
+    [
+        (1.1, 1.1),
+        (1.1, 1.100001),
+        (1.1, 1.1001),
+        (0.000001, 0.000005),
+        (1000.0, 1000.0005),
+        (0.000011, 0.000012),
+        # These 2 examples pass because we're using relative tolerance
+        (1.1, 1.11),
+        (0.1, 0.101),
+    ],
+)
+def test_assert_almost_equal_numbers_rtol(a, b):
+    _assert_almost_equal_both(a, b, rtol=0.05)
+
+
 @pytest.mark.parametrize("a,b", [(0, 0), (0, 0.0), (0, np.float64(0)), (0.000001, 0)])
 def test_assert_almost_equal_numbers_with_zeros(a, b):
     _assert_almost_equal_both(a, b)
@@ -237,7 +288,7 @@ def test_assert_almost_equal_object():
 
 
 def test_assert_almost_equal_value_mismatch():
-    msg = "expected 2\\.00000 but got 1\\.00000, with decimal 5"
+    msg = "expected 2\\.00000 but got 1\\.00000, with rtol=5e-06, atol=5e-06"
 
     with pytest.raises(AssertionError, match=msg):
         tm.assert_almost_equal(1, 2)

diff --git a/pandas/tests/util/test_assert_extension_array_equal.py b/pandas/tests/util/test_assert_extension_array_equal.py
@@ -36,8 +36,8 @@ def test_assert_extension_array_equal_not_exact(kwargs):
     "check_less_precise", [True, False, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 )
 def test_assert_extension_array_equal_less_precise(check_less_precise):
-    arr1 = SparseArray([0.5, 0.123456])
-    arr2 = SparseArray([0.5, 0.123457])
+    arr1 = SparseArray([0.5, 0.12345])
+    arr2 = SparseArray([0.5, 0.12346])
 
     kwargs = dict(check_less_precise=check_less_precise)
 
@@ -46,8 +46,8 @@ def test_assert_extension_array_equal_less_precise(check_less_precise):
 ExtensionArray are different
 
 ExtensionArray values are different \\(50\\.0 %\\)
-\\[left\\]:  \\[0\\.5, 0\\.123456\\]
-\\[right\\]: \\[0\\.5, 0\\.123457\\]"""
+\\[left\\]:  \\[0\\.5, 0\\.12345\\]
+\\[right\\]: \\[0\\.5, 0\\.12346\\]"""
 
         with pytest.raises(AssertionError, match=msg):
             tm.assert_extension_array_equal(arr1, arr2, **kwargs)

diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py
@@ -110,7 +110,7 @@ def test_less_precise(data1, data2, dtype, check_less_precise):
 
     if (check_less_precise is False or check_less_precise == 10) or (
         (check_less_precise is True or check_less_precise >= 3)
-        and abs(data1 - data2) >= 0.0001
+        and abs(data1 - data2) >= 0.0005
     ):
         msg = "Series values are different"
         with pytest.raises(AssertionError, match=msg):