From 4f5ddac990e8562de775c5345554585863826fe1 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 5 Apr 2019 12:51:16 -0700 Subject: [PATCH 01/12] =?UTF-8?q?Bump=20version:=204.0.4=20=E2=86=92=204.0?= =?UTF-8?q?.5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- deepdiff/__init__.py | 2 +- docs/conf.py | 4 ++-- docs/index.rst | 2 +- setup.cfg | 2 +- setup.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 506dd426..75e18ac1 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 4.0.4 +# DeepDiff v 4.0.5 ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index 34979357..99a72d72 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,6 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep and DeepHash classes.""" # flake8: noqa -__version__ = '4.0.4' +__version__ = '4.0.5' import logging if __name__ == '__main__': diff --git a/docs/conf.py b/docs/conf.py index fc99e489..89db289e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '4.0.4' +version = '4.0.5' # The full version, including alpha/beta/rc tags. -release = '4.0.4' +release = '4.0.5' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index fd5d7721..02a7c92d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ contain the root `toctree` directive. -DeepDiff 4.0.4 documentation! +DeepDiff 4.0.5 documentation! ============================= **DeepDiff: Deep Difference of dictionaries, iterables, strings and other objects. 
It will recursively look for all the changes.** diff --git a/setup.cfg b/setup.cfg index 5847ffa4..1b39b971 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 4.0.4 +current_version = 4.0.5 commit = True tag = True tag_name = {new_version} diff --git a/setup.py b/setup.py index bc9ef194..ecabde8d 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ if os.environ.get('USER', '') == 'vagrant': del os.link -version = '4.0.4' +version = '4.0.5' def get_reqs(filename): From b8f70767199227e0d26dfc92b868cf232392b1a5 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 5 Apr 2019 16:37:40 -0700 Subject: [PATCH 02/12] significant digits! --- deepdiff/deephash.py | 25 +++++++++++-------------- deepdiff/diff.py | 29 ++++++++++++++--------------- deepdiff/helper.py | 14 ++++++++++++++ tests/test_diff_text.py | 8 ++++++++ 4 files changed, 47 insertions(+), 29 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 8a7d50c7..df1e8ba7 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -1,8 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import logging -from collections import Iterable -from collections import MutableMapping +from collections.abc import Iterable, MutableMapping from collections import defaultdict from decimal import Decimal from hashlib import sha1, sha256 @@ -10,7 +9,8 @@ from deepdiff.helper import (strings, numbers, unprocessed, not_hashed, add_to_frozen_set, convert_item_or_items_into_set_else_none, get_doc, convert_item_or_items_into_compiled_regexes_else_none, - get_id, type_is_subclass_of_type_group, type_in_type_group) + get_id, type_is_subclass_of_type_group, type_in_type_group, + number_to_string) from deepdiff.base import Base logger = logging.getLogger(__name__) @@ -29,8 +29,6 @@ KEY_TO_VAL_STR = "{}:{}" -ZERO_DECIMAL_CHARACTERS = set("-0.") - def prepare_string_for_hashing(obj, ignore_string_type_changes=False, ignore_string_case=False): """ @@ -68,14 +66,16 @@ def 
__init__(self, ignore_numeric_type_changes=False, ignore_type_subclasses=False, ignore_string_case=False, + number_to_string_func=None, **kwargs): if kwargs: raise ValueError( ("The following parameter(s) are not valid: %s\n" - "The valid parameters are obj, hashes, exclude_types," - "exclude_paths, exclude_regex_paths, hasher, ignore_repetition," - "significant_digits, apply_hash, ignore_type_in_groups, ignore_string_type_changes," - "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case") % ', '.join(kwargs.keys())) + "The valid parameters are obj, hashes, exclude_types, " + "exclude_paths, exclude_regex_paths, hasher, ignore_repetition, " + "significant_digits, apply_hash, ignore_type_in_groups, ignore_string_type_changes, " + "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case " + "number_to_string_func") % ', '.join(kwargs.keys())) self.obj = obj exclude_types = set() if exclude_types is None else set(exclude_types) self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance @@ -102,6 +102,7 @@ def __init__(self, # testing the individual hash functions for different types of objects. 
self.apply_hash = apply_hash self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group + self.number_to_string = number_to_string_func or number_to_string self._hash(obj, parent="root", parents_ids=frozenset({get_id(obj)})) @@ -266,11 +267,7 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): def _prep_number(self, obj): if self.significant_digits is not None and ( self.ignore_numeric_type_changes or isinstance(obj, (float, complex, Decimal))): - obj_s = ("{:.%sf}" % self.significant_digits).format(obj) - - # Special case for 0: "-0.00" should compare equal to "0.00" - if set(obj_s) <= ZERO_DECIMAL_CHARACTERS: - obj_s = "0.00" + obj_s = self.number_to_string(obj, self.significant_digits) result = "number:{}".format(obj_s) else: result = KEY_TO_VAL_STR.format(type(obj).__name__, obj) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index a8461827..54f41b3f 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -12,9 +12,7 @@ import json import jsonpickle import warnings -import os -from decimal import Decimal from itertools import zip_longest from collections.abc import Mapping, Iterable @@ -24,7 +22,8 @@ IndexedHash, Verbose, unprocessed, json_convertor_default, add_to_frozen_set, convert_item_or_items_into_set_else_none, get_type, convert_item_or_items_into_compiled_regexes_else_none, - type_is_subclass_of_type_group, type_in_type_group, get_doc) + type_is_subclass_of_type_group, type_in_type_group, get_doc, + number_to_string) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel from deepdiff.model import DictRelationship, AttributeRelationship from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship @@ -58,6 +57,7 @@ def __init__(self, ignore_numeric_type_changes=False, ignore_type_subclasses=False, ignore_string_case=False, + number_to_string_func=None, verbose_level=1, view=TEXT_VIEW, hasher=None, @@ -68,7 
+68,7 @@ def __init__(self, "The valid parameters are ignore_order, report_repetition, significant_digits, " "exclude_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, " "ignore_string_type_changes, ignore_numeric_type_changes, ignore_type_subclasses, " - "verbose_level, view, and hasher.") % ', '.join(kwargs.keys())) + "number_to_string_func, verbose_level, view, and hasher.") % ', '.join(kwargs.keys())) self.ignore_order = ignore_order self.ignore_type_in_groups = self.get_ignore_types_in_groups( @@ -86,6 +86,7 @@ def __init__(self, self.ignore_type_subclasses = ignore_type_subclasses self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group self.ignore_string_case = ignore_string_case + self.number_to_string = number_to_string_func or number_to_string self.hashes = {} self.hasher = hasher @@ -226,8 +227,8 @@ def __get_clean_to_keys_mapping(self, keys, level): for key in keys: if self.ignore_string_type_changes and isinstance(key, bytes): clean_key = key.decode('utf-8') - elif self.ignore_numeric_type_changes and type(key) in numbers: - clean_key = ("{:.%sf}" % self.significant_digits).format(key) + elif self.ignore_numeric_type_changes and isinstance(key, numbers): + clean_key = self.number_to_string(key, self.significant_digits) else: clean_key = key if clean_key in result: @@ -464,7 +465,8 @@ def __create_hashtable(self, t, level): ignore_numeric_type_changes=self.ignore_numeric_type_changes, ignore_type_in_groups=self.ignore_type_in_groups, ignore_type_subclasses=self.ignore_type_subclasses, - ignore_string_case=self.ignore_string_case + ignore_string_case=self.ignore_string_case, + number_to_string_func=self.number_to_string, ) item_hash = hashes_all[item] except Exception as e: # pragma: no cover @@ -556,8 +558,7 @@ def __diff_iterable_with_deephash(self, level): def __diff_numbers(self, level): """Diff Numbers""" - if self.significant_digits is not None and isinstance(level.t1, ( - float, 
complex, Decimal)): + if self.significant_digits is not None: # Bernhard10: I use string formatting for comparison, to be consistent with usecases where # data is read from files that were previousely written from python and # to be consistent with on-screen representation of numbers. @@ -566,13 +567,11 @@ def __diff_numbers(self, level): # Note that abs(3.25-3.251) = 0.0009999999999998899 < 0.001 # Note also that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 # For Decimals, format seems to round 2.5 to 2 and 3.5 to 4 (to closest even number) - t1_s = ("{:.%sf}" % self.significant_digits).format(level.t1) - t2_s = ("{:.%sf}" % self.significant_digits).format(level.t2) - # Special case for 0: "-0.00" should compare equal to "0.00" - if set(t1_s) <= set("-0.") and set(t2_s) <= set("-0."): - return - elif t1_s != t2_s: + t1_s = self.number_to_string(level.t1, self.significant_digits) + t2_s = self.number_to_string(level.t1, self.significant_digits) + + if t1_s != t2_s: self.__report_result('values_changed', level) else: if level.t1 != level.t2: diff --git a/deepdiff/helper.py b/deepdiff/helper.py index a17428f2..50b7a52d 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -38,6 +38,8 @@ ID_PREFIX = '!>*id' +ZERO_DECIMAL_CHARACTERS = set("-0.") + def short_repr(item, max_length=15): """Short representation of item if it is too long""" @@ -212,3 +214,15 @@ def get_doc(doc_filename): except Exception: doc = 'Failed to load the docstrings. Please visit: https://github.com/seperman/deepdiff' return doc + + +def number_to_string(number, significant_digits): + # if isinstance(number, int): + # number = Decimal(str(number)) + if isinstance(number, Decimal): + number = number.quantize(Decimal('0.' 
+ '0' * significant_digits)) + result = ("{:.%sf}" % significant_digits).format(number) + # Special case for 0: "-0.00" should compare equal to "0.00" + if set(result) <= ZERO_DECIMAL_CHARACTERS: + result = "0.00" + return result diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index b952b376..cd5bd702 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1426,6 +1426,14 @@ def test_ignore_string_type_changes_when_dict_keys_merge_is_not_deterministic(se alternative_result = {'values_changed': {"root['a']": {'new_value': 11, 'old_value': 10}}} assert result == ddiff or alternative_result == ddiff + @pytest.mark.parametrize("t1, t2, significant_digits, result", [ + ([0.1], [Decimal('0.10')], None, {}), + ([1], [Decimal('1.00000002')], 3, {}), + ]) + def test_ignore_type_in_groups_numbers_when_decimal(self, t1, t2, significant_digits, result): + ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True, significant_digits=significant_digits) + assert result == ddiff + @pytest.mark.skip(reason="REMAPPING DISABLED UNTIL KEY NAMES CHANGE AGAIN IN FUTURE") def test_base_level_dictionary_remapping(self): """ From 4646ca9b84d3f37b0d1941bda97265d588b20acd Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 5 Apr 2019 18:54:57 -0700 Subject: [PATCH 03/12] significant digits! 
--- deepdiff/diff.py | 3 +-- deepdiff/helper.py | 16 ++++++++++------ tests/test_diff_text.py | 10 ++++++---- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 54f41b3f..50c31d96 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -567,9 +567,8 @@ def __diff_numbers(self, level): # Note that abs(3.25-3.251) = 0.0009999999999998899 < 0.001 # Note also that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 # For Decimals, format seems to round 2.5 to 2 and 3.5 to 4 (to closest even number) - t1_s = self.number_to_string(level.t1, self.significant_digits) - t2_s = self.number_to_string(level.t1, self.significant_digits) + t2_s = self.number_to_string(level.t2, self.significant_digits) if t1_s != t2_s: self.__report_result('values_changed', level) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 50b7a52d..5ac5d9ed 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -4,7 +4,7 @@ import re import os import logging -from decimal import Decimal +from decimal import Decimal, localcontext from collections import namedtuple from ordered_set import OrderedSet @@ -216,12 +216,16 @@ def get_doc(doc_filename): return doc -def number_to_string(number, significant_digits): - # if isinstance(number, int): - # number = Decimal(str(number)) +def number_to_string(number, significant_digits, using="{:.%sf}"): + """ + Convert numbers to string considering significant digits. + """ if isinstance(number, Decimal): - number = number.quantize(Decimal('0.' + '0' * significant_digits)) - result = ("{:.%sf}" % significant_digits).format(number) + tup = number.as_tuple() + with localcontext() as ctx: + ctx.prec = len(tup.digits) + tup.exponent + significant_digits + number = number.quantize(Decimal('0.' 
+ '0' * significant_digits)) + result = (using % significant_digits).format(number) # Special case for 0: "-0.00" should compare equal to "0.00" if set(result) <= ZERO_DECIMAL_CHARACTERS: result = "0.00" diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index cd5bd702..978750fa 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -74,10 +74,10 @@ def test_type_change_numeric(self): @pytest.mark.parametrize("t1, t2, expected_result", [ - (10, 10.0, {}), + # (10, 10.0, {}), (10, 10.2, {'values_changed': {'root': {'new_value': 10.2, 'old_value': 10}}}), - (Decimal(10), 10.0, {}), - ({"a": Decimal(10), "b": 12, 11.0: None}, {b"b": 12, "a": 10.0, Decimal(11): None}, {}), + # (Decimal(10), 10.0, {}), + # ({"a": Decimal(10), "b": 12, 11.0: None}, {b"b": 12, "a": 10.0, Decimal(11): None}, {}), ]) def test_type_change_numeric_when_ignore_order(self, t1, t2, expected_result): ddiff = DeepDiff(t1, t2, ignore_order=True, ignore_numeric_type_changes=True, ignore_string_type_changes=True) @@ -1427,7 +1427,9 @@ def test_ignore_string_type_changes_when_dict_keys_merge_is_not_deterministic(se assert result == ddiff or alternative_result == ddiff @pytest.mark.parametrize("t1, t2, significant_digits, result", [ - ([0.1], [Decimal('0.10')], None, {}), + ([0.1], [Decimal('0.10')], None, + {'values_changed': {'root[0]': {'new_value': Decimal('0.10'), 'old_value': 0.1}}}), # Due to floating point arithmetics, if you don't pass significant digits, they will be not the same values! + ([0.1], [Decimal('0.10')], 5, {}), # Same inputs as above but with significant digits that is low. 
([1], [Decimal('1.00000002')], 3, {}), ]) def test_ignore_type_in_groups_numbers_when_decimal(self, t1, t2, significant_digits, result): From 69a7d6975ac83057e26e136578731296dfdcbac6 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 5 Apr 2019 21:31:08 -0700 Subject: [PATCH 04/12] more tests --- deepdiff/deephash.py | 8 +++++--- deepdiff/diff.py | 11 +++++++---- tests/test_diff_text.py | 15 ++++++++++----- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index df1e8ba7..fd891899 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -60,6 +60,7 @@ def __init__(self, hasher=None, ignore_repetition=True, significant_digits=None, + significant_digits_formatter="{:.%sf}", apply_hash=True, ignore_type_in_groups=None, ignore_string_type_changes=False, @@ -71,9 +72,9 @@ def __init__(self, if kwargs: raise ValueError( ("The following parameter(s) are not valid: %s\n" - "The valid parameters are obj, hashes, exclude_types, " + "The valid parameters are obj, hashes, exclude_types, significant_digits, " "exclude_paths, exclude_regex_paths, hasher, ignore_repetition, " - "significant_digits, apply_hash, ignore_type_in_groups, ignore_string_type_changes, " + "significant_digits_formatter, apply_hash, ignore_type_in_groups, ignore_string_type_changes, " "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case " "number_to_string_func") % ', '.join(kwargs.keys())) self.obj = obj @@ -89,6 +90,7 @@ def __init__(self, self[UNPROCESSED] = [] self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) + self.significant_digits_formatter = significant_digits_formatter self.ignore_type_in_groups = self.get_ignore_types_in_groups( ignore_type_in_groups=ignore_type_in_groups, ignore_string_type_changes=ignore_string_type_changes, @@ -267,7 +269,7 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): def _prep_number(self, obj): if 
self.significant_digits is not None and ( self.ignore_numeric_type_changes or isinstance(obj, (float, complex, Decimal))): - obj_s = self.number_to_string(obj, self.significant_digits) + obj_s = self.number_to_string(obj, self.significant_digits, self.significant_digits_formatter) result = "number:{}".format(obj_s) else: result = KEY_TO_VAL_STR.format(type(obj).__name__, obj) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 50c31d96..b0b3bae0 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -49,6 +49,7 @@ def __init__(self, ignore_order=False, report_repetition=False, significant_digits=None, + significant_digits_formatter="{:.%sf}", exclude_paths=None, exclude_regex_paths=None, exclude_types=None, @@ -66,7 +67,7 @@ def __init__(self, raise ValueError(( "The following parameter(s) are not valid: %s\n" "The valid parameters are ignore_order, report_repetition, significant_digits, " - "exclude_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, " + "significant_digits_formatter, exclude_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, " "ignore_string_type_changes, ignore_numeric_type_changes, ignore_type_subclasses, " "number_to_string_func, verbose_level, view, and hasher.") % ', '.join(kwargs.keys())) @@ -91,6 +92,7 @@ def __init__(self, self.hasher = hasher self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) + self.significant_digits_formatter = significant_digits_formatter self.tree = TreeResult() @@ -228,7 +230,7 @@ def __get_clean_to_keys_mapping(self, keys, level): if self.ignore_string_type_changes and isinstance(key, bytes): clean_key = key.decode('utf-8') elif self.ignore_numeric_type_changes and isinstance(key, numbers): - clean_key = self.number_to_string(key, self.significant_digits) + clean_key = self.number_to_string(key, self.significant_digits, self.significant_digits_formatter) else: clean_key = key if clean_key in result: @@ -461,6 +463,7 @@ def 
__create_hashtable(self, t, level): hasher=self.hasher, ignore_repetition=not self.report_repetition, significant_digits=self.significant_digits, + significant_digits_formatter=self.significant_digits_formatter, ignore_string_type_changes=self.ignore_string_type_changes, ignore_numeric_type_changes=self.ignore_numeric_type_changes, ignore_type_in_groups=self.ignore_type_in_groups, @@ -567,8 +570,8 @@ def __diff_numbers(self, level): # Note that abs(3.25-3.251) = 0.0009999999999998899 < 0.001 # Note also that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 # For Decimals, format seems to round 2.5 to 2 and 3.5 to 4 (to closest even number) - t1_s = self.number_to_string(level.t1, self.significant_digits) - t2_s = self.number_to_string(level.t2, self.significant_digits) + t1_s = self.number_to_string(level.t1, self.significant_digits, self.significant_digits_formatter) + t2_s = self.number_to_string(level.t2, self.significant_digits, self.significant_digits_formatter) if t1_s != t2_s: self.__report_result('values_changed', level) diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 978750fa..6389c235 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1307,11 +1307,14 @@ def test_int_to_unicode(self): } assert result == ddiff - def test_significant_digits_for_decimals(self): - t1 = Decimal('2.5') - t2 = Decimal('1.5') - ddiff = DeepDiff(t1, t2, significant_digits=0) - assert {} == ddiff + @pytest.mark.parametrize("t1, t2, significant_digits, result", [ + (Decimal('2.5'), Decimal('1.5'), 0, {}), + (Decimal('2.5'), Decimal('1.5'), 1, {'values_changed': {'root': {'new_value': Decimal('1.5'), 'old_value': Decimal('2.5')}}}), + (Decimal('2.5'), Decimal(2.5), 3, {}), + ]) + def test_significant_digits(self, t1, t2, significant_digits, result): + ddiff = DeepDiff(t1, t2, significant_digits=significant_digits) + assert result == ddiff def test_significant_digits_for_complex_imaginary_part(self): t1 = 1.23 + 1.222254j 
@@ -1430,6 +1433,8 @@ def test_ignore_string_type_changes_when_dict_keys_merge_is_not_deterministic(se ([0.1], [Decimal('0.10')], None, {'values_changed': {'root[0]': {'new_value': Decimal('0.10'), 'old_value': 0.1}}}), # Due to floating point arithmetics, if you don't pass significant digits, they will be not the same values! ([0.1], [Decimal('0.10')], 5, {}), # Same inputs as above but with significant digits that is low. + ([-0.1], [-Decimal('0.10')], 5, {}), + ([-Decimal('0.102')], [-Decimal('0.10')], 2, {}), ([1], [Decimal('1.00000002')], 3, {}), ]) def test_ignore_type_in_groups_numbers_when_decimal(self, t1, t2, significant_digits, result): From 914f678689b4ebb75403a8009641d2897074c32d Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 5 Apr 2019 23:31:58 -0700 Subject: [PATCH 05/12] wip --- deepdiff/deephash.py | 10 +++---- deepdiff/deephash_doc.rst | 18 ++++++++++--- deepdiff/diff.py | 19 ++++++++----- deepdiff/diff_doc.rst | 57 +++++++++++++++++++++++++++++++++------ deepdiff/helper.py | 12 ++++++++- tests/test_diff_text.py | 41 +++++++++++++++++++++++----- tests/test_hash.py | 46 ++++++++++++++++++++++++++----- 7 files changed, 164 insertions(+), 39 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index fd891899..05205baf 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -60,7 +60,7 @@ def __init__(self, hasher=None, ignore_repetition=True, significant_digits=None, - significant_digits_formatter="{:.%sf}", + number_format_notation="f", apply_hash=True, ignore_type_in_groups=None, ignore_string_type_changes=False, @@ -74,7 +74,7 @@ def __init__(self, ("The following parameter(s) are not valid: %s\n" "The valid parameters are obj, hashes, exclude_types, significant_digits, " "exclude_paths, exclude_regex_paths, hasher, ignore_repetition, " - "significant_digits_formatter, apply_hash, ignore_type_in_groups, ignore_string_type_changes, " + "number_format_notation, apply_hash, ignore_type_in_groups, 
ignore_string_type_changes, " "ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case " "number_to_string_func") % ', '.join(kwargs.keys())) self.obj = obj @@ -90,7 +90,7 @@ def __init__(self, self[UNPROCESSED] = [] self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) - self.significant_digits_formatter = significant_digits_formatter + self.number_format_notation = number_format_notation self.ignore_type_in_groups = self.get_ignore_types_in_groups( ignore_type_in_groups=ignore_type_in_groups, ignore_string_type_changes=ignore_string_type_changes, @@ -146,7 +146,6 @@ def murmur3_128bit(obj): return mmh3.hash128(obj, MURMUR_SEED) def __getitem__(self, obj): - # changed_to_id = False key = obj result = None @@ -269,7 +268,8 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): def _prep_number(self, obj): if self.significant_digits is not None and ( self.ignore_numeric_type_changes or isinstance(obj, (float, complex, Decimal))): - obj_s = self.number_to_string(obj, self.significant_digits, self.significant_digits_formatter) + obj_s = self.number_to_string(obj, significant_digits=self.significant_digits, + number_format_notation=self.number_format_notation) result = "number:{}".format(obj_s) else: result = KEY_TO_VAL_STR.format(type(obj).__name__, obj) diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index 99d27b59..e7371ad5 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -54,12 +54,22 @@ ignore_repetition: Boolean, default = True But if you are using DeepHash directly, you can set this parameter. significant_digits : int >= 0, default=None - If it is a non negative integer, it compares only that many digits AFTER - the decimal point. + By default the significant_digits compares only that many digits AFTER the decimal point. 
However you can set override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. - This only affects floats, decimal.Decimal and complex numbers. + Important: This will affect ANY number comparison when it is set. - Take a look at DeepDiff.diff docs for explanation of how this works. + Note: If ignore_numeric_type_changes is set to True and you have left significant_digits to the default of None, it gets automatically set to 55. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change. However when ignore_numeric_type_changes=True, in order compare numbers from different types to each other, we need to convert them all into strings. The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. + + Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits when the number_format_notation is left as the default of "f" meaning fixed point. + + Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 + + For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) + + When you set the number_format_notation="e", we use "{:.Xe}".format(Your Number) where X=significant_digits. + +number_format_notation : string, default="f" + number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. apply_hash: Boolean, default = True DeepHash at its core is doing deterministic serialization of objects into strings. 
diff --git a/deepdiff/diff.py b/deepdiff/diff.py index b0b3bae0..e249f1fe 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -49,7 +49,7 @@ def __init__(self, ignore_order=False, report_repetition=False, significant_digits=None, - significant_digits_formatter="{:.%sf}", + number_format_notation="f", exclude_paths=None, exclude_regex_paths=None, exclude_types=None, @@ -67,7 +67,7 @@ def __init__(self, raise ValueError(( "The following parameter(s) are not valid: %s\n" "The valid parameters are ignore_order, report_repetition, significant_digits, " - "significant_digits_formatter, exclude_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, " + "number_format_notation, exclude_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, " "ignore_string_type_changes, ignore_numeric_type_changes, ignore_type_subclasses, " "number_to_string_func, verbose_level, view, and hasher.") % ', '.join(kwargs.keys())) @@ -92,7 +92,7 @@ def __init__(self, self.hasher = hasher self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) - self.significant_digits_formatter = significant_digits_formatter + self.number_format_notation = number_format_notation self.tree = TreeResult() @@ -230,7 +230,8 @@ def __get_clean_to_keys_mapping(self, keys, level): if self.ignore_string_type_changes and isinstance(key, bytes): clean_key = key.decode('utf-8') elif self.ignore_numeric_type_changes and isinstance(key, numbers): - clean_key = self.number_to_string(key, self.significant_digits, self.significant_digits_formatter) + clean_key = self.number_to_string(key, significant_digits=self.significant_digits, + number_format_notation=self.number_format_notation) else: clean_key = key if clean_key in result: @@ -463,7 +464,7 @@ def __create_hashtable(self, t, level): hasher=self.hasher, ignore_repetition=not self.report_repetition, significant_digits=self.significant_digits, - 
significant_digits_formatter=self.significant_digits_formatter, + number_format_notation=self.number_format_notation, ignore_string_type_changes=self.ignore_string_type_changes, ignore_numeric_type_changes=self.ignore_numeric_type_changes, ignore_type_in_groups=self.ignore_type_in_groups, @@ -570,8 +571,12 @@ def __diff_numbers(self, level): # Note that abs(3.25-3.251) = 0.0009999999999998899 < 0.001 # Note also that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 # For Decimals, format seems to round 2.5 to 2 and 3.5 to 4 (to closest even number) - t1_s = self.number_to_string(level.t1, self.significant_digits, self.significant_digits_formatter) - t2_s = self.number_to_string(level.t2, self.significant_digits, self.significant_digits_formatter) + t1_s = self.number_to_string(level.t1, + significant_digits=self.significant_digits, + number_format_notation=self.number_format_notation) + t2_s = self.number_to_string(level.t2, + significant_digits=self.significant_digits, + number_format_notation=self.number_format_notation) if t1_s != t2_s: self.__report_result('values_changed', level) diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 470f068c..17f8bbcb 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -14,28 +14,39 @@ t1 : A dictionary, list, string or any python object that has __dict__ or __slot t2 : dictionary, list, string or almost any python object that has __dict__ or __slots__ The second item is to be compared to the first one -ignore_order : Boolean, defalt=False ignores orders for iterables. +ignore_order : Boolean, defalt=False + ignores orders for iterables Note that if you have iterables contatining any unhashable, ignoring order can be expensive. Normally ignore_order does not report duplicates and repetition changes. 
In order to report repetitions, set report_repetition=True in addition to ignore_order=True -report_repetition : Boolean, default=False reports repetitions when set True +report_repetition : Boolean, default=False + reports repetitions when set True ONLY when ignore_order is set True too. This works for iterables. This feature currently is experimental and is not production ready. -significant_digits : int >= 0, default=None. - If it is a non negative integer, it compares only that many digits AFTER - the decimal point. +significant_digits : int >= 0, default=None + By default the significant_digits compares only that many digits AFTER the decimal point. However you can set override that by setting the number_format_notation="e" which will make it mean the digits in scientific notation. - This only affects floats, decimal.Decimal and complex. + Important: This will affect ANY number comparison when it is set. - Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits + Note: If ignore_numeric_type_changes is set to True and you have left significant_digits to the default of None, it gets automatically set to 55. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change. However when ignore_numeric_type_changes=True, in order compare numbers from different types to each other, we need to convert them all into strings. The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. + + Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits when the number_format_notation is left as the default of "f" meaning fixed point. Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) -verbose_level : int >= 0, default = 1. 
+ When you set the number_format_notation="e", we use "{:.Xe}".format(Your Number) where X=significant_digits. + +number_format_notation : string, default="f" + number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. + +number_to_string_func : function, default=None + This is an advanced feature to give the user the full control into overriding how numbers are converted to strings for comparison. The default function is defined in https://github.com/seperman/deepdiff/blob/master/deepdiff/helper.py and is called number_to_string. You can define your own function to do that. + +verbose_level: int >= 0, default = 1 Higher verbose level shows you more details. For example verbose level 1 shows what dictionary item are added or removed. And verbose level 2 shows the value of the items that are added or removed too. @@ -292,6 +303,34 @@ Approximate float comparison (Significant digits after the point): {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} +Approximate number comparison (significant_digits after the decimal point in scientific notation) + >>> DeepDiff(1024, 1020, significant_digits=2, number_format_notation="f") # default is "f" + {'values_changed': {'root': {'new_value': 1020, 'old_value': 1024}}} + >>> DeepDiff(1024, 1020, significant_digits=2, number_format_notation="e") + {} + +Defining your own number_to_string_func + Lets say you want the numbers comparison do it in logarithmic scale and in scientific notation. + >>> import math + >>> from deepdiff import DeepDiff + >>> from deepdiff.helper import number_to_string + >>> + >>> + >>> def log_number_to_string(number, *args, **kwargs): + ... number = math.log(number) + ... return number_to_string(number, *args, **kwargs) + ... 
+ >>> + >>> DeepDiff(100000, 100021, significant_digits=4, number_format_notation="e", number_to_string_func=log_number_to_string) + {} + >>> + >>> t1 = [10, 100000] + >>> t2 = [11, 100021] + >>> + >>> DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", number_to_string_func=log_number_to_string) + {'values_changed': {'root[0]': {'new_value': 11, 'old_value': 10}}} + + .. note:: All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. @@ -446,6 +485,8 @@ ignore_string_case >>> DeepDiff(t1='Hello', t2='heLLO', ignore_string_case=True) {} + + **Tree View** Starting the version 3 You can chooe the view into the deepdiff results. diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 5ac5d9ed..fd314288 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -216,10 +216,20 @@ def get_doc(doc_filename): return doc -def number_to_string(number, significant_digits, using="{:.%sf}"): +number_formatting = { + "f": r'{:.%sf}', + "e": r'{:.%se}' +} + + +def number_to_string(number, significant_digits, number_format_notation="f"): """ Convert numbers to string considering significant digits. """ + try: + using = number_formatting[number_format_notation] + except KeyError: + raise ValueError("number_format_notation got invalid value of {}. 
The valid values are 'f' and 'e'".format(number_format_notation)) from None if isinstance(number, Decimal): tup = number.as_tuple() with localcontext() as ctx: diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 6389c235..0774e242 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1,12 +1,15 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import math import datetime import pytest import logging +from unittest import mock from decimal import Decimal from deepdiff import DeepDiff +from deepdiff.helper import number_to_string from tests import CustomClass -from unittest import mock + logging.disable(logging.CRITICAL) @@ -1307,13 +1310,14 @@ def test_int_to_unicode(self): } assert result == ddiff - @pytest.mark.parametrize("t1, t2, significant_digits, result", [ - (Decimal('2.5'), Decimal('1.5'), 0, {}), - (Decimal('2.5'), Decimal('1.5'), 1, {'values_changed': {'root': {'new_value': Decimal('1.5'), 'old_value': Decimal('2.5')}}}), - (Decimal('2.5'), Decimal(2.5), 3, {}), + @pytest.mark.parametrize("t1, t2, significant_digits, number_format_notation, result", [ + (Decimal('2.5'), Decimal('1.5'), 0, "f", {}), + (Decimal('2.5'), Decimal('1.5'), 1, "f", {'values_changed': {'root': {'new_value': Decimal('1.5'), 'old_value': Decimal('2.5')}}}), + (Decimal('2.5'), Decimal(2.5), 3, "f", {}), + (1024, 1022, 2, "e", {}), ]) - def test_significant_digits(self, t1, t2, significant_digits, result): - ddiff = DeepDiff(t1, t2, significant_digits=significant_digits) + def test_significant_digits_and_notation(self, t1, t2, significant_digits, number_format_notation, result): + ddiff = DeepDiff(t1, t2, significant_digits=significant_digits, number_format_notation=number_format_notation) assert result == ddiff def test_significant_digits_for_complex_imaginary_part(self): @@ -1375,6 +1379,29 @@ def test_negative_significant_digits(self): with pytest.raises(ValueError): DeepDiff(1, 1, significant_digits=-1) + def test_number_to_string_func(self): + 
def log_number_to_string(number, *args, **kwargs): + number = math.log(number) + return number_to_string(number, *args, **kwargs) + + ddiff = DeepDiff(100000, 100021, significant_digits=4, number_format_notation="e", + number_to_string_func=log_number_to_string) + + assert {} == ddiff + + t1 = [10, 100000] + t2 = [11, 100021] + + ddiff = DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", + number_to_string_func=log_number_to_string) + result = {'values_changed': {'root[0]': {'new_value': 11, 'old_value': 10}}} + + assert result == ddiff + + ddiff = DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", + number_to_string_func=log_number_to_string, ignore_order=True) + assert {} == ddiff + def test_ignore_type_in_groups(self): t1 = [1, 2, 3] t2 = [1.0, 2.0, 3.0] diff --git a/tests/test_hash.py b/tests/test_hash.py index 53dbfd90..b9cbdb5d 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -3,9 +3,10 @@ import re import pytest import logging +import math from deepdiff import DeepHash from deepdiff.deephash import prepare_string_for_hashing, unprocessed -from deepdiff.helper import pypy3, get_id +from deepdiff.helper import pypy3, get_id, number_to_string from collections import namedtuple from functools import partial from enum import Enum @@ -287,13 +288,44 @@ def test_same_sets_same_hash(self): assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] - def test_similar_sets_with_significant_digits_same_hash(self): - t1 = {0.012, 0.98} - t2 = {0.013, 0.99} - t1_hash = DeepHashPrep(t1, significant_digits=1) - t2_hash = DeepHashPrep(t2, significant_digits=1) + @pytest.mark.parametrize("t1, t2, significant_digits, number_format_notation, result", [ + ({0.012, 0.98}, {0.013, 0.99}, 1, "f", 'set:set:number:0.00,number:1.0'), + (100000, 100021, 4, "e", 'set:set:number:0.00,number:1.0'), + ]) + def test_similar_significant_hash(self, t1, t2, significant_digits, + number_format_notation, result): + t1_hash = DeepHashPrep(t1, 
significant_digits=significant_digits, + number_format_notation=number_format_notation) + t2_hash = DeepHashPrep(t2, significant_digits=significant_digits, + number_format_notation=number_format_notation) + + if result: + assert result == t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + else: + assert t1_hash[get_id(t1)] != t2_hash[get_id(t2)] - assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + # def test_number_to_string_func(self): + # def log_number_to_string(number, *args, **kwargs): + # number = math.log(number) + # return number_to_string(number, *args, **kwargs) + + # ddiff = DeepDiff(100000, 100021, significant_digits=4, number_format_notation="e", + # number_to_string_func=log_number_to_string) + + # assert {} == ddiff + + # t1 = [10, 100000] + # t2 = [11, 100021] + + # ddiff = DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", + # number_to_string_func=log_number_to_string) + # result = {'values_changed': {'root[0]': {'new_value': 11, 'old_value': 10}}} + + # assert result == ddiff + + # ddiff = DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", + # number_to_string_func=log_number_to_string, ignore_order=True) + # assert {} == ddiff def test_same_sets_in_lists_same_hash(self): t1 = ["a", {1, 3, 2}] From d45c51d0826113d52a656bb377ec09a5e6e6bbf2 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 6 Apr 2019 22:57:50 -0700 Subject: [PATCH 06/12] fixing logic --- deepdiff/deephash.py | 22 ++++++++++------------ deepdiff/helper.py | 2 +- tests/test_hash.py | 39 +++++++++++++++------------------------ 3 files changed, 26 insertions(+), 37 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 05205baf..c678e8f7 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -232,8 +232,9 @@ def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribut type_str = 'dict' return "%s:{%s}" % (type_str, result) - def _prep_set(self, obj, parent, parents_ids=EMPTY_FROZENSET): - return 
"set:{}".format(self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids)) + # def _prep_set(self, obj, parent, parents_ids=EMPTY_FROZENSET): + # import pytest; pytest.set_trace() + # return "set:{}".format(self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids)) def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): @@ -266,14 +267,11 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): return result def _prep_number(self, obj): - if self.significant_digits is not None and ( - self.ignore_numeric_type_changes or isinstance(obj, (float, complex, Decimal))): - obj_s = self.number_to_string(obj, significant_digits=self.significant_digits, - number_format_notation=self.number_format_notation) - result = "number:{}".format(obj_s) - else: - result = KEY_TO_VAL_STR.format(type(obj).__name__, obj) - return result + type_ = "number" if self.ignore_numeric_type_changes else obj.__class__.__name__ + if self.significant_digits is not None: + obj = self.number_to_string(obj, significant_digits=self.significant_digits, + number_format_notation=self.number_format_notation) + return KEY_TO_VAL_STR.format(type_, obj) def _prep_tuple(self, obj, parent, parents_ids): # Checking to see if it has _fields. 
Which probably means it is a named @@ -320,8 +318,8 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): elif isinstance(obj, tuple): result = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids) - elif isinstance(obj, (set, frozenset)): - result = self._prep_set(obj=obj, parent=parent, parents_ids=parents_ids) + # elif isinstance(obj, (set, frozenset)): + # result = self._prep_set(obj=obj, parent=parent, parents_ids=parents_ids) elif isinstance(obj, Iterable): result = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index fd314288..7230001c 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -218,7 +218,7 @@ def get_doc(doc_filename): number_formatting = { "f": r'{:.%sf}', - "e": r'{:.%se}' + "e": r'{:.%se}', } diff --git a/tests/test_hash.py b/tests/test_hash.py index b9cbdb5d..4b656770 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -289,8 +289,8 @@ def test_same_sets_same_hash(self): assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] @pytest.mark.parametrize("t1, t2, significant_digits, number_format_notation, result", [ - ({0.012, 0.98}, {0.013, 0.99}, 1, "f", 'set:set:number:0.00,number:1.0'), - (100000, 100021, 4, "e", 'set:set:number:0.00,number:1.0'), + ({0.012, 0.98}, {0.013, 0.99}, 1, "f", 'set:float:0.00,float:1.0'), + (100000, 100021, 3, "e", 'int:1.000e+05'), ]) def test_similar_significant_hash(self, t1, t2, significant_digits, number_format_notation, result): @@ -300,32 +300,23 @@ def test_similar_significant_hash(self, t1, t2, significant_digits, number_format_notation=number_format_notation) if result: - assert result == t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + assert result == t1_hash[t1] == t2_hash[t2] else: - assert t1_hash[get_id(t1)] != t2_hash[get_id(t2)] + assert t1_hash[t1] != t2_hash[t2] - # def test_number_to_string_func(self): - # def log_number_to_string(number, *args, **kwargs): - # number = math.log(number) - # 
return number_to_string(number, *args, **kwargs) + def test_number_to_string_func(self): + def custom_number_to_string(number, *args, **kwargs): + number = 100 if number < 100 else number + return number_to_string(number, *args, **kwargs) - # ddiff = DeepDiff(100000, 100021, significant_digits=4, number_format_notation="e", - # number_to_string_func=log_number_to_string) + t1 = [10, 12, 100000] + t2 = [50, 63, 100021] + t1_hash = DeepHashPrep(t1, significant_digits=4, number_format_notation="e", + number_to_string_func=custom_number_to_string) + t2_hash = DeepHashPrep(t2, significant_digits=4, number_format_notation="e", + number_to_string_func=custom_number_to_string) - # assert {} == ddiff - - # t1 = [10, 100000] - # t2 = [11, 100021] - - # ddiff = DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", - # number_to_string_func=log_number_to_string) - # result = {'values_changed': {'root[0]': {'new_value': 11, 'old_value': 10}}} - - # assert result == ddiff - - # ddiff = DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", - # number_to_string_func=log_number_to_string, ignore_order=True) - # assert {} == ddiff + assert t1_hash[10] == t2_hash[50] == t1_hash[12] == t2_hash[63] != t1_hash[100000] def test_same_sets_in_lists_same_hash(self): t1 = ["a", {1, 3, 2}] From 21823e1e0175819226d66787013ca9454481aa26 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 6 Apr 2019 23:05:39 -0700 Subject: [PATCH 07/12] fixing tests --- deepdiff/deephash.py | 7 ------- tests/test_diff_text.py | 28 ++++++++++------------------ 2 files changed, 10 insertions(+), 25 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index c678e8f7..d22f1199 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -232,10 +232,6 @@ def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribut type_str = 'dict' return "%s:{%s}" % (type_str, result) - # def _prep_set(self, obj, parent, parents_ids=EMPTY_FROZENSET): - # import 
pytest; pytest.set_trace() - # return "set:{}".format(self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids)) - def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): result = defaultdict(int) @@ -318,9 +314,6 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): elif isinstance(obj, tuple): result = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids) - # elif isinstance(obj, (set, frozenset)): - # result = self._prep_set(obj=obj, parent=parent, parents_ids=parents_ids) - elif isinstance(obj, Iterable): result = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids) diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 0774e242..fd7f749a 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1379,27 +1379,19 @@ def test_negative_significant_digits(self): with pytest.raises(ValueError): DeepDiff(1, 1, significant_digits=-1) - def test_number_to_string_func(self): - def log_number_to_string(number, *args, **kwargs): - number = math.log(number) + @pytest.mark.parametrize("t1, t2, significant_digits, ignore_order", [ + (100000, 100021, 3, False), + ([10, 12, 100000], [50, 63, 100021], 3, False), + ([10, 12, 100000], [50, 63, 100021], 3, True), + ]) + def test_number_to_string_func(self, t1, t2, significant_digits, ignore_order): + def custom_number_to_string(number, *args, **kwargs): + number = 100 if number < 100 else number return number_to_string(number, *args, **kwargs) - ddiff = DeepDiff(100000, 100021, significant_digits=4, number_format_notation="e", - number_to_string_func=log_number_to_string) - - assert {} == ddiff - - t1 = [10, 100000] - t2 = [11, 100021] - - ddiff = DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", - number_to_string_func=log_number_to_string) - result = {'values_changed': {'root[0]': {'new_value': 11, 'old_value': 10}}} - - assert result == ddiff + ddiff = DeepDiff(100000, 100021, significant_digits=3, number_format_notation="e", 
+ number_to_string_func=custom_number_to_string) - ddiff = DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", - number_to_string_func=log_number_to_string, ignore_order=True) assert {} == ddiff def test_ignore_type_in_groups(self): From 0b22275d7b71edff2fea27cedac212f28d8711c1 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 6 Apr 2019 23:36:00 -0700 Subject: [PATCH 08/12] fixing issues with significant digits --- deepdiff/base.py | 5 +++-- deepdiff/deephash.py | 4 +--- deepdiff/diff.py | 14 +++++++++----- deepdiff/helper.py | 2 ++ tests/test_diff_text.py | 25 +++++++++++++++++++++++++ tests/test_helper.py | 34 +++++++++++++++++++++++++++++++++- 6 files changed, 73 insertions(+), 11 deletions(-) diff --git a/deepdiff/base.py b/deepdiff/base.py index 7b798758..1a578cb3 100644 --- a/deepdiff/base.py +++ b/deepdiff/base.py @@ -10,11 +10,12 @@ class Base: strings = strings def get_significant_digits(self, significant_digits, ignore_numeric_type_changes): - if ignore_numeric_type_changes and not significant_digits: - significant_digits = DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES if significant_digits is not None and significant_digits < 0: raise ValueError( "significant_digits must be None or a non-negative integer") + if significant_digits is None: + if ignore_numeric_type_changes: + significant_digits = DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES return significant_digits def get_ignore_types_in_groups(self, ignore_type_in_groups, diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index d22f1199..6a24e691 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -10,7 +10,7 @@ convert_item_or_items_into_set_else_none, get_doc, convert_item_or_items_into_compiled_regexes_else_none, get_id, type_is_subclass_of_type_group, type_in_type_group, - number_to_string) + number_to_string, KEY_TO_VAL_STR) from deepdiff.base import Base logger = logging.getLogger(__name__) @@ -27,8 +27,6 @@ INDEX_VS_ATTRIBUTE = ('[%s]', '.%s') 
-KEY_TO_VAL_STR = "{}:{}" - def prepare_string_for_hashing(obj, ignore_string_type_changes=False, ignore_string_case=False): """ diff --git a/deepdiff/diff.py b/deepdiff/diff.py index e249f1fe..1f4ab087 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -23,7 +23,7 @@ convert_item_or_items_into_set_else_none, get_type, convert_item_or_items_into_compiled_regexes_else_none, type_is_subclass_of_type_group, type_in_type_group, get_doc, - number_to_string) + number_to_string, KEY_TO_VAL_STR) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel from deepdiff.model import DictRelationship, AttributeRelationship from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship @@ -561,8 +561,13 @@ def __diff_iterable_with_deephash(self, level): def __diff_numbers(self, level): """Diff Numbers""" + t1_type = "" if self.ignore_numeric_type_changes else level.t1.__class__.__name__ + t2_type = "" if self.ignore_numeric_type_changes else level.t2.__class__.__name__ - if self.significant_digits is not None: + if self.significant_digits is None: + if level.t1 != level.t2: + self.__report_result('values_changed', level) + else: # Bernhard10: I use string formatting for comparison, to be consistent with usecases where # data is read from files that were previousely written from python and # to be consistent with on-screen representation of numbers. 
@@ -578,11 +583,10 @@ def __diff_numbers(self, level): significant_digits=self.significant_digits, number_format_notation=self.number_format_notation) + t1_s = KEY_TO_VAL_STR.format(t1_type, t1_s) + t2_s = KEY_TO_VAL_STR.format(t2_type, t2_s) if t1_s != t2_s: self.__report_result('values_changed', level) - else: - if level.t1 != level.t2: - self.__report_result('values_changed', level) def __diff_types(self, level): """Diff types""" diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 7230001c..e523992a 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -40,6 +40,8 @@ ZERO_DECIMAL_CHARACTERS = set("-0.") +KEY_TO_VAL_STR = "{}:{}" + def short_repr(item, max_length=15): """Short representation of item if it is too long""" diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index fd7f749a..669ce5d4 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1394,6 +1394,31 @@ def custom_number_to_string(number, *args, **kwargs): assert {} == ddiff + @pytest.mark.parametrize("t1, t2, significant_digits, expected_result", + [ + (10, 10.0, 5, {}), + (10, 10.2, 5, {'values_changed': {'root': {'new_value': 10.2, 'old_value': 10}}}), + (10, 10.2, 0, {}), + (Decimal(10), 10, 0, {}), + (Decimal(10), 10, 10, {}), + (Decimal(10), 10.0, 0, {}), + (Decimal(10), 10.0, 10, {}), + (Decimal('10.0'), 10.0, 5, {}), + (Decimal('10.01'), 10.01, 1, {}), + (Decimal('10.01'), 10.01, 2, {}), + (Decimal('10.01'), 10.01, 5, {}), + (Decimal('10.01'), 10.01, 8, {}), + (Decimal('10.010'), 10.01, 3, {}), + (Decimal('100000.1'), 100000.1, 0, {}), + (Decimal('100000.1'), 100000.1, 1, {}), + (Decimal('100000.1'), 100000.1, 5, {}), + (Decimal('100000'), 100000.1, 0, {}), + (Decimal('100000'), 100000.1, 1, {'values_changed': {'root': {'new_value': 100000.1, 'old_value': Decimal('100000')}}}), + ]) + def test_decimal_digits(self, t1, t2, significant_digits, expected_result): + ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True, ignore_string_type_changes=True, 
significant_digits=significant_digits) + assert expected_result == ddiff + def test_ignore_type_in_groups(self): t1 = [1, 2, 3] t2 = [1.0, 2.0, 3.0] diff --git a/tests/test_helper.py b/tests/test_helper.py index 7b1aefe5..30307f8c 100644 --- a/tests/test_helper.py +++ b/tests/test_helper.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from deepdiff.helper import short_repr +import pytest +from decimal import Decimal +from deepdiff.helper import short_repr, number_to_string class TestHelper: @@ -17,3 +19,33 @@ def test_short_repr_when_long(self): item = {'Eat more': 'burritos'} output = short_repr(item) assert output == "{'Eat more':...}" + + @pytest.mark.parametrize("t1, t2, significant_digits, expected_result", + [ + (10, 10.0, 5, True), + (10, 10.2, 5, ('10.00000', '10.20000')), + (10, 10.2, 0, True), + (Decimal(10), 10, 0, True), + (Decimal(10), 10, 10, True), + (Decimal(10), 10.0, 0, True), + (Decimal(10), 10.0, 10, True), + (Decimal('10.0'), 10.0, 5, True), + (Decimal('10.01'), 10.01, 1, True), + (Decimal('10.01'), 10.01, 2, True), + (Decimal('10.01'), 10.01, 5, True), + (Decimal('10.01'), 10.01, 8, True), + (Decimal('10.010'), 10.01, 3, True), + (Decimal('100000.1'), 100000.1, 0, True), + (Decimal('100000.1'), 100000.1, 1, True), + (Decimal('100000.1'), 100000.1, 5, True), + (Decimal('100000'), 100000.1, 0, True), + (Decimal('100000'), 100000.1, 1, ('100000.0', '100000.1')), + ]) + def test_number_to_string_decimal_digits(self, t1, t2, significant_digits, expected_result): + st1 = number_to_string(t1, significant_digits=significant_digits, number_format_notation="f") + st2 = number_to_string(t2, significant_digits=significant_digits, number_format_notation="f") + if expected_result is True: + assert st1 == st2 + else: + assert st1 == expected_result[0] + assert st2 == expected_result[1] From 0b34605d4516ac07f41992bad18fafd258224483 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 6 Apr 2019 23:42:49 -0700 Subject: [PATCH 09/12] reducing 
the default significant digits --- deepdiff/base.py | 2 +- deepdiff/deephash_doc.rst | 2 +- tests/test_diff_text.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deepdiff/base.py b/deepdiff/base.py index 1a578cb3..91d89b47 100644 --- a/deepdiff/base.py +++ b/deepdiff/base.py @@ -2,7 +2,7 @@ from deepdiff.helper import strings, numbers -DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 55 +DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 12 class Base: diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index e7371ad5..e5e5d326 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -58,7 +58,7 @@ significant_digits : int >= 0, default=None Important: This will affect ANY number comparison when it is set. - Note: If ignore_numeric_type_changes is set to True and you have left significant_digits to the default of None, it gets automatically set to 55. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change. However when ignore_numeric_type_changes=True, in order compare numbers from different types to each other, we need to convert them all into strings. The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. + Note: If ignore_numeric_type_changes is set to True and you have left significant_digits to the default of None, it gets automatically set to 12. The reason is that normally when numbers from 2 different types are compared, instead of comparing the values, we only report the type change. However when ignore_numeric_type_changes=True, in order compare numbers from different types to each other, we need to convert them all into strings. The significant_digits will be used to make sure we accurately convert all the numbers into strings in order to report the changes between them. 
Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits when the number_format_notation is left as the default of "f" meaning fixed point. diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 669ce5d4..95654b90 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1474,8 +1474,8 @@ def test_ignore_string_type_changes_when_dict_keys_merge_is_not_deterministic(se assert result == ddiff or alternative_result == ddiff @pytest.mark.parametrize("t1, t2, significant_digits, result", [ - ([0.1], [Decimal('0.10')], None, - {'values_changed': {'root[0]': {'new_value': Decimal('0.10'), 'old_value': 0.1}}}), # Due to floating point arithmetics, if you don't pass significant digits, they will be not the same values! + ([0.1], [Decimal('0.10')], 55, + {'values_changed': {'root[0]': {'new_value': Decimal('0.10'), 'old_value': 0.1}}}), # Due to floating point arithmetics with high significant digits. ([0.1], [Decimal('0.10')], 5, {}), # Same inputs as above but with significant digits that is low. 
([-0.1], [-Decimal('0.10')], 5, {}), ([-Decimal('0.102')], [-Decimal('0.10')], 2, {}), From 11408c85ea9bf741134c64b1b662b4051cea8346 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 7 Apr 2019 00:04:36 -0700 Subject: [PATCH 10/12] tests back to 100% --- deepdiff/deephash.py | 5 ++--- deepdiff/diff.py | 8 +++++--- deepdiff/helper.py | 4 ++-- tests/test_diff_text.py | 13 ++++++++----- tests/test_hash.py | 14 +++++++++++--- tests/test_helper.py | 4 ++++ 6 files changed, 32 insertions(+), 16 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 6a24e691..3cb3bed4 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -3,7 +3,6 @@ import logging from collections.abc import Iterable, MutableMapping from collections import defaultdict -from decimal import Decimal from hashlib import sha1, sha256 from deepdiff.helper import (strings, numbers, unprocessed, not_hashed, add_to_frozen_set, @@ -16,8 +15,8 @@ try: import mmh3 -except ImportError: - mmh3 = False +except ImportError: # pragma: no cover + mmh3 = False # pragma: no cover UNPROCESSED = 'unprocessed' MURMUR_SEED = 1203 diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 1f4ab087..f412f893 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -229,9 +229,11 @@ def __get_clean_to_keys_mapping(self, keys, level): for key in keys: if self.ignore_string_type_changes and isinstance(key, bytes): clean_key = key.decode('utf-8') - elif self.ignore_numeric_type_changes and isinstance(key, numbers): + elif isinstance(key, numbers): + type_ = "number" if self.ignore_numeric_type_changes else key.__class__.__name__ clean_key = self.number_to_string(key, significant_digits=self.significant_digits, number_format_notation=self.number_format_notation) + clean_key = KEY_TO_VAL_STR.format(type_, clean_key) else: clean_key = key if clean_key in result: @@ -561,8 +563,8 @@ def __diff_iterable_with_deephash(self, level): def __diff_numbers(self, level): """Diff Numbers""" - t1_type = "" if 
self.ignore_numeric_type_changes else level.t1.__class__.__name__ - t2_type = "" if self.ignore_numeric_type_changes else level.t2.__class__.__name__ + t1_type = "number" if self.ignore_numeric_type_changes else level.t1.__class__.__name__ + t2_type = "number" if self.ignore_numeric_type_changes else level.t2.__class__.__name__ if self.significant_digits is None: if level.t1 != level.t2: diff --git a/deepdiff/helper.py b/deepdiff/helper.py index e523992a..fdbb2d09 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -213,8 +213,8 @@ def get_doc(doc_filename): try: with open(os.path.join(current_dir, doc_filename), 'r') as doc_file: doc = doc_file.read() - except Exception: - doc = 'Failed to load the docstrings. Please visit: https://github.com/seperman/deepdiff' + except Exception: # pragma: no cover + doc = 'Failed to load the docstrings. Please visit: https://github.com/seperman/deepdiff' # pragma: no cover return doc diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 95654b90..eb0dc91b 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -69,11 +69,14 @@ def test_item_type_change_for_strings_override(self): } } == ddiff - def test_type_change_numeric(self): - t1 = 10 - t2 = 10.0 - ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True) - assert {} == ddiff + @pytest.mark.parametrize("t1, t2, significant_digits, ignore_order, result", [ + # (10, 10.0, 5, False, {}), + ({10: 'a', 11.1: 'b'}, {10.0: 'a', Decimal('11.1000003'): 'b'}, 5, False, {}), + ]) + def test_type_change_numeric_ignored(self, t1, t2, significant_digits, ignore_order, result): + ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True, + significant_digits=significant_digits, ignore_order=ignore_order) + assert result == ddiff @pytest.mark.parametrize("t1, t2, expected_result", [ diff --git a/tests/test_hash.py b/tests/test_hash.py index 4b656770..808fad59 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -525,10 +525,10 @@ def 
test_string_case(self): assert t1_hash == {'Hello': 'str:hello'} -class TestDeepHashSHA1: - """DeepHash with SHA1 Tests.""" +class TestDeepHashSHA: + """DeepHash with SHA Tests.""" - def test_prep_str_sha1(self): + def test_str_sha1(self): obj = "a" expected_result = { obj: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' @@ -536,6 +536,14 @@ def test_prep_str_sha1(self): result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) assert expected_result == result + def test_str_sha256(self): + obj = "a" + expected_result = { + obj: 'ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb' + } + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha256hex) + assert expected_result == result + def test_prep_str_sha1_fail_if_mutable(self): """ This test fails if DeepHash is getting a mutable copy of hashes diff --git a/tests/test_helper.py b/tests/test_helper.py index 30307f8c..78b0fb85 100644 --- a/tests/test_helper.py +++ b/tests/test_helper.py @@ -49,3 +49,7 @@ def test_number_to_string_decimal_digits(self, t1, t2, significant_digits, expec else: assert st1 == expected_result[0] assert st2 == expected_result[1] + + def test_number_to_string_with_invalid_notation(self): + with pytest.raises(ValueError): + number_to_string(10, significant_digits=4, number_format_notation='blah') From 7b61e31c88c6a7c8d7de8009c20d9603eccf03c7 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 7 Apr 2019 00:21:21 -0700 Subject: [PATCH 11/12] uncommenting tests --- deepdiff/deephash_doc.rst | 122 +++++++++++++++++++++++--------------- tests/test_diff_text.py | 8 +-- 2 files changed, 79 insertions(+), 51 deletions(-) diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index e5e5d326..571cc2d2 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -95,58 +95,15 @@ ignore_string_type_changes: Boolean, default = True string type conversions should not affect the hash output when this is set to True. 
For example "Hello" and b"Hello" should produce the same hash. -By setting it to True, both the string and bytes of hello return the same hash. - >>> DeepHash(b'hello', ignore_string_type_changes=True) - {b'hello': 221860156526691709602818861774599422448} - >>> DeepHash('hello', ignore_string_type_changes=True) - {'hello': 221860156526691709602818861774599422448} + By setting it to True, both the string and bytes of hello return the same hash. + ignore_numeric_type_changes: Boolean, default = False numeric type conversions should not affect the hash output when this is set to True. For example 10, 10.0 and Decimal(10) should produce the same hash. When ignore_numeric_type_changes is set to True, all numbers are converted - to decimals with the precision of significant_digits parameter. - If no significant_digits is passed by the user, a default value of 55 is used. - - For example if significant_digits=5, 1.1, Decimal(1.1) are both converted to 1.10000 - - That way they both produce the same hash. - - >>> t1 = {1: 1, 2: 2.22} - >>> t2 = {1: 1.0, 2: 2.22} - >>> DeepHash(t1)[1] - 231678797214551245419120414857003063149 - >>> DeepHash(t1)[1.0] - 231678797214551245419120414857003063149 - -You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: - >>> from deepdiff import DeepDiff - >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] - - -ignore_type_in_groups example with custom objects: - >>> class Burrito: - ... bread = 'flour' - ... def __init__(self): - ... self.spicy = True - ... - >>> - >>> class Taco: - ... bread = 'flour' - ... def __init__(self): - ... self.spicy = True - ... 
- >>> - >>> burrito = Burrito() - >>> taco = Taco() - >>> - >>> burritos = [burrito] - >>> tacos = [taco] - >>> - >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)]) - >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)]) - >>> d1[burrito] == d2[taco] - True + to strings with the precision of significant_digits parameter and number_format_notation notation. + If no significant_digits is passed by the user, a default value of 12 is used. ignore_type_subclasses @@ -221,6 +178,59 @@ But with DeepHash: >>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0] True +number_format_notation: String, default = "f" + number_format_notation is what defines the meaning of significant digits. The default value of "f" means the digits AFTER the decimal point. "f" stands for fixed point. The other option is "e" which stands for exponent notation or scientific notation. + + +ignore_string_type_changes: Boolean, default = True + By setting it to True, both the string and bytes of hello return the same hash. + >>> DeepHash(b'hello', ignore_string_type_changes=True) + {b'hello': 221860156526691709602818861774599422448} + >>> DeepHash('hello', ignore_string_type_changes=True) + {'hello': 221860156526691709602818861774599422448} + + +ignore_numeric_type_changes: Boolean, default = False + For example if significant_digits=5, 1.1, Decimal(1.1) are both converted to 1.10000 + + That way they both produce the same hash. + + >>> t1 = {1: 1, 2: 2.22} + >>> t2 = {1: 1.0, 2: 2.22} + >>> DeepHash(t1)[1] + 231678797214551245419120414857003063149 + >>> DeepHash(t1)[1.0] + 231678797214551245419120414857003063149 + + You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . 
If you want to pass both: + >>> from deepdiff import DeepDiff + >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] + + +ignore_type_in_groups example with custom objects: + >>> class Burrito: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> class Taco: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> burrito = Burrito() + >>> taco = Taco() + >>> + >>> burritos = [burrito] + >>> tacos = [taco] + >>> + >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)]) + >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)]) + >>> d1[burrito] == d2[taco] + True + ignore_type_subclasses Use ignore_type_subclasses=True so when ignoring type (class), the subclasses of that class are ignored too. @@ -264,3 +274,21 @@ ignore_string_case False >>> DeepHash('hello', ignore_string_case=True)['hello'] == DeepHash('heLLO', ignore_string_case=True)['heLLO'] True + +number_format_notation : string, default="f" + When numbers are converted to the string, you have the choices between "f" as fixed point and "e" as scientific notation: + >>> t1=10002 + >>> t2=10004 + >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="f") + >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="f") + >>> + >>> t1_hash[t1] == t2_hash[t2] + False + >>> + >>> + >>> # Now we use the scientific notation + ... 
t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e") + >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e") + >>> + >>> t1_hash[t1] == t2_hash[t2] + True diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index eb0dc91b..e89eeb0a 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -70,7 +70,7 @@ def test_item_type_change_for_strings_override(self): } == ddiff @pytest.mark.parametrize("t1, t2, significant_digits, ignore_order, result", [ - # (10, 10.0, 5, False, {}), + (10, 10.0, 5, False, {}), ({10: 'a', 11.1: 'b'}, {10.0: 'a', Decimal('11.1000003'): 'b'}, 5, False, {}), ]) def test_type_change_numeric_ignored(self, t1, t2, significant_digits, ignore_order, result): @@ -80,10 +80,10 @@ def test_type_change_numeric_ignored(self, t1, t2, significant_digits, ignore_or @pytest.mark.parametrize("t1, t2, expected_result", [ - # (10, 10.0, {}), + (10, 10.0, {}), (10, 10.2, {'values_changed': {'root': {'new_value': 10.2, 'old_value': 10}}}), - # (Decimal(10), 10.0, {}), - # ({"a": Decimal(10), "b": 12, 11.0: None}, {b"b": 12, "a": 10.0, Decimal(11): None}, {}), + (Decimal(10), 10.0, {}), + ({"a": Decimal(10), "b": 12, 11.0: None}, {b"b": 12, "a": 10.0, Decimal(11): None}, {}), ]) def test_type_change_numeric_when_ignore_order(self, t1, t2, expected_result): ddiff = DeepDiff(t1, t2, ignore_order=True, ignore_numeric_type_changes=True, ignore_string_type_changes=True) From 2ec492db7d0f5eed60afc5d2adc9813e3f861232 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 7 Apr 2019 00:40:11 -0700 Subject: [PATCH 12/12] docs --- deepdiff/deephash_doc.rst | 30 ++++++++++++++++++++++++++++++ deepdiff/diff_doc.rst | 26 +++++++++++--------------- tests/test_diff_text.py | 2 +- 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index 571cc2d2..c6b588e6 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -132,6 +132,7 @@ If 
you try to hash it: TypeError: unhashable type: 'dict' But with DeepHash: + >>> from deepdiff import DeepHash >>> obj = {1: 2, 'a': 'b'} >>> DeepHash(obj) @@ -141,11 +142,13 @@ But with DeepHash: DeepHash is calculating the hash of the obj and any other object that obj contains. The output of DeepHash is a dictionary of object IDs to their hashes. In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: + >>> hashes = DeepHash(obj) >>> hashes[obj] 34150898645750099477987229399128149852 Which you can write as: + >>> hashes = DeepHash(obj)[obj] At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. @@ -154,27 +157,32 @@ But with DeepHash: Murmur 3 128bit hashing algorithm. If you prefer to use another hashing algorithm, you can pass it using the hasher parameter. Read more about Murmur3 here: https://en.wikipedia.org/wiki/MurmurHash If you do a deep copy of obj, it should still give you the same hash: + >>> from copy import deepcopy >>> obj2 = deepcopy(obj) >>> DeepHash(obj2)[obj2] 34150898645750099477987229399128149852 Note that by default DeepHash will include string type differences. So if your strings were bytes: + >>> obj3 = {1: 2, b'a': b'b'} >>> DeepHash(obj3)[obj3] 64067525765846024488103933101621212760 But if you want the same hash if string types are different, set ignore_string_type_changes to True: + >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] 34150898645750099477987229399128149852 ignore_numeric_type_changes is by default False too. + >>> obj1 = {4:10} >>> obj2 = {4.0: Decimal(10.0)} >>> DeepHash(obj1)[4] == DeepHash(obj2)[4.0] False But by setting it to True, we can get the same hash. 
+ >>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0] True @@ -184,6 +192,7 @@ number_format_notation: String, default = "f" ignore_string_type_changes: Boolean, default = True By setting it to True, both the string and bytes of hello return the same hash. + >>> DeepHash(b'hello', ignore_string_type_changes=True) {b'hello': 221860156526691709602818861774599422448} >>> DeepHash('hello', ignore_string_type_changes=True) @@ -203,11 +212,13 @@ ignore_numeric_type_changes: Boolean, default = False 231678797214551245419120414857003063149 You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: + >>> from deepdiff import DeepDiff >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] ignore_type_in_groups example with custom objects: + >>> class Burrito: ... bread = 'flour' ... def __init__(self): @@ -277,6 +288,7 @@ ignore_string_case number_format_notation : string, default="f" When numbers are converted to the string, you have the choices between "f" as fixed point and "e" as scientific notation: + >>> t1=10002 >>> t2=10004 >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="f") @@ -292,3 +304,21 @@ number_format_notation : string, default="f" >>> >>> t1_hash[t1] == t2_hash[t2] True + +Defining your own number_to_string_func + Let's say you want the hash of numbers below 100 to be the same for some reason. + + >>> from deepdiff import DeepHash + >>> from deepdiff.helper import number_to_string + >>> def custom_number_to_string(number, *args, **kwargs): + ... number = 100 if number < 100 else number + ... return number_to_string(number, *args, **kwargs) + ...
+ >>> t1 = [10, 12, 100000] + >>> t2 = [50, 63, 100021] + >>> t1_hash = DeepHash(t1, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) + >>> t2_hash = DeepHash(t2, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) + >>> t1_hash[t1] == t2_hash[t2] + True + + So both lists produced the same hash thanks to the low significant digits for 100000 vs 100021 and also the custom_number_to_string that converted all numbers below 100 to be 100! diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 17f8bbcb..7af6ff06 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -310,26 +310,22 @@ Approximate number comparison (significant_digits after the decimal point in sci {} Defining your own number_to_string_func - Lets say you want the numbers comparison do it in logarithmic scale and in scientific notation. - >>> import math + Let's say you want the number comparison to happen only for numbers above 100 for some reason. + >>> from deepdiff import DeepDiff >>> from deepdiff.helper import number_to_string - >>> - >>> - >>> def log_number_to_string(number, *args, **kwargs): - ... number = math.log(number) + >>> def custom_number_to_string(number, *args, **kwargs): + ... number = 100 if number < 100 else number ... return number_to_string(number, *args, **kwargs) ... - >>> - >>> DeepDiff(100000, 100021, significant_digits=4, number_format_notation="e", number_to_string_func=log_number_to_string) + >>> t1 = [10, 12, 100000] + >>> t2 = [50, 63, 100021] + >>> DeepDiff(t1, t2, significant_digits=3, number_format_notation="e") + {'values_changed': {'root[0]': {'new_value': 50, 'old_value': 10}, 'root[1]': {'new_value': 63, 'old_value': 12}}} + >>> + >>> DeepDiff(t1, t2, significant_digits=3, number_format_notation="e", + ...
number_to_string_func=custom_number_to_string) {} - >>> - >>> t1 = [10, 100000] - >>> t2 = [11, 100021] - >>> - >>> DeepDiff(t1, t2, significant_digits=4, number_format_notation="e", number_to_string_func=log_number_to_string) - {'values_changed': {'root[0]': {'new_value': 11, 'old_value': 10}}} - .. note:: All the examples for the text view work for the tree view too. diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index e89eeb0a..9028389f 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1392,7 +1392,7 @@ def custom_number_to_string(number, *args, **kwargs): number = 100 if number < 100 else number return number_to_string(number, *args, **kwargs) - ddiff = DeepDiff(100000, 100021, significant_digits=3, number_format_notation="e", + ddiff = DeepDiff(t1, t2, significant_digits=3, number_format_notation="e", number_to_string_func=custom_number_to_string) assert {} == ddiff