Skip to content

Commit

Permalink
Fixing the issue with exclude_path and hash calculations when diction…
Browse files Browse the repository at this point in the history
…aries were inside iterables. #174
  • Loading branch information
seperman committed Mar 11, 2020
1 parent f9c0534 commit eb3973a
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 8 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ And then running

# ChangeLog

- v4-3-1: Fixing the issue with exclude_path and hash calculations when dictionaries were inside iterables. https://github.com/seperman/deepdiff/issues/174
- v4-3-0: adding exclude_obj_callback
- v4-2-0: .json property is finally removed. Fix for Py3.10. Dropping support for EOL Python 3.4. Ignoring private keys when calculating hashes. For example __init__ is not a part of hash calculation anymore. Fix for #166: problem with comparing lists with a boolean as an element.
- v4-0-9: Fixing the bug for hashing custom unhashable objects
Expand Down
13 changes: 8 additions & 5 deletions deepdiff/deephash.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def __init__(self,
ignore_string_case=False,
exclude_obj_callback=None,
number_to_string_func=None,
parent="root",
**kwargs):
if kwargs:
raise ValueError(
Expand All @@ -78,7 +79,7 @@ def __init__(self,
"exclude_paths, exclude_regex_paths, hasher, ignore_repetition, "
"number_format_notation, apply_hash, ignore_type_in_groups, ignore_string_type_changes, "
"ignore_numeric_type_changes, ignore_type_subclasses, ignore_string_case "
"number_to_string_func") % ', '.join(kwargs.keys()))
"number_to_string_func, parent") % ', '.join(kwargs.keys()))
self.obj = obj
exclude_types = set() if exclude_types is None else set(exclude_types)
self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance
Expand Down Expand Up @@ -109,7 +110,7 @@ def __init__(self,
self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group
self.number_to_string = number_to_string_func or number_to_string

self._hash(obj, parent="root", parents_ids=frozenset({get_id(obj)}))
self._hash(obj, parent=parent, parents_ids=frozenset({get_id(obj)}))

if self[UNPROCESSED]:
logger.warning("Can not hash the following items: {}.".format(self[UNPROCESSED]))
Expand Down Expand Up @@ -202,7 +203,6 @@ def _skip_this(self, obj, parent):
skip = True
elif self.exclude_obj_callback and self.exclude_obj_callback(obj, parent):
skip = True

return skip

def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribute=False, original_type=None):
Expand All @@ -218,6 +218,8 @@ def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribut
key_in_report = key_text % (parent, key_formatted)

key_hash = self._hash(key, parent=key_in_report, parents_ids=parents_ids)
if not key_hash:
continue
item_id = get_id(item)
if (parents_ids and item_id in parents_ids) or self._skip_this(item, parent=key_in_report):
continue
Expand All @@ -244,15 +246,16 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET):
result = defaultdict(int)

for i, item in enumerate(obj):
if self._skip_this(item, parent="{}[{}]".format(parent, i)):
new_parent = "{}[{}]".format(parent, i)
if self._skip_this(item, parent=new_parent):
continue

item_id = get_id(item)
if parents_ids and item_id in parents_ids:
continue

parents_ids_added = add_to_frozen_set(parents_ids, item_id)
hashed = self._hash(item, parent=parent, parents_ids=parents_ids_added)
hashed = self._hash(item, parent=new_parent, parents_ids=parents_ids_added)
# counting repetitions
result[hashed] += 1

Expand Down
4 changes: 3 additions & 1 deletion deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ def __create_hashtable(self, t, level):
hashes = {}
for (i, item) in enumerate(t):
try:
parent = "{}[{}]".format(level.path(), i)
hashes_all = DeepHash(item,
hashes=self.hashes,
exclude_types=self.exclude_types,
Expand All @@ -485,8 +486,9 @@ def __create_hashtable(self, t, level):
ignore_string_case=self.ignore_string_case,
number_to_string_func=self.number_to_string,
exclude_obj_callback=self.exclude_obj_callback,
parent=parent,
apply_hash=False,
)
# import pytest; pytest.set_trace()
key = item
if item is True:
key = BoolObj.TRUE
Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ Indices and tables
Changelog
=========

- v4-3-1: Fixing the issue with exclude_path and hash calculations when dictionaries were inside iterables. https://github.com/seperman/deepdiff/issues/174
- v4-3-0: adding exclude_obj_callback
- v4-2-0: .json property is finally removed. Fix for Py3.10. Dropping support for EOL Python 3.4. Ignoring private keys when calculating hashes. For example __init__ is not a part of hash calculation anymore. Fix for #166: problem with comparing lists with a boolean as an element.
- v4-1-0: .json property is finally removed.
Expand Down
11 changes: 11 additions & 0 deletions tests/test_diff_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -1609,6 +1609,17 @@ def test_skip_path4(self):
assert 'dictionary_item_added' in ddiff, {}
assert 'dictionary_item_removed' not in ddiff, {}

def test_skip_exclude_path5(self):
    """Lists of dicts that differ only at an excluded key give an empty diff.

    The comparison runs with ignore_order=True and report_repetition=False.
    One of the excluded paths ("root[1]['e']") points at an index that does
    not exist in either list.
    """
    before = [{'a': 1, 'b': 'randomString', 'e': "1111"}]
    after = [{'a': 1, 'b': 'randomString', 'e': "2222"}]
    excluded = ["root[0]['e']", "root[1]['e']"]

    ddiff = DeepDiff(
        before,
        after,
        exclude_paths=excluded,
        ignore_order=True,
        report_repetition=False,
    )

    # Only the excluded 'e' values differ, so nothing should be reported.
    assert {} == ddiff

def test_skip_custom_object_path(self):
t1 = CustomClass(1)
t2 = CustomClass(2)
Expand Down
21 changes: 19 additions & 2 deletions tests/test_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,23 @@ def test_skip_path(self):
assert 2 in t1_hash
assert t1_hash[2] == t2_hash[2]

def test_skip_path2(self):
    """Lists of dicts that differ only at excluded paths hash identically.

    The two inputs differ at root[0]['e'], root[1]['e'] and
    root[0]['foo']['bar'], all of which are passed as exclude_paths.
    """
    exclude_paths = [
        "root[0]['e']",
        "root[1]['e']",
        "root[0]['foo']['bar']",
    ]

    t1 = [
        {'a': 1, 'b': 'f', 'e': "1111", 'foo': {'bar': 'baz'}},
        {'c': 1, 'd': 'f', 'e': 'Cool'},
    ]
    t2 = [
        {'a': 1, 'b': 'f', 'e': 'Cool', 'foo': {'bar': 'baz2'}},
        {'c': 1, 'd': 'f', 'e': "2222"},
    ]

    t1_hash = DeepHashPrep(t1, exclude_paths=exclude_paths)
    t2_hash = DeepHashPrep(t2, exclude_paths=exclude_paths)

    # Each result is looked up by the top-level list that was hashed.
    assert t1_hash[t1] == t2_hash[t2]

def test_skip_regex_path(self):
dic1 = {1: "a"}
t1 = [dic1, 2]
Expand All @@ -520,9 +537,9 @@ def test_skip_regex_path(self):
assert 2 in t1_hash
assert t1_hash[2] == t2_hash[2]

def test_skip_exclude_obj_callback(self):
def test_skip_hash_exclude_obj_callback(self):
def exclude_obj_callback(obj, parent):
return True if parent == "root['x']" or obj == 2 else False
return True if parent == "root[0]['x']" or obj == 2 else False

dic1 = {"x": 1, "y": 2, "z": 3}
t1 = [dic1]
Expand Down

0 comments on commit eb3973a

Please sign in to comment.