From 2b7f40a941a03a9d8c1ed2005b88a32bbb368ef0 Mon Sep 17 00:00:00 2001 From: Julien Gotteland Date: Fri, 27 Jan 2017 12:37:39 +0100 Subject: [PATCH 01/76] Fix issue #47 : support regex in exclude paths. --- README.md | 19 +++++++++++++++++++ README.txt | 5 +++++ deepdiff/diff.py | 6 ++++++ deepdiff/search.py | 6 ++++++ tests/test_diff_text.py | 7 +++++++ tests/test_search.py | 6 ++++++ 6 files changed, 49 insertions(+) diff --git a/README.md b/README.md index e95571ac..71ad706f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ + # deepdiff v 3.0.0 @@ -115,6 +116,15 @@ which will print you: {} ``` +You can also exclude regular expression : + +```python +>>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] +>>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] +>>> print (DeepDiff(t1, t2, exclude_regex_paths={"root\[\d+\]\['b'\]"})) +{} +``` + ## Significant Digits Digits **after** the decimal point. Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits @@ -445,6 +455,15 @@ And if you would like to know the values of items added or removed, please set t {} ``` +You can also exclude regular expression : + +```python +>>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] +>>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] +>>> print (DeepDiff(t1, t2, exclude_regex_paths={"root\[\d+\]\['b'\]"})) +{} +``` + All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. 
diff --git a/README.txt b/README.txt index 492650a5..fb418867 100644 --- a/README.txt +++ b/README.txt @@ -192,6 +192,11 @@ Exclude part of your object tree from comparison: >>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) {} +You can also exclude regular expression : + >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] + >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] + >>> print (DeepDiff(t1, t2, exclude_regex_paths={"root\[\d+\]\['b'\]"})) + {} Using DeepDiff in unit tests result is the output of the function that is being tests. diff --git a/deepdiff/diff.py b/deepdiff/diff.py index a696f1e8..1398768b 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -12,6 +12,7 @@ import difflib import logging +import re from decimal import Decimal @@ -600,6 +601,7 @@ def __init__(self, report_repetition=False, significant_digits=None, exclude_paths=set(), + exclude_regex_paths=set(), exclude_types=set(), verbose_level=1, view='text', @@ -613,6 +615,7 @@ def __init__(self, self.ignore_order = ignore_order self.report_repetition = report_repetition self.exclude_paths = set(exclude_paths) + self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in set(exclude_regex_paths)] self.exclude_types = set(exclude_types) self.exclude_types_tuple = tuple( exclude_types) # we need tuple for checking isinstance @@ -732,6 +735,9 @@ def __skip_this(self, level): skip = False if self.exclude_paths and level.path() in self.exclude_paths: skip = True + elif self.exclude_regex_paths and any( + [exclude_regex_path.match(level.path()) for exclude_regex_path in self.exclude_regex_paths]): + skip = True else: if isinstance(level.t1, self.exclude_types_tuple) or isinstance( level.t2, self.exclude_types_tuple): diff --git a/deepdiff/search.py b/deepdiff/search.py index e7e2079c..8f0a5979 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -5,6 +5,7 @@ from __future__ import absolute_import from __future__ import print_function +import re import sys 
from collections import Iterable from collections import MutableMapping @@ -75,6 +76,7 @@ def __init__(self, obj, item, exclude_paths=set(), + exclude_regex_paths=set(), exclude_types=set(), verbose_level=1, **kwargs): @@ -87,6 +89,7 @@ def __init__(self, self.obj = obj self.item = item self.exclude_paths = set(exclude_paths) + self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in set(exclude_regex_paths)] self.exclude_types = set(exclude_types) self.exclude_types_tuple = tuple( exclude_types) # we need tuple for checking isinstance @@ -144,6 +147,9 @@ def __skip_this(self, item, parent): skip = False if parent in self.exclude_paths: skip = True + elif self.exclude_regex_paths and any( + [exclude_regex_path.match(parent) for exclude_regex_path in self.exclude_regex_paths]): + skip = True else: if isinstance(item, self.exclude_types_tuple): skip = True diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 41a51838..d9b343e4 100644 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1297,6 +1297,13 @@ def test_skip_dictionary_path_with_custom_object(self): result = {} self.assertEqual(ddiff, result) + def test_skip_regexp(self): + t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] + t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] + ddiff = DeepDiff(t1, t2, exclude_regex_paths=["root\[\d+\]\['b'\]"]) + result = {} + self.assertEqual(ddiff, result) + def test_skip_str_type_in_dictionary(self): t1 = {1: {2: "a"}} t2 = {1: {}} diff --git a/tests/test_search.py b/tests/test_search.py index 6196f8a1..6564199c 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -217,6 +217,12 @@ def test_skip_dictionary_path(self): result = {} self.assertEqual(ds, result) + def test_skip_regexp(self): + obj = [{'a': 1, 'b': "somewhere"}, {'c': 4, 'b': "somewhere"}] + ds = DeepSearch(obj, item, exclude_regex_paths=["root\[\d+\]"]) + result = {} + self.assertEqual(ds, result) + def test_skip_type_str(self): obj = "long string somewhere" 
result = {} From 0004c3d877e36dd2a1541b19b91f741c4636d0d8 Mon Sep 17 00:00:00 2001 From: Seperman Date: Sun, 6 Aug 2017 23:15:41 -0700 Subject: [PATCH 02/76] exclude regex path --- README.md | 52 +++++++++++++++++++------------------------- deepdiff/diff.py | 5 ++++- deepdiff/search.py | 2 +- tests/test_search.py | 6 +++++ 4 files changed, 33 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index b5a9df9f..817b12d2 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,10 @@ which will print you: ## Exclude types or paths -### Exclude certain types from comparison: +### Exclude types + +#### Exclude certain types from comparison: + ```python >>> l1 = logging.getLogger("test") >>> l2 = logging.getLogger("test2") @@ -109,7 +112,12 @@ which will print you: {} ``` -### Exclude part of your object tree from comparison: +### Exclude paths + +#### Exclude part of your object tree from comparison + +use `exclude_paths` and pass a set or list of paths to exclude: + ```python >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} @@ -117,7 +125,7 @@ which will print you: {} ``` -You can also exclude regular expression: +You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude: ```python >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] @@ -126,6 +134,17 @@ You can also exclude regular expression: {} ``` +example 2: + +```python +>>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} +>>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} +>>> DeepDiff(t1, t2, exclude_regex_paths={"\['foo.'\]"}) +{} +``` + +Tip: DeepDiff is using re.search on the path. So if you want to force it to match from the beginning of the path, add `^` to the beginning of regex. + ## Significant Digits Digits **after** the decimal point. 
Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits @@ -468,33 +487,6 @@ And if you would like to know the values of items added or removed, please set t 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} ``` -### Exclude certain types from comparison: -```python ->>> l1 = logging.getLogger("test") ->>> l2 = logging.getLogger("test2") ->>> t1 = {"log": l1, 2: 1337} ->>> t2 = {"log": l2, 2: 1337} ->>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) -{} -``` - -### Exclude part of your object tree from comparison: -```python ->>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} ->>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} ->>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) -{} -``` - -You can also exclude regular expression : - -```python ->>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] ->>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] ->>> print (DeepDiff(t1, t2, exclude_regex_paths={"root\[\d+\]\['b'\]"})) -{} -``` - All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 0bc005aa..aed1b932 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -81,6 +81,9 @@ class DeepDiff(ResultDict): exclude_paths: list, default = None. List of paths to exclude from the report. + exclude_regex_paths: list, default = None. + List of regex paths to exclude from the report. + exclude_types: list, default = None. List of object types to exclude from the report. 
@@ -751,7 +754,7 @@ def __skip_this(self, level): if self.exclude_paths and level.path() in self.exclude_paths: skip = True elif self.exclude_regex_paths and any( - [exclude_regex_path.match(level.path()) for exclude_regex_path in self.exclude_regex_paths]): + [exclude_regex_path.search(level.path()) for exclude_regex_path in self.exclude_regex_paths]): skip = True else: if isinstance(level.t1, self.exclude_types_tuple) or isinstance( diff --git a/deepdiff/search.py b/deepdiff/search.py index dc445c1e..55d49cc7 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -162,7 +162,7 @@ def __skip_this(self, item, parent): if parent in self.exclude_paths: skip = True elif self.exclude_regex_paths and any( - [exclude_regex_path.match(parent) for exclude_regex_path in self.exclude_regex_paths]): + [exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]): skip = True else: if isinstance(item, self.exclude_types_tuple): diff --git a/tests/test_search.py b/tests/test_search.py index d3c0797d..0f2d7e0d 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -247,6 +247,12 @@ def test_skip_regexp(self): result = {} self.assertEqual(ds, result) + def test_skip_regexp2(self): + obj = {'a': [1, 2, [3, [item]]]} + ds = DeepSearch(obj, item, exclude_regex_paths=["\[\d+\]"]) + result = {} + self.assertEqual(ds, result) + def test_unknown_parameters(self): with self.assertRaises(ValueError): DeepSearch(1, 1, wrong_param=2) From e38a1424c58841e1b8d19f9af570342c5c3839f8 Mon Sep 17 00:00:00 2001 From: Seperman Date: Sun, 6 Aug 2017 23:41:05 -0700 Subject: [PATCH 03/76] Bumping up the version to 3.5 and updating docs --- AUTHORS | 1 + README.md | 5 +++-- README.txt | 4 +++- deepdiff/diff.py | 37 ++++++++++++++++++++++++++++++++++++- docs/conf.py | 4 ++-- docs/index.rst | 3 ++- setup.py | 2 +- 7 files changed, 48 insertions(+), 8 deletions(-) diff --git a/AUTHORS b/AUTHORS index 670dc09a..23034d5d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -15,3 
+15,4 @@ Also thanks to: - movermeyer for updating docs - maxrothman for search in inherited class attributes - maxrothman for search for types/objects +- MartyHub for exclude regex paths diff --git a/README.md b/README.md index 817b12d2..1676b2ea 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ -# deepdiff v 3.3.0 +# DeepDiff v 3.5.0 -[![Join the chat at https://gitter.im/deepdiff/Lobby](https://badges.gitter.im/deepdiff/Lobby.svg)](https://gitter.im/deepdiff/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) ![Doc](https://readthedocs.org/projects/deepdiff/badge/?version=latest) @@ -804,6 +803,7 @@ And here is more info: ## Change log +- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) - v3-2-1: Fixing hash of None @@ -861,3 +861,4 @@ Also thanks to: - movermeyer for updating docs - maxrothman for search in inherited class attributes - maxrothman for search for types/objects +- MartyHub for exclude regex paths diff --git a/README.txt b/README.txt index 3ab5a63e..5ca7b687 100644 --- a/README.txt +++ b/README.txt @@ -1,4 +1,4 @@ -**DeepDiff v 3.3.0** +**DeepDiff v 3.5.0** Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes. 
@@ -245,6 +245,7 @@ http://zepworks.com/blog/diff-it-to-digg-it/ **Changelog** +- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) - v3-2-1: Fixing hash of None @@ -302,3 +303,4 @@ Also thanks to: - movermeyer for updating docs - maxrothman for search in inherited class attributes - maxrothman for search for types/objects +- MartyHub for exclude regex paths diff --git a/deepdiff/diff.py b/deepdiff/diff.py index aed1b932..4eb807f1 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -585,6 +585,41 @@ class DeepDiff(ResultDict): {'values_changed': {}} + **Exclude types** + + Exclude certain types from comparison: + >>> l1 = logging.getLogger("test") + >>> l2 = logging.getLogger("test2") + >>> t1 = {"log": l1, 2: 1337} + >>> t2 = {"log": l2, 2: 1337} + >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) + {} + + **Exclude paths** + + Exclude part of your object tree from comparison + use `exclude_paths` and pass a set or list of paths to exclude: + >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} + >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} + >>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) + {} + + You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude: + >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] + >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] + >>> print (DeepDiff(t1, t2, exclude_regex_paths={"root\[\d+\]\['b'\]"})) + {} + + example 2: + >>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} + >>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} + >>> DeepDiff(t1, t2, exclude_regex_paths={"\['foo.'\]"}) + {} + + Tip: DeepDiff is using re.search on the path. So if you want to force it to match from the beginning of the path, add `^` to the beginning of regex. + + + .. note:: All the examples for the text view work for the tree view too. 
You just need to set view='tree' to get it in tree form. @@ -628,7 +663,7 @@ def __init__(self, raise ValueError(( "The following parameter(s) are not valid: %s\n" "The valid parameters are ignore_order, report_repetition, significant_digits," - "exclude_paths, exclude_types, verbose_level and view.") % ', '.join(kwargs.keys())) + "exclude_paths, exclude_types, exclude_regex_paths, verbose_level and view.") % ', '.join(kwargs.keys())) self.ignore_order = ignore_order self.report_repetition = report_repetition diff --git a/docs/conf.py b/docs/conf.py index 2a5df8dd..6cf5431b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '3.3.0' +version = '3.5.0' # The full version, including alpha/beta/rc tags. -release = '3.3.0' +release = '3.5.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index 7418cc2a..fe4af5f8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -DeepDiff 3.3.0 documentation! +DeepDiff 3.5.0 documentation! ============================= **DeepDiff: Deep Difference of dictionaries, iterables and almost any other object recursively.** @@ -346,6 +346,7 @@ Indices and tables Changelog ========= +- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) - v3-2-1: Fixing hash of None diff --git a/setup.py b/setup.py index 61404ddd..b318dc60 100755 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ long_description = "Deep Difference and Search of any Python object/data." 
setup(name='deepdiff', - version='3.3.0', + version='3.5.0', description='Deep Difference and Search of any Python object/data.', url='https://github.com/seperman/deepdiff', download_url='https://github.com/seperman/deepdiff/tarball/master', From cb9c74799162ffaf03210fcd85d9f44356b2f1bf Mon Sep 17 00:00:00 2001 From: Sreenadh T C Date: Fri, 25 Aug 2017 11:21:54 +0530 Subject: [PATCH 04/76] Add an option to search for an exact string --- README.md | 14 ++++++++++++++ deepdiff/search.py | 14 ++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7fb5dae3..4640bfa2 100644 --- a/README.md +++ b/README.md @@ -175,7 +175,21 @@ Which will print: {'matched_paths': {"root['somewhere']": "around"}, 'matched_values': {"root['long']": "somewhere"}} ``` +Now, think of a case where you want to match a value as a word. +```py +from deepdiff import DeepSearch +obj = {"long": "somewhere around", "string": 2, 0: 0, "somewhere": "around"} +ds = DeepSearch(obj, "around", match_string=True, verbose_level=2) +print(ds) +ds = DeepSearch(obj, "around", verbose_level=2) +print(ds) +``` +Which will print: +```py +{'matched_values': {"root['somewhere']": 'around'}} +{'matched_values': {"root['long']": 'somewhere around',"root['somewhere']": 'around'}} +``` Tip: An interesting use case is to search inside `locals()` when doing pdb. 
## Grep diff --git a/deepdiff/search.py b/deepdiff/search.py index 448929f1..d69f6dca 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -78,6 +78,7 @@ def __init__(self, exclude_types=set(), verbose_level=1, case_sensitive=False, + match_string=False, **kwargs): if kwargs: raise ValueError(( @@ -98,6 +99,9 @@ def __init__(self, matched_values=self.__set_or_dict(), unprocessed=[]) + # Cases where user wants to match exact string item + self.match_string = match_string + self.__search(obj, item, parents_ids=frozenset({id(obj)})) empty_keys = [k for k, v in getattr(self, items)() if not v] @@ -239,8 +243,14 @@ def __search_iterable(self, def __search_str(self, obj, item, parent): """Compare strings""" obj_text = obj if self.case_sensitive else obj.lower() - if item in obj_text: - self.__report(report_key='matched_values', key=parent, value=obj) + + if self.match_string: + if item == obj_text: + self.__report(report_key='matched_values', key=parent, value=obj) + + else: + if item in obj_text: + self.__report(report_key='matched_values', key=parent, value=obj) def __search_numbers(self, obj, item, parent): if item == obj: From e11c6839ee939400f2b8513b1bd03ac6bcfa9484 Mon Sep 17 00:00:00 2001 From: Seperman Date: Tue, 19 Sep 2017 17:04:05 -0700 Subject: [PATCH 05/76] Major change in ContentHash wip --- deepdiff/contenthash.py | 76 ++++++++++--------- tests/test_hash.py | 158 +++++++++++++++++++++------------------- 2 files changed, 122 insertions(+), 112 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 501cb073..aeec4fa7 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -51,6 +51,7 @@ def __init__(self, hasher=hash, ignore_repetition=True, significant_digits=None, + constant_size=True, **kwargs): if kwargs: raise ValueError( @@ -63,7 +64,7 @@ def __init__(self, exclude_types) # we need tuple for checking isinstance self.ignore_repetition = ignore_repetition - self.hasher = hasher + self.hasher = lambda x: 
str(hasher(x)) hashes = hashes if hashes else {} self.update(hashes) self['unprocessed'] = [] @@ -71,6 +72,10 @@ def __init__(self, self.skipped = Skipped() self.not_hashed = NotHashed() self.significant_digits = significant_digits + # makes the hash return constant size result if true + # the only time it should be set to False is when + # testing the individual hash functions for different types of objects. + self.constant_size = constant_size self.__hash(obj, parents_ids=frozenset({id(obj)})) @@ -102,14 +107,7 @@ def __add_to_frozen_set(parents_ids, item_id): parents_ids.add(item_id) return frozenset(parents_ids) - def __get_and_set_str_hash(self, obj): - obj_id = id(obj) - result = self.hasher(obj) - result = "str:{}".format(result) - self[obj_id] = result - return result - - def __hash_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): + def __prep_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): """Difference of 2 objects""" try: if is_namedtuple: @@ -123,7 +121,7 @@ def __hash_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): self['unprocessed'].append(obj) return self.unprocessed - result = self.__hash_dict(obj, parents_ids) + result = self.__prep_dict(obj, parents_ids) result = "nt{}".format(result) if is_namedtuple else "obj{}".format( result) return result @@ -135,7 +133,7 @@ def __skip_this(self, obj): return skip - def __hash_dict(self, obj, parents_ids=frozenset({})): + def __prep_dict(self, obj, parents_ids=frozenset({})): result = [] obj_keys = set(obj.keys()) @@ -157,10 +155,10 @@ def __hash_dict(self, obj, parents_ids=frozenset({})): return result - def __hash_set(self, obj): - return "set:{}".format(self.__hash_iterable(obj)) + def __prep_set(self, obj): + return "set:{}".format(self.__prep_iterable(obj)) - def __hash_iterable(self, obj, parents_ids=frozenset({})): + def __prep_iterable(self, obj, parents_ids=frozenset({})): result = defaultdict(int) @@ -174,13 +172,15 @@ def __hash_iterable(self, obj, 
parents_ids=frozenset({})): parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) hashed = self.__hash(x, parents_ids_added) + # counting repetitions result[hashed] += 1 if self.ignore_repetition: result = list(result.keys()) else: + # items could be iteritems based on py version so we use getattr result = [ - '{}|{}'.format(i[0], i[1]) for i in getattr(result, items)() + '{}|{}'.format(i, v) for i, v in getattr(result, items)() ] result.sort() @@ -189,10 +189,10 @@ def __hash_iterable(self, obj, parents_ids=frozenset({})): return result - def __hash_str(self, obj): - return self.__get_and_set_str_hash(obj) + def __prep_str(self, obj): + return 'str:{}'.format(obj) - def __hash_number(self, obj): + def __prep_number(self, obj): # Based on diff.DeepDiff.__diff_numbers if self.significant_digits is not None and isinstance(obj, ( float, complex, Decimal)): @@ -202,30 +202,29 @@ def __hash_number(self, obj): if set(obj_s) <= set("-0."): obj_s = "0.00" result = "number:{}".format(obj_s) - obj_id = id(obj) - self[obj_id] = result else: result = "{}:{}".format(type(obj).__name__, obj) return result - def __hash_tuple(self, obj, parents_ids): + def __prep_tuple(self, obj, parents_ids): # Checking to see if it has _fields. Which probably means it is a named # tuple. 
try: obj._asdict # It must be a normal tuple except AttributeError: - result = self.__hash_iterable(obj, parents_ids) + result = self.__prep_iterable(obj, parents_ids) # We assume it is a namedtuple then else: - result = self.__hash_obj(obj, parents_ids, is_namedtuple=True) + result = self.__prep_obj(obj, parents_ids, is_namedtuple=True) return result - def __hash(self, obj, parent="root", parents_ids=frozenset({})): + def __hash(self, obj, parents_ids=frozenset({})): """The main diff method""" obj_id = id(obj) if obj_id in self: + print('obj is already there') return self[obj_id] result = self.not_hashed @@ -237,34 +236,39 @@ def __hash(self, obj, parent="root", parents_ids=frozenset({})): result = 'NONE' elif isinstance(obj, strings): - result = self.__hash_str(obj) + result = self.__prep_str(obj) elif isinstance(obj, numbers): - result = self.__hash_number(obj) + result = self.__prep_number(obj) elif isinstance(obj, MutableMapping): - result = self.__hash_dict(obj, parents_ids) + result = self.__prep_dict(obj, parents_ids) elif isinstance(obj, tuple): - result = self.__hash_tuple(obj, parents_ids) + result = self.__prep_tuple(obj, parents_ids) elif isinstance(obj, (set, frozenset)): - result = self.__hash_set(obj) + result = self.__prep_set(obj) elif isinstance(obj, Iterable): - result = self.__hash_iterable(obj, parents_ids) + result = self.__prep_iterable(obj, parents_ids) else: - result = self.__hash_obj(obj, parents_ids) - - if result != self.not_hashed and obj_id not in self and not isinstance( - obj, numbers): - self[obj_id] = result + result = self.__prep_obj(obj, parents_ids) if result is self.not_hashed: # pragma: no cover - self[obj_id] = self.not_hashed self['unprocessed'].append(obj) + elif self.constant_size: + # from nose.tools import set_trace; set_trace() + temp = result + result = self.hasher(result) + print('-' * 10) + print(obj) + print("{} -> {}".format(temp, result)) + + self[obj_id] = result + return result diff --git a/tests/test_hash.py 
b/tests/test_hash.py index 8361faf6..26003f20 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -24,6 +24,7 @@ from deepdiff import DeepHash from deepdiff.helper import py3, pypy3 from collections import namedtuple +from functools import partial import logging logging.disable(logging.CRITICAL) @@ -41,20 +42,25 @@ def __repr__(self): return self.__str__() -def hash_and_format(obj): - return "str:{}".format(hash(obj)) +hasher = DeepHash(None).hasher +# Only the prep part of DeepHashPrep. We don't need to test the actual hash function. +DeepHashPrep = partial(DeepHash, constant_size=False) + + +def prep_str(obj): + return 'str:{}'.format(obj) class DeepHashTestCase(unittest.TestCase): - """DeepHash Tests.""" + """DeepHashPrep Tests.""" - def test_hash_str(self): + def test_prep_str(self): obj = "a" - expected_result = {id(obj): hash_and_format(obj)} - result = DeepHash(obj) + expected_result = {id(obj): prep_str(obj)} + result = DeepHashPrep(obj) self.assertEqual(result, expected_result) - def test_hash_str_fail_if_mutable(self): + def test_prep_str_fail_if_mutable(self): """ This test fails if ContentHash is getting a mutable copy of hashes which means each init of the ContentHash will have hashes from @@ -62,32 +68,29 @@ def test_hash_str_fail_if_mutable(self): """ obj1 = "a" id_obj1 = id(obj1) - expected_result = {id_obj1: hash_and_format(obj1)} - result = DeepHash(obj1) + expected_result = {id_obj1: prep_str(obj1)} + result = DeepHashPrep(obj1) self.assertEqual(result, expected_result) obj2 = "b" - result = DeepHash(obj2) + result = DeepHashPrep(obj2) self.assertTrue(id_obj1 not in result) - def test_list(self): + def do_list_or_tuple(self, func, func_str): string1 = "a" - obj = [string1, 10, 20] + obj = func([string1, 10, 20]) + string1_prepped = prep_str(string1) expected_result = { - id(string1): hash_and_format(string1), - id(obj): 'list:int:10,int:20,str:%s' % hash(string1) + id(string1): string1_prepped, + id(obj): 
'{}:int:10,int:20,{}'.format(func_str, string1_prepped), + id(10): 'int:10', + id(20): 'int:20' } - result = DeepHash(obj) + result = DeepHashPrep(obj) self.assertEqual(result, expected_result) - def test_tuple(self): - string1 = "a" - obj = (string1, 10, 20) - expected_result = { - id(string1): hash_and_format(string1), - id(obj): 'tuple:int:10,int:20,str:%s' % hash(string1) - } - result = DeepHash(obj) - self.assertEqual(result, expected_result) + def test_list_and_tuple(self): + for func, func_str in ((list, 'list'), (tuple, 'tuple')): + self.do_list_or_tuple(func, func_str) def test_named_tuples(self): # checking if pypy3 is running the test @@ -95,33 +98,36 @@ def test_named_tuples(self): # the id of x inside the named tuple changes. x = "x" x_id = id(x) - x_hash = hash(x) + x_prep = prep_str(x) Point = namedtuple('Point', [x]) obj = Point(x=11) - result = DeepHash(obj) + result = DeepHashPrep(obj) if pypy3: - self.assertEqual(result[id(obj)], 'ntdict:{str:%s:int:11}' % - x_hash) + self.assertEqual(result[id(obj)], 'ntdict:{str:%s:int:11}' % x) else: expected_result = { - x_id: 'str:{}'.format(x_hash), - id(obj): 'ntdict:{str:%s:int:11}' % x_hash + x_id: x_prep, + id(obj): 'ntdict:{str:%s:int:11}' % x, + id(11): 'int:11', } self.assertEqual(result, expected_result) def test_dict(self): string1 = "a" - hash_string1 = hash(string1) + string1_prepped = prep_str(string1) key1 = "key1" - hash_key1 = hash(key1) + key1_prepped = prep_str(key1) obj = {key1: string1, 1: 10, 2: 20} expected_result = { - id(key1): "str:{}".format(hash_key1), - id(string1): "str:{}".format(hash_string1), - id(obj): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % - (hash_key1, hash_string1) + id(1): "int:1", + id(2): "int:2", + id(10): "int:10", + id(20): "int:20", + id(key1): key1_prepped, + id(string1): string1_prepped, + id(obj): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % (key1, string1) } - result = DeepHash(obj) + result = DeepHashPrep(obj) self.assertEqual(result, 
expected_result) def test_dict_in_list(self): @@ -140,60 +146,60 @@ def test_dict_in_list(self): 'list:dict:{int:1:int:10;int:2:int:20;str:%s:str:%s},int:0' % (hash_key1, hash_string1) } - result = DeepHash(obj) + result = DeepHashPrep(obj) self.assertEqual(result, expected_result) def test_nested_lists_same_hash(self): t1 = [1, 2, [3, 4]] t2 = [[4, 3], 2, 1] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) def test_nested_lists_same_hash2(self): t1 = [1, 2, [3, [4, 5]]] t2 = [[[5, 4], 3], 2, 1] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) def test_nested_lists_same_hash3(self): t1 = [{1: [2, 3], 4: [5, [6, 7]]}] t2 = [{4: [[7, 6], 5], 1: [3, 2]}] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) def test_nested_lists_in_dictionary_same_hash(self): t1 = [{"c": 4}, {"c": 3}] t2 = [{"c": 3}, {"c": 4}] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) def test_same_sets_same_hash(self): t1 = {1, 3, 2} t2 = {2, 3, 1} - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) def test_same_sets_in_lists_same_hash(self): t1 = ["a", {1, 3, 2}] t2 = [{2, 3, 1}, "a"] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) def test_unknown_parameters(self): with self.assertRaises(ValueError): - DeepHash(1, wrong_param=2) + DeepHashPrep(1, wrong_param=2) def test_bad_attribute(self): class Bad(object): @@ -207,7 +213,7 @@ def __str__(self): t1 
= Bad() - result = DeepHash(t1) + result = DeepHashPrep(t1) expected_result = {id(t1): result.unprocessed, 'unprocessed': [t1]} self.assertEqual(result, expected_result) @@ -222,8 +228,8 @@ def test_repetition_by_default_does_not_effect(self): b = [list2, 2, 1] b_id = id(b) - hash_a = DeepHash(a) - hash_b = DeepHash(b) + hash_a = DeepHashPrep(a) + hash_b = DeepHashPrep(b) self.assertEqual(hash_a[list1_id], hash_b[list2_id]) self.assertEqual(hash_a[a_id], hash_b[b_id]) @@ -239,8 +245,8 @@ def test_setting_repetition_off_unequal_hash(self): b = [list2, 2, 1] b_id = id(b) - hash_a = DeepHash(a, ignore_repetition=False) - hash_b = DeepHash(b, ignore_repetition=False) + hash_a = DeepHashPrep(a, ignore_repetition=False) + hash_b = DeepHashPrep(b, ignore_repetition=False) self.assertNotEqual(hash_a[list1_id], hash_b[list2_id]) self.assertNotEqual(hash_a[a_id], hash_b[b_id]) @@ -256,60 +262,60 @@ def hasher(obj): obj = "a" expected_result = {id(obj): "str:0"} - result = DeepHash(obj, hasher=hasher) + result = DeepHashPrep(obj, hasher=hasher) self.assertEqual(result, expected_result) - # we simply feed the last result to DeepHash + # we simply feed the last result to DeepHashPrep # So it can re-use the results. - result2 = DeepHash(obj, hasher=hasher, hashes=result) + result2 = DeepHashPrep(obj, hasher=hasher, hashes=result) # if hashes are not cached and re-used, # then the next time hasher runs, it returns # number 1 instead of 0. 
self.assertEqual(result2, expected_result) - result3 = DeepHash(obj, hasher=hasher) + result3 = DeepHashPrep(obj, hasher=hasher) expected_result = {id(obj): "str:{}".format(1)} self.assertEqual(result3, expected_result) def test_skip_type(self): l1 = logging.getLogger("test") obj = {"log": l1, 2: 1337} - result = DeepHash(obj, exclude_types={logging.Logger}) + result = DeepHashPrep(obj, exclude_types={logging.Logger}) self.assertEqual(result[id(l1)], result.skipped) - def test_hash_dic_with_loop(self): + def test_prep_dic_with_loop(self): obj = {2: 1337} obj[1] = obj - result = DeepHash(obj) + result = DeepHashPrep(obj) expected_result = {id(obj): 'dict:{int:2:int:1337}'} self.assertEqual(result, expected_result) - def test_hash_iterable_with_loop(self): + def test_prep_iterable_with_loop(self): obj = [1] obj.append(obj) - result = DeepHash(obj) + result = DeepHashPrep(obj) expected_result = {id(obj): 'list:int:1'} self.assertEqual(result, expected_result) - def test_hash_iterable_with_excluded_type(self): + def test_prep_iterable_with_excluded_type(self): l1 = logging.getLogger("test") obj = [1, l1] - result = DeepHash(obj, exclude_types={logging.Logger}) + result = DeepHashPrep(obj, exclude_types={logging.Logger}) self.assertTrue(id(l1) not in result) class DeepHashSHA1TestCase(unittest.TestCase): - """DeepHash with SHA1 Tests.""" + """DeepHashPrep with SHA1 Tests.""" - def test_hash_str(self): + def test_prep_str(self): obj = "a" expected_result = { id(obj): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' } - result = DeepHash(obj, hasher=DeepHash.sha1hex) + result = DeepHashPrep(obj, hasher=DeepHashPrep.sha1hex) self.assertEqual(result, expected_result) - def test_hash_str_fail_if_mutable(self): + def test_prep_str_fail_if_mutable(self): """ This test fails if ContentHash is getting a mutable copy of hashes which means each init of the ContentHash will have hashes from @@ -320,10 +326,10 @@ def test_hash_str_fail_if_mutable(self): expected_result = { id_obj1: 
'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' } - result = DeepHash(obj1, hasher=DeepHash.sha1hex) + result = DeepHashPrep(obj1, hasher=DeepHashPrep.sha1hex) self.assertEqual(result, expected_result) obj2 = "b" - result = DeepHash(obj2, hasher=DeepHash.sha1hex) + result = DeepHashPrep(obj2, hasher=DeepHashPrep.sha1hex) self.assertTrue(id_obj1 not in result) def test_bytecode(self): @@ -336,7 +342,7 @@ def test_bytecode(self): expected_result = { id(obj): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' } - result = DeepHash(obj, hasher=DeepHash.sha1hex) + result = DeepHashPrep(obj, hasher=DeepHashPrep.sha1hex) self.assertEqual(result, expected_result) def test_list1(self): @@ -347,7 +353,7 @@ def test_list1(self): id(obj): 'list:int:10,int:20,str:48591f1d794734cabf55f96f5a5a72c084f13ac0' } - result = DeepHash(obj, hasher=DeepHash.sha1hex) + result = DeepHashPrep(obj, hasher=DeepHashPrep.sha1hex) self.assertEqual(result, expected_result) def test_dict1(self): @@ -360,5 +366,5 @@ def test_dict1(self): id(obj): 'dict:{int:1:int:10;int:2:int:20;str:63216212fdf88fe0c838c36ab65278b9953000d6:str:48591f1d794734cabf55f96f5a5a72c084f13ac0}' } - result = DeepHash(obj, hasher=DeepHash.sha1hex) + result = DeepHashPrep(obj, hasher=DeepHashPrep.sha1hex) self.assertEqual(result, expected_result) From f863a04d6ab9fb549d6176a67670d164892ff1c1 Mon Sep 17 00:00:00 2001 From: Tom Haddon Date: Mon, 2 Oct 2017 16:48:09 +0100 Subject: [PATCH 06/76] Add support for datetime.timedelta objects per issue#81 --- deepdiff/helper.py | 4 ++-- tests/test_diff_text.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 7fea2a43..64ce0522 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -22,14 +22,14 @@ strings = (str, bytes) # which are both basestring unicode_type = str bytes_type = bytes - numbers = (int, float, complex, datetime.datetime, datetime.date, Decimal) + numbers = (int, float, complex, 
datetime.datetime, datetime.date, datetime.timedelta, Decimal) items = 'items' else: # pragma: no cover int = int strings = (str, unicode) unicode_type = unicode bytes_type = str - numbers = (int, float, long, complex, datetime.datetime, datetime.date, + numbers = (int, float, long, complex, datetime.datetime, datetime.date, datetime.timedelta, Decimal) items = 'iteritems' diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 8b1a543b..c8cc50f1 100644 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -128,6 +128,24 @@ def test_diffs_dates(self): } self.assertEqual(ddiff, result) + def test_diffs_timedeltas(self): + t1 = datetime.timedelta(days=1, seconds=12) + t2 = datetime.timedelta(days=1, seconds=10) + t3 = datetime.timedelta(seconds=(60*60*24) + 12) + ddiff = DeepDiff(t1, t2) + result = { + 'values_changed': { + 'root': { + 'new_value': t2, + 'old_value': t1 + } + } + } + self.assertEqual(ddiff, result) + ddiff = DeepDiff(t1, t3) + result = {} + self.assertEqual(ddiff, result) + def test_string_difference(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world"}} t2 = {1: 1, 2: 4, 3: 3, 4: {"a": "hello", "b": "world!"}} From 7a4d0cbe8bb1a07143daa5df9ceec86adb9d6abd Mon Sep 17 00:00:00 2001 From: Seperman Date: Mon, 2 Oct 2017 17:19:56 -0700 Subject: [PATCH 07/76] fixing one test at a time --- deepdiff/contenthash.py | 8 ++++++-- tests/test_hash.py | 28 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index aeec4fa7..0c93eb8c 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -48,7 +48,7 @@ def __init__(self, obj, hashes=None, exclude_types=set(), - hasher=hash, + hasher=None, ignore_repetition=True, significant_digits=None, constant_size=True, @@ -64,7 +64,7 @@ def __init__(self, exclude_types) # we need tuple for checking isinstance self.ignore_repetition = ignore_repetition - self.hasher = lambda x: str(hasher(x)) + 
self.hasher = self.basic_hash if hasher is None else hasher hashes = hashes if hashes else {} self.update(hashes) self['unprocessed'] = [] @@ -84,6 +84,10 @@ def __init__(self, else: del self['unprocessed'] + @staticmethod + def basic_hash(obj): + return str(hash(obj)) + @staticmethod def sha1hex(obj): """Use Sha1 for more accuracy.""" diff --git a/tests/test_hash.py b/tests/test_hash.py index 26003f20..63da008c 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -42,7 +42,6 @@ def __repr__(self): return self.__str__() -hasher = DeepHash(None).hasher # Only the prep part of DeepHashPrep. We don't need to test the actual hash function. DeepHashPrep = partial(DeepHash, constant_size=False) @@ -258,23 +257,24 @@ def test_already_calculated_hash_wont_be_recalculated(self): hashes = (i for i in range(10)) def hasher(obj): - return next(hashes) + return str(next(hashes)) obj = "a" - expected_result = {id(obj): "str:0"} - result = DeepHashPrep(obj, hasher=hasher) + expected_result = {id(obj): '0'} + from nose.tools import set_trace; set_trace() + result = DeepHash(obj, hasher=hasher) self.assertEqual(result, expected_result) - # we simply feed the last result to DeepHashPrep + # we simply feed the last result to DeepHash # So it can re-use the results. - result2 = DeepHashPrep(obj, hasher=hasher, hashes=result) + result2 = DeepHash(obj, hasher=hasher, hashes=result) # if hashes are not cached and re-used, # then the next time hasher runs, it returns # number 1 instead of 0. 
self.assertEqual(result2, expected_result) - result3 = DeepHashPrep(obj, hasher=hasher) - expected_result = {id(obj): "str:{}".format(1)} + result3 = DeepHash(obj, hasher=hasher) + expected_result = {id(obj): '1'} self.assertEqual(result3, expected_result) def test_skip_type(self): @@ -312,7 +312,7 @@ def test_prep_str(self): expected_result = { id(obj): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' } - result = DeepHashPrep(obj, hasher=DeepHashPrep.sha1hex) + result = DeepHashPrep(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) def test_prep_str_fail_if_mutable(self): @@ -326,10 +326,10 @@ def test_prep_str_fail_if_mutable(self): expected_result = { id_obj1: 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' } - result = DeepHashPrep(obj1, hasher=DeepHashPrep.sha1hex) + result = DeepHashPrep(obj1, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) obj2 = "b" - result = DeepHashPrep(obj2, hasher=DeepHashPrep.sha1hex) + result = DeepHashPrep(obj2, hasher=DeepHash.sha1hex) self.assertTrue(id_obj1 not in result) def test_bytecode(self): @@ -342,7 +342,7 @@ def test_bytecode(self): expected_result = { id(obj): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' } - result = DeepHashPrep(obj, hasher=DeepHashPrep.sha1hex) + result = DeepHashPrep(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) def test_list1(self): @@ -353,7 +353,7 @@ def test_list1(self): id(obj): 'list:int:10,int:20,str:48591f1d794734cabf55f96f5a5a72c084f13ac0' } - result = DeepHashPrep(obj, hasher=DeepHashPrep.sha1hex) + result = DeepHashPrep(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) def test_dict1(self): @@ -366,5 +366,5 @@ def test_dict1(self): id(obj): 'dict:{int:1:int:10;int:2:int:20;str:63216212fdf88fe0c838c36ab65278b9953000d6:str:48591f1d794734cabf55f96f5a5a72c084f13ac0}' } - result = DeepHashPrep(obj, hasher=DeepHashPrep.sha1hex) + result = DeepHashPrep(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, 
expected_result) From 4ee3f6647fb165b4899f2fd9cc4d885d37c65cb4 Mon Sep 17 00:00:00 2001 From: Seperman Date: Mon, 2 Oct 2017 17:52:52 -0700 Subject: [PATCH 08/76] fixing tests one at a time --- deepdiff/contenthash.py | 5 +++-- tests/test_hash.py | 20 +++++--------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 0c93eb8c..0bd555bb 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -263,7 +263,7 @@ def __hash(self, obj, parents_ids=frozenset({})): if result is self.not_hashed: # pragma: no cover self['unprocessed'].append(obj) - elif self.constant_size: + elif self.constant_size and not isinstance(obj, numbers): # from nose.tools import set_trace; set_trace() temp = result result = self.hasher(result) @@ -271,7 +271,8 @@ def __hash(self, obj, parents_ids=frozenset({})): print(obj) print("{} -> {}".format(temp, result)) - self[obj_id] = result + if not isinstance(obj, numbers): + self[obj_id] = result return result diff --git a/tests/test_hash.py b/tests/test_hash.py index 63da008c..a4e17e80 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -81,8 +81,6 @@ def do_list_or_tuple(self, func, func_str): expected_result = { id(string1): string1_prepped, id(obj): '{}:int:10,int:20,{}'.format(func_str, string1_prepped), - id(10): 'int:10', - id(20): 'int:20' } result = DeepHashPrep(obj) self.assertEqual(result, expected_result) @@ -107,7 +105,6 @@ def test_named_tuples(self): expected_result = { x_id: x_prep, id(obj): 'ntdict:{str:%s:int:11}' % x, - id(11): 'int:11', } self.assertEqual(result, expected_result) @@ -118,10 +115,6 @@ def test_dict(self): key1_prepped = prep_str(key1) obj = {key1: string1, 1: 10, 2: 20} expected_result = { - id(1): "int:1", - id(2): "int:2", - id(10): "int:10", - id(20): "int:20", id(key1): key1_prepped, id(string1): string1_prepped, id(obj): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % (key1, string1) @@ -131,19 +124,17 @@ def 
test_dict(self): def test_dict_in_list(self): string1 = "a" - hash_string1 = hash(string1) key1 = "key1" - hash_key1 = hash(key1) dict1 = {key1: string1, 1: 10, 2: 20} obj = [0, dict1] expected_result = { - id(key1): "str:{}".format(hash_key1), - id(string1): "str:{}".format(hash_string1), + id(key1): "str:{}".format(key1), + id(string1): "str:{}".format(string1), id(dict1): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % - (hash_key1, hash_string1), + (key1, string1), id(obj): 'list:dict:{int:1:int:10;int:2:int:20;str:%s:str:%s},int:0' % - (hash_key1, hash_string1) + (key1, string1) } result = DeepHashPrep(obj) self.assertEqual(result, expected_result) @@ -261,7 +252,6 @@ def hasher(obj): obj = "a" expected_result = {id(obj): '0'} - from nose.tools import set_trace; set_trace() result = DeepHash(obj, hasher=hasher) self.assertEqual(result, expected_result) @@ -366,5 +356,5 @@ def test_dict1(self): id(obj): 'dict:{int:1:int:10;int:2:int:20;str:63216212fdf88fe0c838c36ab65278b9953000d6:str:48591f1d794734cabf55f96f5a5a72c084f13ac0}' } - result = DeepHashPrep(obj, hasher=DeepHash.sha1hex) + result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) From 26e25d715dcc9b6b8112db1e85d523e038ff3aa6 Mon Sep 17 00:00:00 2001 From: Seperman Date: Mon, 2 Oct 2017 17:59:15 -0700 Subject: [PATCH 09/76] fixing contenthash to produce constant size --- deepdiff/contenthash.py | 8 ++++---- tests/test_hash.py | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 0bd555bb..a65c91fc 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -265,11 +265,11 @@ def __hash(self, obj, parents_ids=frozenset({})): elif self.constant_size and not isinstance(obj, numbers): # from nose.tools import set_trace; set_trace() - temp = result + # temp = result result = self.hasher(result) - print('-' * 10) - print(obj) - print("{} -> {}".format(temp, 
result)) + # print('-' * 10) + # print(obj) + # print("{} -> {}".format(temp, result)) if not isinstance(obj, numbers): self[obj_id] = result diff --git a/tests/test_hash.py b/tests/test_hash.py index a4e17e80..7e67004e 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -295,14 +295,14 @@ def test_prep_iterable_with_excluded_type(self): class DeepHashSHA1TestCase(unittest.TestCase): - """DeepHashPrep with SHA1 Tests.""" + """DeepHash with SHA1 Tests.""" def test_prep_str(self): obj = "a" expected_result = { - id(obj): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' + id(obj): 'c2a00c48d4713267a2ab9ca9739214127830e9be' } - result = DeepHashPrep(obj, hasher=DeepHash.sha1hex) + result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) def test_prep_str_fail_if_mutable(self): @@ -314,36 +314,36 @@ def test_prep_str_fail_if_mutable(self): obj1 = "a" id_obj1 = id(obj1) expected_result = { - id_obj1: 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' + id_obj1: 'c2a00c48d4713267a2ab9ca9739214127830e9be' } - result = DeepHashPrep(obj1, hasher=DeepHash.sha1hex) + result = DeepHash(obj1, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) obj2 = "b" - result = DeepHashPrep(obj2, hasher=DeepHash.sha1hex) + result = DeepHash(obj2, hasher=DeepHash.sha1hex) self.assertTrue(id_obj1 not in result) def test_bytecode(self): obj = b"a" if py3: expected_result = { - id(obj): 'str:066c7cf4158717c47244fa6cf1caafca605d550b' + id(obj): '64a91ccb03c69f78d076d884de9bc5355849cc12' } else: expected_result = { - id(obj): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' + id(obj): 'c2a00c48d4713267a2ab9ca9739214127830e9be' } - result = DeepHashPrep(obj, hasher=DeepHash.sha1hex) + result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) def test_list1(self): string1 = "a" obj = [string1, 10, 20] expected_result = { - id(string1): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0', + id(string1): 
'c2a00c48d4713267a2ab9ca9739214127830e9be', id(obj): - 'list:int:10,int:20,str:48591f1d794734cabf55f96f5a5a72c084f13ac0' + '5af30c367e2e176f7c362356559f3e8cc73302e5' } - result = DeepHashPrep(obj, hasher=DeepHash.sha1hex) + result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) def test_dict1(self): @@ -351,10 +351,10 @@ def test_dict1(self): key1 = "key1" obj = {key1: string1, 1: 10, 2: 20} expected_result = { - id(key1): 'str:63216212fdf88fe0c838c36ab65278b9953000d6', - id(string1): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0', + id(key1): '35624f541de8d2cc9c31deba03c7dda9b1da09f7', + id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', id(obj): - 'dict:{int:1:int:10;int:2:int:20;str:63216212fdf88fe0c838c36ab65278b9953000d6:str:48591f1d794734cabf55f96f5a5a72c084f13ac0}' + 'b13e2e23ed7e46208157e45bfbe0113782804e17' } result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) From 89131ea9ce434053038a8c6a35d3faffada62b80 Mon Sep 17 00:00:00 2001 From: Seperman Date: Wed, 1 Nov 2017 15:55:46 -0700 Subject: [PATCH 10/76] changing line ending to linux --- deepdiff/diff.py | 2 +- tests/test_hash.py | 728 +++++++++++++++++++++++---------------------- 2 files changed, 369 insertions(+), 361 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 4eb807f1..ecd1d5ec 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -984,7 +984,7 @@ def add_hash(hashes, item_hash, item, i): if item_hash in hashes: hashes[item_hash].indexes.append(i) else: - hashes[item_hash] = IndexedHash([i], item) + hashes[item_hash] = IndexedHash(indexes=[i], item=item) hashes = {} for (i, item) in enumerate(t): diff --git a/tests/test_hash.py b/tests/test_hash.py index 7e67004e..a5d2279b 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -1,360 +1,368 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_hash - -Or to run all the tests: - 
python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - On linux: - nosetests ./tests/test_hash.py:DeepHashTestCase.test_bytecode - - On windows: - nosetests .\tests\test_hash.py:DeepHashTestCase.test_string_in_root -""" -import unittest -from deepdiff import DeepHash -from deepdiff.helper import py3, pypy3 -from collections import namedtuple -from functools import partial -import logging - -logging.disable(logging.CRITICAL) - - -class CustomClass: - def __init__(self, a, b=None): - self.a = a - self.b = b - - def __str__(self): - return "({}, {})".format(self.a, self.b) - - def __repr__(self): - return self.__str__() - - -# Only the prep part of DeepHashPrep. We don't need to test the actual hash function. -DeepHashPrep = partial(DeepHash, constant_size=False) - - -def prep_str(obj): - return 'str:{}'.format(obj) - - -class DeepHashTestCase(unittest.TestCase): - """DeepHashPrep Tests.""" - - def test_prep_str(self): - obj = "a" - expected_result = {id(obj): prep_str(obj)} - result = DeepHashPrep(obj) - self.assertEqual(result, expected_result) - - def test_prep_str_fail_if_mutable(self): - """ - This test fails if ContentHash is getting a mutable copy of hashes - which means each init of the ContentHash will have hashes from - the previous init. 
- """ - obj1 = "a" - id_obj1 = id(obj1) - expected_result = {id_obj1: prep_str(obj1)} - result = DeepHashPrep(obj1) - self.assertEqual(result, expected_result) - obj2 = "b" - result = DeepHashPrep(obj2) - self.assertTrue(id_obj1 not in result) - - def do_list_or_tuple(self, func, func_str): - string1 = "a" - obj = func([string1, 10, 20]) - string1_prepped = prep_str(string1) - expected_result = { - id(string1): string1_prepped, - id(obj): '{}:int:10,int:20,{}'.format(func_str, string1_prepped), - } - result = DeepHashPrep(obj) - self.assertEqual(result, expected_result) - - def test_list_and_tuple(self): - for func, func_str in ((list, 'list'), (tuple, 'tuple')): - self.do_list_or_tuple(func, func_str) - - def test_named_tuples(self): - # checking if pypy3 is running the test - # in that case due to a pypy3 bug or something - # the id of x inside the named tuple changes. - x = "x" - x_id = id(x) - x_prep = prep_str(x) - Point = namedtuple('Point', [x]) - obj = Point(x=11) - result = DeepHashPrep(obj) - if pypy3: - self.assertEqual(result[id(obj)], 'ntdict:{str:%s:int:11}' % x) - else: - expected_result = { - x_id: x_prep, - id(obj): 'ntdict:{str:%s:int:11}' % x, - } - self.assertEqual(result, expected_result) - - def test_dict(self): - string1 = "a" - string1_prepped = prep_str(string1) - key1 = "key1" - key1_prepped = prep_str(key1) - obj = {key1: string1, 1: 10, 2: 20} - expected_result = { - id(key1): key1_prepped, - id(string1): string1_prepped, - id(obj): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % (key1, string1) - } - result = DeepHashPrep(obj) - self.assertEqual(result, expected_result) - - def test_dict_in_list(self): - string1 = "a" - key1 = "key1" - dict1 = {key1: string1, 1: 10, 2: 20} - obj = [0, dict1] - expected_result = { - id(key1): "str:{}".format(key1), - id(string1): "str:{}".format(string1), - id(dict1): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % - (key1, string1), - id(obj): - 
'list:dict:{int:1:int:10;int:2:int:20;str:%s:str:%s},int:0' % - (key1, string1) - } - result = DeepHashPrep(obj) - self.assertEqual(result, expected_result) - - def test_nested_lists_same_hash(self): - t1 = [1, 2, [3, 4]] - t2 = [[4, 3], 2, 1] - t1_hash = DeepHashPrep(t1) - t2_hash = DeepHashPrep(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_nested_lists_same_hash2(self): - t1 = [1, 2, [3, [4, 5]]] - t2 = [[[5, 4], 3], 2, 1] - t1_hash = DeepHashPrep(t1) - t2_hash = DeepHashPrep(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_nested_lists_same_hash3(self): - t1 = [{1: [2, 3], 4: [5, [6, 7]]}] - t2 = [{4: [[7, 6], 5], 1: [3, 2]}] - t1_hash = DeepHashPrep(t1) - t2_hash = DeepHashPrep(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_nested_lists_in_dictionary_same_hash(self): - t1 = [{"c": 4}, {"c": 3}] - t2 = [{"c": 3}, {"c": 4}] - t1_hash = DeepHashPrep(t1) - t2_hash = DeepHashPrep(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_same_sets_same_hash(self): - t1 = {1, 3, 2} - t2 = {2, 3, 1} - t1_hash = DeepHashPrep(t1) - t2_hash = DeepHashPrep(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_same_sets_in_lists_same_hash(self): - t1 = ["a", {1, 3, 2}] - t2 = [{2, 3, 1}, "a"] - t1_hash = DeepHashPrep(t1) - t2_hash = DeepHashPrep(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_unknown_parameters(self): - with self.assertRaises(ValueError): - DeepHashPrep(1, wrong_param=2) - - def test_bad_attribute(self): - class Bad(object): - __slots__ = ['x', 'y'] - - def __getattr__(self, key): - raise AttributeError("Bad item") - - def __str__(self): - return "Bad Object" - - t1 = Bad() - - result = DeepHashPrep(t1) - expected_result = {id(t1): result.unprocessed, 'unprocessed': [t1]} - self.assertEqual(result, expected_result) - - def test_repetition_by_default_does_not_effect(self): - list1 = [3, 4] - list1_id = id(list1) - a = [1, 2, 
list1] - a_id = id(a) - - list2 = [4, 3, 3] - list2_id = id(list2) - b = [list2, 2, 1] - b_id = id(b) - - hash_a = DeepHashPrep(a) - hash_b = DeepHashPrep(b) - - self.assertEqual(hash_a[list1_id], hash_b[list2_id]) - self.assertEqual(hash_a[a_id], hash_b[b_id]) - - def test_setting_repetition_off_unequal_hash(self): - list1 = [3, 4] - list1_id = id(list1) - a = [1, 2, list1] - a_id = id(a) - - list2 = [4, 3, 3] - list2_id = id(list2) - b = [list2, 2, 1] - b_id = id(b) - - hash_a = DeepHashPrep(a, ignore_repetition=False) - hash_b = DeepHashPrep(b, ignore_repetition=False) - - self.assertNotEqual(hash_a[list1_id], hash_b[list2_id]) - self.assertNotEqual(hash_a[a_id], hash_b[b_id]) - - self.assertEqual(hash_a[list1_id].replace('3|1', '3|2'), - hash_b[list2_id]) - - def test_already_calculated_hash_wont_be_recalculated(self): - hashes = (i for i in range(10)) - - def hasher(obj): - return str(next(hashes)) - - obj = "a" - expected_result = {id(obj): '0'} - result = DeepHash(obj, hasher=hasher) - self.assertEqual(result, expected_result) - - # we simply feed the last result to DeepHash - # So it can re-use the results. - result2 = DeepHash(obj, hasher=hasher, hashes=result) - # if hashes are not cached and re-used, - # then the next time hasher runs, it returns - # number 1 instead of 0. 
- self.assertEqual(result2, expected_result) - - result3 = DeepHash(obj, hasher=hasher) - expected_result = {id(obj): '1'} - self.assertEqual(result3, expected_result) - - def test_skip_type(self): - l1 = logging.getLogger("test") - obj = {"log": l1, 2: 1337} - result = DeepHashPrep(obj, exclude_types={logging.Logger}) - self.assertEqual(result[id(l1)], result.skipped) - - def test_prep_dic_with_loop(self): - obj = {2: 1337} - obj[1] = obj - result = DeepHashPrep(obj) - expected_result = {id(obj): 'dict:{int:2:int:1337}'} - self.assertEqual(result, expected_result) - - def test_prep_iterable_with_loop(self): - obj = [1] - obj.append(obj) - result = DeepHashPrep(obj) - expected_result = {id(obj): 'list:int:1'} - self.assertEqual(result, expected_result) - - def test_prep_iterable_with_excluded_type(self): - l1 = logging.getLogger("test") - obj = [1, l1] - result = DeepHashPrep(obj, exclude_types={logging.Logger}) - self.assertTrue(id(l1) not in result) - - -class DeepHashSHA1TestCase(unittest.TestCase): - """DeepHash with SHA1 Tests.""" - - def test_prep_str(self): - obj = "a" - expected_result = { - id(obj): 'c2a00c48d4713267a2ab9ca9739214127830e9be' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - def test_prep_str_fail_if_mutable(self): - """ - This test fails if ContentHash is getting a mutable copy of hashes - which means each init of the ContentHash will have hashes from - the previous init. 
- """ - obj1 = "a" - id_obj1 = id(obj1) - expected_result = { - id_obj1: 'c2a00c48d4713267a2ab9ca9739214127830e9be' - } - result = DeepHash(obj1, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - obj2 = "b" - result = DeepHash(obj2, hasher=DeepHash.sha1hex) - self.assertTrue(id_obj1 not in result) - - def test_bytecode(self): - obj = b"a" - if py3: - expected_result = { - id(obj): '64a91ccb03c69f78d076d884de9bc5355849cc12' - } - else: - expected_result = { - id(obj): 'c2a00c48d4713267a2ab9ca9739214127830e9be' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - def test_list1(self): - string1 = "a" - obj = [string1, 10, 20] - expected_result = { - id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', - id(obj): - '5af30c367e2e176f7c362356559f3e8cc73302e5' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - def test_dict1(self): - string1 = "a" - key1 = "key1" - obj = {key1: string1, 1: 10, 2: 20} - expected_result = { - id(key1): '35624f541de8d2cc9c31deba03c7dda9b1da09f7', - id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', - id(obj): - 'b13e2e23ed7e46208157e45bfbe0113782804e17' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +To run only the search tests: + python -m unittest tests.test_hash + +Or to run all the tests: + python -m unittest discover + +Or to run all the tests with coverage: + coverage run --source deepdiff setup.py test + +Or using Nose: + nosetests --with-coverage --cover-package=deepdiff + +To run a specific test, run this from the root of repo: + On linux: + nosetests ./tests/test_hash.py:DeepHashTestCase.test_bytecode + + On windows: + nosetests .\tests\test_hash.py:DeepHashTestCase.test_string_in_root +""" +import unittest +from deepdiff import DeepHash +from deepdiff.helper import py3, pypy3 +from collections 
import namedtuple +from functools import partial +import logging + +logging.disable(logging.CRITICAL) + + +class CustomClass: + def __init__(self, a, b=None): + self.a = a + self.b = b + + def __str__(self): + return "({}, {})".format(self.a, self.b) + + def __repr__(self): + return self.__str__() + + +# Only the prep part of DeepHash. We don't need to test the actual hash function. +DeepHashPrep = partial(DeepHash, constant_size=False) + + +def prep_str(obj): + return 'str:{}'.format(obj) + + +class DeepHashTestCase(unittest.TestCase): + """DeepHashPrep Tests.""" + + def test_prep_str(self): + obj = "a" + expected_result = {id(obj): prep_str(obj)} + result = DeepHashPrep(obj) + self.assertEqual(result, expected_result) + + def test_prep_str_fail_if_mutable(self): + """ + This test fails if ContentHash is getting a mutable copy of hashes + which means each init of the ContentHash will have hashes from + the previous init. + """ + obj1 = "a" + id_obj1 = id(obj1) + expected_result = {id_obj1: prep_str(obj1)} + result = DeepHashPrep(obj1) + self.assertEqual(result, expected_result) + obj2 = "b" + result = DeepHashPrep(obj2) + self.assertTrue(id_obj1 not in result) + + def do_list_or_tuple(self, func, func_str): + string1 = "a" + obj = func([string1, 10, 20]) + string1_prepped = prep_str(string1) + expected_result = { + id(string1): string1_prepped, + id(obj): '{}:int:10,int:20,{}'.format(func_str, string1_prepped), + } + result = DeepHashPrep(obj) + self.assertEqual(result, expected_result) + + def test_list_and_tuple(self): + for func, func_str in ((list, 'list'), (tuple, 'tuple')): + self.do_list_or_tuple(func, func_str) + + def test_named_tuples(self): + # checking if pypy3 is running the test + # in that case due to a pypy3 bug or something + # the id of x inside the named tuple changes. 
+ x = "x" + x_id = id(x) + x_prep = prep_str(x) + Point = namedtuple('Point', [x]) + obj = Point(x=11) + result = DeepHashPrep(obj) + if pypy3: + self.assertEqual(result[id(obj)], 'ntdict:{str:%s:int:11}' % x) + else: + expected_result = { + x_id: x_prep, + id(obj): 'ntdict:{str:%s:int:11}' % x, + } + self.assertEqual(result, expected_result) + + def test_dict(self): + string1 = "a" + string1_prepped = prep_str(string1) + key1 = "key1" + key1_prepped = prep_str(key1) + obj = {key1: string1, 1: 10, 2: 20} + expected_result = { + id(key1): key1_prepped, + id(string1): string1_prepped, + id(obj): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % (key1, string1) + } + result = DeepHashPrep(obj) + self.assertEqual(result, expected_result) + + def test_dict_in_list(self): + string1 = "a" + key1 = "key1" + dict1 = {key1: string1, 1: 10, 2: 20} + obj = [0, dict1] + expected_result = { + id(key1): "str:{}".format(key1), + id(string1): "str:{}".format(string1), + id(dict1): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % + (key1, string1), + id(obj): + 'list:dict:{int:1:int:10;int:2:int:20;str:%s:str:%s},int:0' % + (key1, string1) + } + result = DeepHashPrep(obj) + self.assertEqual(result, expected_result) + + def test_nested_lists_same_hash(self): + t1 = [1, 2, [3, 4]] + t2 = [[4, 3], 2, 1] + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + + def test_nested_lists_same_hash2(self): + t1 = [1, 2, [3, [4, 5]]] + t2 = [[[5, 4], 3], 2, 1] + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + + def test_nested_lists_same_hash3(self): + t1 = [{1: [2, 3], 4: [5, [6, 7]]}] + t2 = [{4: [[7, 6], 5], 1: [3, 2]}] + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + + def test_nested_lists_in_dictionary_same_hash(self): + t1 = [{"c": 4}, {"c": 3}] + t2 = [{"c": 3}, {"c": 4}] + t1_hash = 
DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + + def test_same_sets_same_hash(self): + t1 = {1, 3, 2} + t2 = {2, 3, 1} + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + + def test_similar_sets_with_significant_digits_same_hash(self): + t1 = {0.012, 0.98} + t2 = {0.013, 0.99} + t1_hash = DeepHashPrep(t1, significant_digits=1) + t2_hash = DeepHashPrep(t2, significant_digits=1) + + self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + + def test_same_sets_in_lists_same_hash(self): + t1 = ["a", {1, 3, 2}] + t2 = [{2, 3, 1}, "a"] + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + + def test_unknown_parameters(self): + with self.assertRaises(ValueError): + DeepHashPrep(1, wrong_param=2) + + def test_bad_attribute(self): + class Bad(object): + __slots__ = ['x', 'y'] + + def __getattr__(self, key): + raise AttributeError("Bad item") + + def __str__(self): + return "Bad Object" + + t1 = Bad() + + result = DeepHashPrep(t1) + expected_result = {id(t1): result.unprocessed, 'unprocessed': [t1]} + self.assertEqual(result, expected_result) + + def test_repetition_by_default_does_not_effect(self): + list1 = [3, 4] + list1_id = id(list1) + a = [1, 2, list1] + a_id = id(a) + + list2 = [4, 3, 3] + list2_id = id(list2) + b = [list2, 2, 1] + b_id = id(b) + + hash_a = DeepHashPrep(a) + hash_b = DeepHashPrep(b) + + self.assertEqual(hash_a[list1_id], hash_b[list2_id]) + self.assertEqual(hash_a[a_id], hash_b[b_id]) + + def test_setting_repetition_off_unequal_hash(self): + list1 = [3, 4] + list1_id = id(list1) + a = [1, 2, list1] + a_id = id(a) + + list2 = [4, 3, 3] + list2_id = id(list2) + b = [list2, 2, 1] + b_id = id(b) + + hash_a = DeepHashPrep(a, ignore_repetition=False) + hash_b = DeepHashPrep(b, ignore_repetition=False) + + self.assertNotEqual(hash_a[list1_id], hash_b[list2_id]) + 
self.assertNotEqual(hash_a[a_id], hash_b[b_id]) + + self.assertEqual(hash_a[list1_id].replace('3|1', '3|2'), + hash_b[list2_id]) + + def test_already_calculated_hash_wont_be_recalculated(self): + hashes = (i for i in range(10)) + + def hasher(obj): + return str(next(hashes)) + + obj = "a" + expected_result = {id(obj): '0'} + result = DeepHash(obj, hasher=hasher) + self.assertEqual(result, expected_result) + + # we simply feed the last result to DeepHash + # So it can re-use the results. + result2 = DeepHash(obj, hasher=hasher, hashes=result) + # if hashes are not cached and re-used, + # then the next time hasher runs, it returns + # number 1 instead of 0. + self.assertEqual(result2, expected_result) + + result3 = DeepHash(obj, hasher=hasher) + expected_result = {id(obj): '1'} + self.assertEqual(result3, expected_result) + + def test_skip_type(self): + l1 = logging.getLogger("test") + obj = {"log": l1, 2: 1337} + result = DeepHashPrep(obj, exclude_types={logging.Logger}) + self.assertEqual(result[id(l1)], result.skipped) + + def test_prep_dic_with_loop(self): + obj = {2: 1337} + obj[1] = obj + result = DeepHashPrep(obj) + expected_result = {id(obj): 'dict:{int:2:int:1337}'} + self.assertEqual(result, expected_result) + + def test_prep_iterable_with_loop(self): + obj = [1] + obj.append(obj) + result = DeepHashPrep(obj) + expected_result = {id(obj): 'list:int:1'} + self.assertEqual(result, expected_result) + + def test_prep_iterable_with_excluded_type(self): + l1 = logging.getLogger("test") + obj = [1, l1] + result = DeepHashPrep(obj, exclude_types={logging.Logger}) + self.assertTrue(id(l1) not in result) + + +class DeepHashSHA1TestCase(unittest.TestCase): + """DeepHash with SHA1 Tests.""" + + def test_prep_str(self): + obj = "a" + expected_result = { + id(obj): 'c2a00c48d4713267a2ab9ca9739214127830e9be' + } + result = DeepHash(obj, hasher=DeepHash.sha1hex) + self.assertEqual(result, expected_result) + + def test_prep_str_fail_if_mutable(self): + """ + This test fails 
if ContentHash is getting a mutable copy of hashes + which means each init of the ContentHash will have hashes from + the previous init. + """ + obj1 = "a" + id_obj1 = id(obj1) + expected_result = { + id_obj1: 'c2a00c48d4713267a2ab9ca9739214127830e9be' + } + result = DeepHash(obj1, hasher=DeepHash.sha1hex) + self.assertEqual(result, expected_result) + obj2 = "b" + result = DeepHash(obj2, hasher=DeepHash.sha1hex) + self.assertTrue(id_obj1 not in result) + + def test_bytecode(self): + obj = b"a" + if py3: + expected_result = { + id(obj): '64a91ccb03c69f78d076d884de9bc5355849cc12' + } + else: + expected_result = { + id(obj): 'c2a00c48d4713267a2ab9ca9739214127830e9be' + } + result = DeepHash(obj, hasher=DeepHash.sha1hex) + self.assertEqual(result, expected_result) + + def test_list1(self): + string1 = "a" + obj = [string1, 10, 20] + expected_result = { + id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', + id(obj): + '5af30c367e2e176f7c362356559f3e8cc73302e5' + } + result = DeepHash(obj, hasher=DeepHash.sha1hex) + self.assertEqual(result, expected_result) + + def test_dict1(self): + string1 = "a" + key1 = "key1" + obj = {key1: string1, 1: 10, 2: 20} + expected_result = { + id(key1): '35624f541de8d2cc9c31deba03c7dda9b1da09f7', + id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', + id(obj): + 'b13e2e23ed7e46208157e45bfbe0113782804e17' + } + result = DeepHash(obj, hasher=DeepHash.sha1hex) + self.assertEqual(result, expected_result) From aacbe753bb83e0040e6a10e78103669831e5586a Mon Sep 17 00:00:00 2001 From: Seperman Date: Wed, 1 Nov 2017 21:58:09 -0700 Subject: [PATCH 11/76] Fixing unittests --- deepdiff/contenthash.py | 5 ++--- deepdiff/diff.py | 16 ++++++++-------- tests/test_hash.py | 24 +++++++++++++++++++++--- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index a65c91fc..58c194ac 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -264,15 +264,14 @@ def __hash(self, obj, 
parents_ids=frozenset({})): self['unprocessed'].append(obj) elif self.constant_size and not isinstance(obj, numbers): - # from nose.tools import set_trace; set_trace() # temp = result result = self.hasher(result) # print('-' * 10) # print(obj) # print("{} -> {}".format(temp, result)) - if not isinstance(obj, numbers): - self[obj_id] = result + # if not isinstance(obj, numbers): + self[obj_id] = result return result diff --git a/deepdiff/diff.py b/deepdiff/diff.py index ecd1d5ec..b8fe5114 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -977,14 +977,14 @@ def __diff_tuple(self, level, parents_ids): else: self.__diff_obj(level, parents_ids, is_namedtuple=True) - def __create_hashtable(self, t, level): - """Create hashtable of {item_hash: item}""" + def _add_hash(self, hashes, item_hash, item, i): + if item_hash in hashes: + hashes[item_hash].indexes.append(i) + else: + hashes[item_hash] = IndexedHash(indexes=[i], item=item) - def add_hash(hashes, item_hash, item, i): - if item_hash in hashes: - hashes[item_hash].indexes.append(i) - else: - hashes[item_hash] = IndexedHash(indexes=[i], item=item) + def __create_hashtable(self, t, level): + """Create hashtable of {item_hash: (indexes, item)}""" hashes = {} for (i, item) in enumerate(t): @@ -1003,7 +1003,7 @@ def add_hash(hashes, item_hash, item, i): "thus not counting this object." 
% level.path()) else: - add_hash(hashes, item_hash, item, i) + self._add_hash(hashes=hashes, item_hash=item_hash, item=item, i=i) return hashes def __diff_iterable_with_contenthash(self, level): diff --git a/tests/test_hash.py b/tests/test_hash.py index a5d2279b..bba31fe2 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -79,6 +79,8 @@ def do_list_or_tuple(self, func, func_str): obj = func([string1, 10, 20]) string1_prepped = prep_str(string1) expected_result = { + id(10): 'int:10', + id(20): 'int:20', id(string1): string1_prepped, id(obj): '{}:int:10,int:20,{}'.format(func_str, string1_prepped), } @@ -105,6 +107,7 @@ def test_named_tuples(self): expected_result = { x_id: x_prep, id(obj): 'ntdict:{str:%s:int:11}' % x, + id(11): 'int:11', } self.assertEqual(result, expected_result) @@ -115,6 +118,10 @@ def test_dict(self): key1_prepped = prep_str(key1) obj = {key1: string1, 1: 10, 2: 20} expected_result = { + id(1): 'int:1', + id(10): 'int:10', + id(2): 'int:2', + id(20): 'int:20', id(key1): key1_prepped, id(string1): string1_prepped, id(obj): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % (key1, string1) @@ -128,6 +135,11 @@ def test_dict_in_list(self): dict1 = {key1: string1, 1: 10, 2: 20} obj = [0, dict1] expected_result = { + id(0): 'int:0', + id(1): 'int:1', + id(10): 'int:10', + id(2): 'int:2', + id(20): 'int:20', id(key1): "str:{}".format(key1), id(string1): "str:{}".format(string1), id(dict1): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % @@ -285,14 +297,14 @@ def test_prep_dic_with_loop(self): obj = {2: 1337} obj[1] = obj result = DeepHashPrep(obj) - expected_result = {id(obj): 'dict:{int:2:int:1337}'} + expected_result = {id(obj): 'dict:{int:2:int:1337}', id(1): 'int:1', id(2): 'int:2', id(1337): 'int:1337'} self.assertEqual(result, expected_result) def test_prep_iterable_with_loop(self): obj = [1] obj.append(obj) result = DeepHashPrep(obj) - expected_result = {id(obj): 'list:int:1'} + expected_result = {id(obj): 'list:int:1', id(1): 
'int:1'} self.assertEqual(result, expected_result) def test_prep_iterable_with_excluded_type(self): @@ -349,7 +361,9 @@ def test_list1(self): expected_result = { id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', id(obj): - '5af30c367e2e176f7c362356559f3e8cc73302e5' + '5af30c367e2e176f7c362356559f3e8cc73302e5', + id(10): 'int:10', + id(20): 'int:20', } result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) @@ -359,6 +373,10 @@ def test_dict1(self): key1 = "key1" obj = {key1: string1, 1: 10, 2: 20} expected_result = { + id(1): 'int:1', + id(10): 'int:10', + id(2): 'int:2', + id(20): 'int:20', id(key1): '35624f541de8d2cc9c31deba03c7dda9b1da09f7', id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', id(obj): From 9daa82d57d3c19e00141629738ac69e92cc8d15d Mon Sep 17 00:00:00 2001 From: Seperman Date: Wed, 1 Nov 2017 21:59:47 -0700 Subject: [PATCH 12/76] Adding comments --- deepdiff/contenthash.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 58c194ac..b660773e 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -264,13 +264,10 @@ def __hash(self, obj, parents_ids=frozenset({})): self['unprocessed'].append(obj) elif self.constant_size and not isinstance(obj, numbers): - # temp = result result = self.hasher(result) - # print('-' * 10) - # print(obj) - # print("{} -> {}".format(temp, result)) - # if not isinstance(obj, numbers): + # It is important to keep the hash of all objects. + # The hashes will be later used for comparing the objects. self[obj_id] = result return result From 3faca52d13482afe28d558ee8ad04bd413622ca8 Mon Sep 17 00:00:00 2001 From: Seperman Date: Thu, 2 Nov 2017 15:05:28 -0700 Subject: [PATCH 13/76] Adding DeepHash to documents. 
Fixing issue when hashes are integers but need to be joined --- README.md | 9 ++++- deepdiff/contenthash.py | 89 +++++++++++++++++++++++++++++++++++++---- deepdiff/search.py | 22 +++++----- docs/contenthash.rst | 14 +++++++ docs/index.rst | 6 +++ tests/test_hash.py | 37 ++++++++++++----- 6 files changed, 148 insertions(+), 29 deletions(-) create mode 100644 docs/contenthash.rst diff --git a/README.md b/README.md index fd52de8d..48730adb 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ Verbose level by default is 1. The possible values are 0, 1 and 2. - Verbose level 1: default - Verbose level 2: will report values when custom objects or dictionaries have items added or removed. [Example](#items-added-or-removed-verbose) -## Deep Search +# Deep Search (New in v2-1-0) Tip: Take a look at [grep](#grep) which gives you a new interface for DeepSearch! @@ -202,6 +202,7 @@ Which will print: {'matched_paths': {"root['somewhere']": "around"}, 'matched_values': {"root['long']": "somewhere"}} ``` + Now, think of a case where you want to match a value as a word. ```py @@ -211,12 +212,15 @@ ds = DeepSearch(obj, "around", match_string=True, verbose_level=2) print(ds) ds = DeepSearch(obj, "around", verbose_level=2) print(ds) -``` +``` + Which will print: + ```py {'matched_values': {"root['somewhere']": 'around'}} {'matched_values': {"root['long']": 'somewhere around',"root['somewhere']": 'around'}} ``` + Tip: An interesting use case is to search inside `locals()` when doing pdb. 
## Grep @@ -876,3 +880,4 @@ Also thanks to: - maxrothman for search in inherited class attributes - maxrothman for search for types/objects - MartyHub for exclude regex paths +- sreecodeslayer for DeepSearch match_string diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index b660773e..46351844 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -42,6 +42,85 @@ def __str__(self): class DeepHash(dict): r""" **DeepHash** + + DeepHash calculates the hash of objects based on their contents in a deterministic way. + This way 2 objects with the same content should have the same hash. + + The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. + For example you can use DeepHash to calculate the hash of a set or a dictionary! + + The core of DeepHash is a deterministic serialization of your object into a string so it + can be passed to a hash function. By default it uses Python's built-in hash function + but you can pass another hash function to it if you want. + For example the Murmur3 hash function or a cryptographic hash function. + + + **Parameters** + + obj : any object, The object to be hashed based on its content. + + hashes : dictionary, default = empty dictionary. + A dictionary of {object id: object hash} to start with. + Any object that is encountered and its id is already in the hashes dictionary, + will re-use the hash that is provided by this dictionary instead of re-calculating + its hash. + + exclude_types: list, default = None. + List of object types to exclude from hashing. + Note that the deepdiff diffing functionality lets this to be the default at all times. + But if you are using DeepHash directly, you can set this parameter. + + hasher: function. default = hash + hasher is the hashing function. The default is built-in hash function. + But you can pass another hash function to it if you want. + For example the Murmur3 hash function or a cryptographic hash function. 
+ All it needs is a function that takes the input in string format + and returns the hash. + + SHA1 is already provided as an alternative to the built-in hash function. + You can use it by passing: hasher=DeepHash.sha1hex + + ignore_repetition: Boolean, default = True + If repetitions in an iterable should cause the hash of iterable to be different. + Note that the deepdiff diffing functionality lets this to be the default at all times. + But if you are using DeepHash directly, you can set this parameter. + + significant_digits : int >= 0, default=None. + If it is a non negative integer, it compares only that many digits AFTER + the decimal point. + + This only affects floats, decimal.Decimal and complex. + + Take a look at DeepDiff.diff docs for explanation of how this works. + + constant_size: Boolean, default = True + What DeepHash does is to "prep" the contents of objects into strings. + If constant_size is set, then it actually goes ahead and hashes the string + using the hasher function. + + The only time you want the constant_size to be False is if you want to know what + the string representation of your object is BEFORE it gets hashed. + + **Returns** + A dictionary of {item id: item hash}. + If your object is nested, it will include hashes of all the objects it includes! + + + **Examples** + + Let's say you have a dictionary object.
+ >>> from deepdiff import DeepHash + >>> + >>> obj = {1: 2, 'a': 'b'} + + If you try to hash it: + >>> hash(obj) + Traceback (most recent call last): + File "", line 1, in + TypeError: unhashable type: 'dict' + + But with DeepHash: + """ def __init__(self, @@ -64,7 +143,7 @@ def __init__(self, exclude_types) # we need tuple for checking isinstance self.ignore_repetition = ignore_repetition - self.hasher = self.basic_hash if hasher is None else hasher + self.hasher = hash if hasher is None else hasher hashes = hashes if hashes else {} self.update(hashes) self['unprocessed'] = [] @@ -84,10 +163,6 @@ def __init__(self, else: del self['unprocessed'] - @staticmethod - def basic_hash(obj): - return str(hash(obj)) - @staticmethod def sha1hex(obj): """Use Sha1 for more accuracy.""" @@ -188,6 +263,7 @@ def __prep_iterable(self, obj, parents_ids=frozenset({})): ] result.sort() + result = map(str, result) # making sure the result items are string so join command works. result = ','.join(result) result = "{}:{}".format(type(obj).__name__, result) @@ -228,7 +304,6 @@ def __hash(self, obj, parents_ids=frozenset({})): obj_id = id(obj) if obj_id in self: - print('obj is already there') return self[obj_id] result = self.not_hashed @@ -263,7 +338,7 @@ def __hash(self, obj, parents_ids=frozenset({})): if result is self.not_hashed: # pragma: no cover self['unprocessed'].append(obj) - elif self.constant_size and not isinstance(obj, numbers): + elif self.constant_size: result = self.hasher(result) # It is important to keep the hash of all objects. diff --git a/deepdiff/search.py b/deepdiff/search.py index 05c8bf75..25f9061e 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -38,6 +38,12 @@ class DeepSearch(dict): exclude_types: list, default = None. List of object types to exclude from the report. + case_sensitive: Boolean, default = False + + match_string: Boolean, default = False + If True, the value of the object or its children have to exactly match the item.
+ If False, the value of the item can be a part of the value of the object or its children + **Returns** A DeepSearch object that has the matched paths and matched values. @@ -85,7 +91,8 @@ def __init__(self, if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" - "The valid parameters are obj, item, exclude_paths, exclude_types and verbose_level." + "The valid parameters are obj, item, exclude_paths, exclude_types,\n" + "case_sensitive, match_string and verbose_level." ) % ', '.join(kwargs.keys())) self.obj = obj @@ -206,7 +213,9 @@ def __search_dict(self, new_parent = parent_text % (parent, item_key_str) new_parent_cased = new_parent if self.case_sensitive else new_parent.lower() - if str(item) in new_parent_cased: + str_item = str(item) + if (self.match_string and str_item == new_parent_cased) or\ + (not self.match_string and str_item in new_parent_cased): self.__report( report_key='matched_paths', key=new_parent, @@ -250,13 +259,8 @@ def __search_str(self, obj, item, parent): """Compare strings""" obj_text = obj if self.case_sensitive else obj.lower() - if self.match_string: - if item == obj_text: - self.__report(report_key='matched_values', key=parent, value=obj) - - else: - if item in obj_text: - self.__report(report_key='matched_values', key=parent, value=obj) + if (self.match_string and item == obj_text) or (not self.match_string and item in obj_text): + self.__report(report_key='matched_values', key=parent, value=obj) def __search_numbers(self, obj, item, parent): if item == obj: diff --git a/docs/contenthash.rst b/docs/contenthash.rst new file mode 100644 index 00000000..179d8043 --- /dev/null +++ b/docs/contenthash.rst @@ -0,0 +1,14 @@ +:doc:`/index` + +DeepHash Reference +==================== + +.. toctree:: + :maxdepth: 3 + +.. automodule:: deepdiff.contenthash + +.. 
autoclass:: DeepHash + :members: + +Back to :doc:`/index` diff --git a/docs/index.rst b/docs/index.rst index fe4af5f8..4f27625f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,6 +25,7 @@ Importing >>> from deepdiff import DeepDiff # For Deep Difference of 2 objects >>> from deepdiff import DeepSearch # For finding if item exists in an object + >>> from deepdiff import DeepHash # For hashing objects based on their contents ******** Features @@ -335,6 +336,11 @@ DeepSearch Reference :doc:`/dsearch` +DeepHash Reference +==================== + +:doc:`/contenthash` + Indices and tables ================== diff --git a/tests/test_hash.py b/tests/test_hash.py index bba31fe2..b5ae532d 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -51,7 +51,24 @@ def prep_str(obj): class DeepHashTestCase(unittest.TestCase): - """DeepHashPrep Tests.""" + + def test_dictionary(self): + + obj = {1: 1} + result = DeepHash(obj) + self.assertEqual(set(result.keys()), {id(1), id(obj)}) + + def test_list_of_sets(self): + a = {1} + b = {2} + obj = [a, b] + result = DeepHash(obj) + expected_result = {id(1), id(2), id(a), id(b), id(obj)} + self.assertEqual(set(result.keys()), expected_result) + + +class DeepHashPrepTestCase(unittest.TestCase): + """DeepHashPrep Tests covering object serialization.""" def test_prep_str(self): obj = "a" @@ -360,10 +377,9 @@ def test_list1(self): obj = [string1, 10, 20] expected_result = { id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', - id(obj): - '5af30c367e2e176f7c362356559f3e8cc73302e5', - id(10): 'int:10', - id(20): 'int:20', + id(obj): 'ad8e2f1479d6a5e1b01304f18f04bbe3ea0673ca', + id(10): DeepHash.sha1hex('int:10'), + id(20): DeepHash.sha1hex('int:20'), } result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) @@ -373,14 +389,13 @@ def test_dict1(self): key1 = "key1" obj = {key1: string1, 1: 10, 2: 20} expected_result = { - id(1): 'int:1', - id(10): 'int:10', - id(2): 'int:2', - id(20): 'int:20', + 
id(1): DeepHash.sha1hex('int:1'), + id(10): DeepHash.sha1hex('int:10'), + id(2): DeepHash.sha1hex('int:2'), + id(20): DeepHash.sha1hex('int:20'), id(key1): '35624f541de8d2cc9c31deba03c7dda9b1da09f7', id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', - id(obj): - 'b13e2e23ed7e46208157e45bfbe0113782804e17' + id(obj): '8fa42fa0aa950885c4c1ec95a3d6423fc673bf49' } result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) From 00810564242db41106a8689f2a0b93ada1bd4dee Mon Sep 17 00:00:00 2001 From: Seperman Date: Thu, 2 Nov 2017 16:56:43 -0700 Subject: [PATCH 14/76] dealing with type conversions --- deepdiff/contenthash.py | 42 ++++++++++++++++++++++++++++------------- deepdiff/diff.py | 8 +++++--- deepdiff/helper.py | 5 +++++ tests/test_hash.py | 37 ++++++++++++++++++++++++++++++------ 4 files changed, 70 insertions(+), 22 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 46351844..886fe9a9 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -39,6 +39,27 @@ def __str__(self): return "Error: NotHashed" # pragma: no cover +def clean_type(obj, include_string_type_changes=False): + """ + Clean type conversions + """ + if py3: + if isinstance(obj, str): + if include_string_type_changes: + obj = "{}:{}".format(type(obj).__name__, obj) + obj = obj.encode('utf-8') + elif isinstance(obj, bytes) and include_string_type_changes: + obj = type(obj).__name__.encode('utf-8') + b":" + obj + else: + if isinstance(obj, unicode): + if include_string_type_changes: + obj = u"{}:{}".format(type(obj).__name__, obj) + obj = obj.encode('utf-8') + elif isinstance(obj, str) and include_string_type_changes: + obj = type(obj).__name__ + ":" + obj + return obj + + class DeepHash(dict): r""" **DeepHash** @@ -101,6 +122,10 @@ class DeepHash(dict): The only time you want the constant_size to be False is if you want to know what the string representation of your object is BEFORE it gets hashed. 
+ include_string_type_changes: Boolean, default = False + string type conversions should not affect the hash output when this is set to False. + For example "Hello" and b"Hello" should produce the same hash. + **Returns** A dictionary of {item id: item hash}. If your object is nested, it will include hashes of all the objects it includes! @@ -131,6 +156,7 @@ def __init__(self, ignore_repetition=True, significant_digits=None, constant_size=True, + include_string_type_changes=False, **kwargs): if kwargs: raise ValueError( @@ -151,6 +177,7 @@ def __init__(self, self.skipped = Skipped() self.not_hashed = NotHashed() self.significant_digits = significant_digits + self.include_string_type_changes = include_string_type_changes # makes the hash return constant size result if true # the only time it should be set to False is when # testing the individual hash functions for different types of objects. @@ -166,18 +193,6 @@ def __init__(self, @staticmethod def sha1hex(obj): """Use Sha1 for more accuracy.""" - if py3: # pragma: no cover - if isinstance(obj, str): - obj = "{}:{}".format(type(obj).__name__, obj) - obj = obj.encode('utf-8') - elif isinstance(obj, bytes): - obj = type(obj).__name__.encode('utf-8') + b":" + obj - else: # pragma: no cover - if isinstance(obj, unicode): - obj = u"{}:{}".format(type(obj).__name__, obj) - obj = obj.encode('utf-8') - elif isinstance(obj, str): - obj = type(obj).__name__ + ":" + obj return sha1(obj).hexdigest() @staticmethod @@ -339,7 +354,8 @@ def __hash(self, obj, parents_ids=frozenset({})): self['unprocessed'].append(obj) elif self.constant_size: - result = self.hasher(result) + result_cleaned = clean_type(result, self.include_string_type_changes) + result = self.hasher(result_cleaned) # It is important to keep the hash of all objects. # The hashes will be later used for comparing the objects. 
diff --git a/deepdiff/diff.py b/deepdiff/diff.py index b8fe5114..073dccd8 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -656,6 +656,7 @@ def __init__(self, exclude_paths=set(), exclude_regex_paths=set(), exclude_types=set(), + include_string_type_changes=False, verbose_level=1, view='text', **kwargs): @@ -670,8 +671,8 @@ def __init__(self, self.exclude_paths = set(exclude_paths) self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in set(exclude_regex_paths)] self.exclude_types = set(exclude_types) - self.exclude_types_tuple = tuple( - exclude_types) # we need tuple for checking isinstance + self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance + self.include_string_type_changes = include_string_type_changes self.hashes = {} if significant_digits is not None and significant_digits < 0: @@ -991,7 +992,8 @@ def __create_hashtable(self, t, level): try: hashes_all = DeepHash(item, hashes=self.hashes, - significant_digits=self.significant_digits) + significant_digits=self.significant_digits, + include_string_type_changes=self.include_string_type_changes) item_hash = hashes_all.get(id(item), item) except Exception as e: # pragma: no cover logger.warning("Can not produce a hash for %s." diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 64ce0522..b41fd7b5 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -11,6 +11,11 @@ py_minor_version = sys.version[2] py3 = py_major_version == '3' +py4 = py_major_version == '4' + +if py4: + logger.warning('Python 4 is not supported yet. 
Switching logic to Python 3.') + py3 = True if (py_major_version, py_minor_version) == (2.6): # pragma: no cover sys.exit('Python 2.6 is not supported.') diff --git a/tests/test_hash.py b/tests/test_hash.py index b5ae532d..3115bdf2 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -337,7 +337,7 @@ class DeepHashSHA1TestCase(unittest.TestCase): def test_prep_str(self): obj = "a" expected_result = { - id(obj): 'c2a00c48d4713267a2ab9ca9739214127830e9be' + id(obj): '48591f1d794734cabf55f96f5a5a72c084f13ac0' } result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) @@ -351,7 +351,7 @@ def test_prep_str_fail_if_mutable(self): obj1 = "a" id_obj1 = id(obj1) expected_result = { - id_obj1: 'c2a00c48d4713267a2ab9ca9739214127830e9be' + id_obj1: '48591f1d794734cabf55f96f5a5a72c084f13ac0' } result = DeepHash(obj1, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) @@ -363,11 +363,11 @@ def test_bytecode(self): obj = b"a" if py3: expected_result = { - id(obj): '64a91ccb03c69f78d076d884de9bc5355849cc12' + id(obj): '1283c61f8aa47c22d22552b742c93f6f6dac83ab' } else: expected_result = { - id(obj): 'c2a00c48d4713267a2ab9ca9739214127830e9be' + id(obj): '48591f1d794734cabf55f96f5a5a72c084f13ac0' } result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) @@ -376,7 +376,7 @@ def test_list1(self): string1 = "a" obj = [string1, 10, 20] expected_result = { - id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', + id(string1): '48591f1d794734cabf55f96f5a5a72c084f13ac0', id(obj): 'ad8e2f1479d6a5e1b01304f18f04bbe3ea0673ca', id(10): DeepHash.sha1hex('int:10'), id(20): DeepHash.sha1hex('int:20'), @@ -394,8 +394,33 @@ def test_dict1(self): id(2): DeepHash.sha1hex('int:2'), id(20): DeepHash.sha1hex('int:20'), id(key1): '35624f541de8d2cc9c31deba03c7dda9b1da09f7', - id(string1): 'c2a00c48d4713267a2ab9ca9739214127830e9be', + id(string1): '48591f1d794734cabf55f96f5a5a72c084f13ac0', id(obj): 
'8fa42fa0aa950885c4c1ec95a3d6423fc673bf49' } result = DeepHash(obj, hasher=DeepHash.sha1hex) self.assertEqual(result, expected_result) + + +class TestHasher(unittest.TestCase): + + def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self): + if py3: + a = 'hello' + b = b'hello' + else: + a = u'hello' + b = b'hello' + a_hash = DeepHash(a)[id(a)] + b_hash = DeepHash(b)[id(b)] + self.assertEqual(a_hash, b_hash) + + def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self): + if py3: + a = 'hello' + b = b'hello' + else: + a = u'hello' + b = b'hello' + a_hash = DeepHash(a, hasher=DeepHash.sha1hex)[id(a)] + b_hash = DeepHash(b, hasher=DeepHash.sha1hex)[id(b)] + self.assertEqual(a_hash, b_hash) From 6801d21ddb0645ed45dbcc8aa52ce892cd5f1084 Mon Sep 17 00:00:00 2001 From: Seperman Date: Fri, 3 Nov 2017 14:50:29 -0700 Subject: [PATCH 15/76] Clean type test --- tests/test_hash.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_hash.py b/tests/test_hash.py index 3115bdf2..bd6bb526 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -22,6 +22,7 @@ """ import unittest from deepdiff import DeepHash +from deepdiff.contenthash import clean_type from deepdiff.helper import py3, pypy3 from collections import namedtuple from functools import partial @@ -424,3 +425,27 @@ def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self): a_hash = DeepHash(a, hasher=DeepHash.sha1hex)[id(a)] b_hash = DeepHash(b, hasher=DeepHash.sha1hex)[id(b)] self.assertEqual(a_hash, b_hash) + + +class TestCleaningString(unittest.TestCase): + + def test_clean_type(self): + + if py3: + + params = ( + (b'hello', b'bytes:hello'), + ('hello', 'str:hello') + ) + else: + params = ( + ('hello', 'str:hello'), + (u'hello', u'unicode:hello') + ) + + for text, expected_result in params: + result = clean_type(text, include_string_type_changes=True) + self.assertEqual(result, expected_result) + + result = clean_type(text, include_string_type_changes=False) + 
self.assertEqual(result, text) From 38a2749ea341513e1f1cfab622a9045717773b3d Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 27 Mar 2018 23:02:46 -0700 Subject: [PATCH 16/76] fixing tests --- .gitignore | 1 + conftest.py | 4 ++++ deepdiff/contenthash.py | 2 +- tests/test_hash.py | 15 +++++++-------- 4 files changed, 13 insertions(+), 9 deletions(-) create mode 100644 conftest.py diff --git a/.gitignore b/.gitignore index bed0daa2..538379b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] +.pytest_cache/ # C extensions *.so diff --git a/conftest.py b/conftest.py new file mode 100644 index 00000000..03fd3199 --- /dev/null +++ b/conftest.py @@ -0,0 +1,4 @@ +import sys +import os + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'tests'))) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 886fe9a9..07fbe7cc 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -54,7 +54,7 @@ def clean_type(obj, include_string_type_changes=False): if isinstance(obj, unicode): if include_string_type_changes: obj = u"{}:{}".format(type(obj).__name__, obj) - obj = obj.encode('utf-8') + # obj = obj.encode('utf-8') elif isinstance(obj, str) and include_string_type_changes: obj = type(obj).__name__ + ":" + obj return obj diff --git a/tests/test_hash.py b/tests/test_hash.py index bd6bb526..bdc483f7 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -432,20 +432,19 @@ class TestCleaningString(unittest.TestCase): def test_clean_type(self): if py3: - params = ( - (b'hello', b'bytes:hello'), - ('hello', 'str:hello') + (b'hello', b'hello', b'bytes:hello'), + ('hello', b'hello', b'str:hello') ) else: params = ( - ('hello', 'str:hello'), - (u'hello', u'unicode:hello') + ('hello', 'hello', 'str:hello'), + (u'hello', 'hello', 'unicode:hello') ) - for text, expected_result in params: + for text, text_in_byte, expected_result in params: result = 
clean_type(text, include_string_type_changes=True) - self.assertEqual(result, expected_result) + assert result == expected_result result = clean_type(text, include_string_type_changes=False) - self.assertEqual(result, text) + assert result == text_in_byte From d656a27cf167f1f52d61666c5626a26187c29105 Mon Sep 17 00:00:00 2001 From: Juan J Soler Date: Fri, 14 Sep 2018 13:51:22 +0200 Subject: [PATCH 17/76] ignore_type_number : Boolean, default=False ignores types when t1 and t2 are numbers. --- deepdiff/diff.py | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index c14e5b28..c94c84eb 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -56,6 +56,8 @@ class DeepDiff(ResultDict): Normally ignore_order does not report duplicates and repetition changes. In order to report repetitions, set report_repetition=True in addition to ignore_order=True + ignore_type_number : Boolean, default=False ignores types when t1 and t2 are numbers. + report_repetition : Boolean, default=False reports repetitions when set True ONLY when ignore_order is set True too. This works for iterables. This feature currently is experimental and is not production ready. 
@@ -263,6 +265,44 @@ class DeepDiff(ResultDict): 'old_repeat': 1, 'value': 4}}} + Dictionary that contains float and integer: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = {1: 1, 2: 2.22} + >>> t2 = {1: 1.0, 2: 2.22} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[1]': { 'new_type': , + 'new_value': 1.0, + 'old_type': , + 'old_value': 1}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_number=True) + >>> pprint(ddiff, indent=2) + {} + + List that contains float and integer: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [1, 2, 3] + >>> t2 = [1.0, 2.0, 3.0] + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[0]': { 'new_type': , + 'new_value': 1.0, + 'old_type': , + 'old_value': 1}, + 'root[1]': { 'new_type': , + 'new_value': 2.0, + 'old_type': , + 'old_value': 2}, + 'root[2]': { 'new_type': , + 'new_value': 3.0, + 'old_type': , + 'old_value': 3}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_number=True) + >>> pprint(ddiff, indent=2) + {} + List that contains dictionary: >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} @@ -612,6 +652,7 @@ def __init__(self, t1, t2, ignore_order=False, + ignore_type_number=False, report_repetition=False, significant_digits=None, exclude_paths=set(), @@ -622,10 +663,11 @@ def __init__(self, if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" - "The valid parameters are ignore_order, report_repetition, significant_digits," + "The valid parameters are ignore_order, ignore_type_number, report_repetition, significant_digits," "exclude_paths, exclude_types, verbose_level and view.") % ', '.join(kwargs.keys())) self.ignore_order = ignore_order + self.ignore_type_number = ignore_type_number self.report_repetition = report_repetition self.exclude_paths = set(exclude_paths) self.exclude_types = 
set(exclude_types) @@ -1073,7 +1115,7 @@ def __diff(self, level, parents_ids=frozenset({})): if self.__skip_this(level): return - if type(level.t1) != type(level.t2): + if not isinstance(level.t1, type(level.t2)) and not (self.ignore_type_number and isinstance(level.t1, numbers) and isinstance(level.t2, numbers)): self.__diff_types(level) elif isinstance(level.t1, strings): From 1409d952d6a349127a4ecfc0f1c085f14ed018e6 Mon Sep 17 00:00:00 2001 From: Brian Maissy Date: Thu, 8 Nov 2018 11:39:57 +0200 Subject: [PATCH 18/76] fix comparison of objects with __weakref__ in __slots__ --- deepdiff/diff.py | 2 +- tests/test_diff_text.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) mode change 100644 => 100755 deepdiff/diff.py mode change 100644 => 100755 tests/test_diff_text.py diff --git a/deepdiff/diff.py b/deepdiff/diff.py old mode 100644 new mode 100755 index 073dccd8..29017f6a --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -743,7 +743,7 @@ def __add_to_frozen_set(parents_ids, item_id): @staticmethod def __dict_from_slots(object): def unmangle(attribute): - if attribute.startswith('__'): + if attribute.startswith('__') and attribute != '__weakref__': return '_{type}{attribute}'.format( type=type(object).__name__, attribute=attribute diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py old mode 100644 new mode 100755 index eb84faa4..5b410e92 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -836,6 +836,26 @@ def __str__(self): diff = DeepDiff(t1, t2) self.assertEqual(diff, {}) + def test_custom_objects_with_weakref_in_slots(self): + class ClassA(object): + __slots__ = ['a', '__weakref__'] + + def __init__(self, a): + self.a = a + + t1 = ClassA(1) + t2 = ClassA(2) + diff = DeepDiff(t1, t2) + result = { + 'values_changed': { + 'root.a': { + 'new_value': 2, + 'old_value': 1 + } + }, + } + self.assertEqual(diff, result) + def get_custom_objects_add_and_remove(self): class ClassA(object): a = 1 From 
35fda1cb73ce2e98f24f1652a07754767e422911 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 27 Nov 2018 23:58:11 -0800 Subject: [PATCH 19/76] wip --- .travis.yml | 5 +- deepdiff/contenthash.py | 122 +++++++---------- deepdiff/diff.py | 8 +- deepdiff/helper.py | 27 ++-- tests/test_diff_tree.py | 14 +- tests/test_hash.py | 289 +++++++++++++++++----------------------- 6 files changed, 198 insertions(+), 267 deletions(-) diff --git a/.travis.yml b/.travis.yml index 07a97317..5ca01f81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,12 @@ language: python python: - - "2.7" - "3.3" - "3.4" - "3.5" - "3.6" - - "pypy-5.4" # pypy on python 2.7 - # - "pypy3" # Removing pypy3 from travis since travis's pypy3 seems buggy + - "3.7" + - "pypy3" sudo: false diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 07fbe7cc..5432a773 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -1,8 +1,5 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import print_function -import sys from collections import Iterable from collections import MutableMapping from collections import defaultdict @@ -10,53 +7,39 @@ from hashlib import sha1 import logging -from deepdiff.helper import py3, int, strings, numbers, items +from deepdiff.helper import strings, numbers, items logger = logging.getLogger(__name__) -class Skipped(object): +class OtherTypes: def __repr__(self): - return "Skipped" # pragma: no cover + return "Error: {}".format(self.__class__.__name__) # pragma: no cover - def __str__(self): - return "Skipped" # pragma: no cover + __str__ = __repr__ -class Unprocessed(object): - def __repr__(self): - return "Error: Unprocessed" # pragma: no cover +class Skipped(OtherTypes): + pass - def __str__(self): - return "Error: Unprocessed" # pragma: no cover +class Unprocessed(OtherTypes): + pass -class NotHashed(object): - def __repr__(self): - return "Error: NotHashed" # pragma: no cover - def 
__str__(self): - return "Error: NotHashed" # pragma: no cover +class NotHashed(OtherTypes): + pass -def clean_type(obj, include_string_type_changes=False): +def prepare_string_for_hashing(obj, include_string_type_changes=False): """ Clean type conversions """ - if py3: - if isinstance(obj, str): - if include_string_type_changes: - obj = "{}:{}".format(type(obj).__name__, obj) - obj = obj.encode('utf-8') - elif isinstance(obj, bytes) and include_string_type_changes: - obj = type(obj).__name__.encode('utf-8') + b":" + obj - else: - if isinstance(obj, unicode): - if include_string_type_changes: - obj = u"{}:{}".format(type(obj).__name__, obj) - # obj = obj.encode('utf-8') - elif isinstance(obj, str) and include_string_type_changes: - obj = type(obj).__name__ + ":" + obj + original_type = obj.__class__.__name__ + if isinstance(obj, bytes): + obj = obj.decode('utf-8') + if include_string_type_changes: + obj = "{}:{}".format(original_type, obj) return obj @@ -183,7 +166,7 @@ def __init__(self, # testing the individual hash functions for different types of objects. 
self.constant_size = constant_size - self.__hash(obj, parents_ids=frozenset({id(obj)})) + self._hash(obj, parents_ids=frozenset({id(obj)})) if self['unprocessed']: logger.warning("Can not hash the following items: {}.".format(self['unprocessed'])) @@ -196,12 +179,12 @@ def sha1hex(obj): return sha1(obj).hexdigest() @staticmethod - def __add_to_frozen_set(parents_ids, item_id): + def _add_to_frozen_set(parents_ids, item_id): parents_ids = set(parents_ids) parents_ids.add(item_id) return frozenset(parents_ids) - def __prep_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): + def _prep_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): """Difference of 2 objects""" try: if is_namedtuple: @@ -215,31 +198,30 @@ def __prep_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): self['unprocessed'].append(obj) return self.unprocessed - result = self.__prep_dict(obj, parents_ids) - result = "nt{}".format(result) if is_namedtuple else "obj{}".format( - result) + result = self._prep_dict(obj, parents_ids) + result = "nt{}".format(result) if is_namedtuple else "obj{}".format(result) return result - def __skip_this(self, obj): + def _skip_this(self, obj): skip = False if isinstance(obj, self.exclude_types_tuple): skip = True return skip - def __prep_dict(self, obj, parents_ids=frozenset({})): + def _prep_dict(self, obj, parents_ids=frozenset({})): result = [] obj_keys = set(obj.keys()) for key in obj_keys: - key_hash = self.__hash(key) + key_hash = self._hash(key) item = obj[key] item_id = id(item) if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) - hashed = self.__hash(item, parents_ids_added) + parents_ids_added = self._add_to_frozen_set(parents_ids, item_id) + hashed = self._hash(item, parents_ids_added) hashed = "{}:{}".format(key_hash, hashed) result.append(hashed) @@ -249,23 +231,23 @@ def __prep_dict(self, obj, parents_ids=frozenset({})): return result - def 
__prep_set(self, obj): - return "set:{}".format(self.__prep_iterable(obj)) + def _prep_set(self, obj): + return "set:{}".format(self._prep_iterable(obj)) - def __prep_iterable(self, obj, parents_ids=frozenset({})): + def _prep_iterable(self, obj, parents_ids=frozenset({})): result = defaultdict(int) for i, x in enumerate(obj): - if self.__skip_this(x): + if self._skip_this(x): continue item_id = id(x) if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) - hashed = self.__hash(x, parents_ids_added) + parents_ids_added = self._add_to_frozen_set(parents_ids, item_id) + hashed = self._hash(x, parents_ids_added) # counting repetitions result[hashed] += 1 @@ -277,17 +259,14 @@ def __prep_iterable(self, obj, parents_ids=frozenset({})): '{}|{}'.format(i, v) for i, v in getattr(result, items)() ] - result.sort() - result = map(str, result) # making sure the result items are string so join command works. + result = sorted(map(str, result)) # making sure the result items are string so join command works. + # result.sort() result = ','.join(result) result = "{}:{}".format(type(obj).__name__, result) return result - def __prep_str(self, obj): - return 'str:{}'.format(obj) - - def __prep_number(self, obj): + def _prep_number(self, obj): # Based on diff.DeepDiff.__diff_numbers if self.significant_digits is not None and isinstance(obj, ( float, complex, Decimal)): @@ -301,20 +280,20 @@ def __prep_number(self, obj): result = "{}:{}".format(type(obj).__name__, obj) return result - def __prep_tuple(self, obj, parents_ids): + def _prep_tuple(self, obj, parents_ids): # Checking to see if it has _fields. Which probably means it is a named # tuple. 
try: obj._asdict # It must be a normal tuple except AttributeError: - result = self.__prep_iterable(obj, parents_ids) + result = self._prep_iterable(obj, parents_ids) # We assume it is a namedtuple then else: - result = self.__prep_obj(obj, parents_ids, is_namedtuple=True) + result = self._prep_obj(obj, parents_ids, is_namedtuple=True) return result - def __hash(self, obj, parents_ids=frozenset({})): + def _hash(self, obj, parents_ids=frozenset({})): """The main diff method""" obj_id = id(obj) @@ -323,38 +302,41 @@ def __hash(self, obj, parents_ids=frozenset({})): result = self.not_hashed - if self.__skip_this(obj): + if self._skip_this(obj): result = self.skipped elif obj is None: result = 'NONE' elif isinstance(obj, strings): - result = self.__prep_str(obj) + result = prepare_string_for_hashing(obj, include_string_type_changes=self.include_string_type_changes) elif isinstance(obj, numbers): - result = self.__prep_number(obj) + result = self._prep_number(obj) elif isinstance(obj, MutableMapping): - result = self.__prep_dict(obj, parents_ids) + result = self._prep_dict(obj, parents_ids) elif isinstance(obj, tuple): - result = self.__prep_tuple(obj, parents_ids) + result = self._prep_tuple(obj, parents_ids) elif isinstance(obj, (set, frozenset)): - result = self.__prep_set(obj) + result = self._prep_set(obj) elif isinstance(obj, Iterable): - result = self.__prep_iterable(obj, parents_ids) + result = self._prep_iterable(obj, parents_ids) else: - result = self.__prep_obj(obj, parents_ids) + result = self._prep_obj(obj, parents_ids) if result is self.not_hashed: # pragma: no cover self['unprocessed'].append(obj) elif self.constant_size: - result_cleaned = clean_type(result, self.include_string_type_changes) + if isinstance(obj, strings): + result_cleaned = result + else: + result_cleaned = prepare_string_for_hashing(result, include_string_type_changes=self.include_string_type_changes) result = self.hasher(result_cleaned) # It is important to keep the hash of all 
objects. @@ -365,7 +347,5 @@ def __hash(self, obj, parents_ids=frozenset({})): if __name__ == "__main__": # pragma: no cover - if not py3: - sys.exit("Please run with Python 3 to verify the doc strings.") import doctest doctest.testmod() diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 073dccd8..506f980c 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -329,7 +329,8 @@ class DeepDiff(ResultDict): .. note:: - All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. + All the examples for the text view work for the tree view too. + You just need to set view='tree' to get it in tree form. **Tree View** @@ -344,7 +345,8 @@ class DeepDiff(ResultDict): .. note:: The Tree view is just a different representation of the diffed data. - Behind the scene, DeepDiff creates the tree view first and then converts it to textual representation for the text view. + Behind the scene, DeepDiff creates the tree view first and then converts it to textual + representation for the text view. .. code:: text @@ -1119,7 +1121,7 @@ def __diff(self, level, parents_ids=frozenset({})): if self.__skip_this(level): return - if type(level.t1) != type(level.t2): + if type(level.t1) != type(level.t2): # NOQA self.__diff_types(level) elif isinstance(level.t1, strings): diff --git a/deepdiff/helper.py b/deepdiff/helper.py index b41fd7b5..5fdc4401 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -10,6 +10,7 @@ py_major_version = sys.version[0] py_minor_version = sys.version[2] +py2 = py_major_version == '2' py3 = py_major_version == '3' py4 = py_major_version == '4' @@ -17,27 +18,17 @@ logger.warning('Python 4 is not supported yet. Switching logic to Python 3.') py3 = True -if (py_major_version, py_minor_version) == (2.6): # pragma: no cover - sys.exit('Python 2.6 is not supported.') +if py2: # pragma: no cover + sys.exit('Python 2 is not supported. 
The last version of DeepDiff that supported Py2 was 3.3.0') pypy3 = py3 and hasattr(sys, "pypy_translation_info") -if py3: # pragma: no cover - from builtins import int - strings = (str, bytes) # which are both basestring - unicode_type = str - bytes_type = bytes - numbers = (int, float, complex, datetime.datetime, datetime.date, datetime.timedelta, Decimal) - items = 'items' -else: # pragma: no cover - int = int - strings = (str, unicode) - unicode_type = unicode - bytes_type = str - numbers = (int, float, long, complex, datetime.datetime, datetime.date, datetime.timedelta, - Decimal) - - items = 'iteritems' +# from builtins import int +strings = (str, bytes) # which are both basestring +unicode_type = str +bytes_type = bytes +numbers = (int, float, complex, datetime.datetime, datetime.date, datetime.timedelta, Decimal) +items = 'items' IndexedHash = namedtuple('IndexedHash', 'indexes item') diff --git a/tests/test_diff_tree.py b/tests/test_diff_tree.py index 4bb4f09f..6e68fc23 100644 --- a/tests/test_diff_tree.py +++ b/tests/test_diff_tree.py @@ -16,7 +16,7 @@ To run a specific test, run this from the root of repo: python -m unittest tests.test_diff_tree.DeepDiffTreeTestCase.test_same_objects """ -import unittest +import pytest from deepdiff import DeepDiff from deepdiff.helper import pypy3, notpresent from deepdiff.model import DictRelationship, NonSubscriptableIterableRelationship @@ -25,7 +25,7 @@ logging.disable(logging.CRITICAL) -class DeepDiffTreeTestCase(unittest.TestCase): +class DeepDiffTreeTestCase: """DeepDiff Tests.""" def test_same_objects(self): @@ -174,7 +174,7 @@ def test_repr(self): self.fail("Converting ddiff to string raised: {}".format(e)) -class DeepDiffTreeWithNumpyTestCase(unittest.TestCase): +class DeepDiffTreeWithNumpyTestCase: """DeepDiff Tests with Numpy.""" def setUp(self): @@ -185,13 +185,13 @@ def setUp(self): self.d1 = {'np': a1} self.d2 = {'np': a2} - @unittest.skipIf(pypy3, "Numpy is not compatible with pypy3") + 
@pytest.mark.skipif(pypy3, "Numpy is not compatible with pypy3") def test_diff_with_numpy(self): ddiff = DeepDiff(self.d1, self.d2) res = ddiff.tree self.assertEqual(res, {}) - @unittest.skipIf(pypy3, "Numpy is not compatible with pypy3") + @pytest.mark.skipif(pypy3, "Numpy is not compatible with pypy3") def test_diff_with_empty_seq(self): a1 = {"empty": []} a2 = {"empty": []} @@ -199,10 +199,10 @@ def test_diff_with_empty_seq(self): self.assertEqual(ddiff, {}) -class DeepAdditionsTestCase(unittest.TestCase): +class DeepAdditionsTestCase: """Tests for Additions and Subtractions.""" - @unittest.expectedFailure + @pytest.mark.skip(reason="Not currently implemented") def test_adding_list_diff(self): t1 = [1, 2] t2 = [1, 2, 3, 5] diff --git a/tests/test_hash.py b/tests/test_hash.py index bdc483f7..f1aa3235 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -1,29 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_hash - -Or to run all the tests: - python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - On linux: - nosetests ./tests/test_hash.py:DeepHashTestCase.test_bytecode - - On windows: - nosetests .\tests\test_hash.py:DeepHashTestCase.test_string_in_root -""" -import unittest +import pytest from deepdiff import DeepHash -from deepdiff.contenthash import clean_type -from deepdiff.helper import py3, pypy3 +from deepdiff.contenthash import prepare_string_for_hashing +from deepdiff.helper import pypy3 from collections import namedtuple from functools import partial import logging @@ -47,17 +27,17 @@ def __repr__(self): DeepHashPrep = partial(DeepHash, constant_size=False) -def prep_str(obj): - return 'str:{}'.format(obj) +def prep_str(obj, include_string_type_changes=False): + return 
'str:{}'.format(obj) if include_string_type_changes else obj -class DeepHashTestCase(unittest.TestCase): +class TestDeepHash: def test_dictionary(self): obj = {1: 1} result = DeepHash(obj) - self.assertEqual(set(result.keys()), {id(1), id(obj)}) + assert set(result.keys()) == {id(1), id(obj)} def test_list_of_sets(self): a = {1} @@ -65,17 +45,20 @@ def test_list_of_sets(self): obj = [a, b] result = DeepHash(obj) expected_result = {id(1), id(2), id(a), id(b), id(obj)} - self.assertEqual(set(result.keys()), expected_result) + assert set(result.keys()) == expected_result -class DeepHashPrepTestCase(unittest.TestCase): +class TestDeepHashPrep: """DeepHashPrep Tests covering object serialization.""" def test_prep_str(self): obj = "a" expected_result = {id(obj): prep_str(obj)} result = DeepHashPrep(obj) - self.assertEqual(result, expected_result) + assert expected_result == result + expected_result = {id(obj): prep_str(obj, include_string_type_changes=True)} + result = DeepHashPrep(obj, include_string_type_changes=True) + assert expected_result == result def test_prep_str_fail_if_mutable(self): """ @@ -87,10 +70,10 @@ def test_prep_str_fail_if_mutable(self): id_obj1 = id(obj1) expected_result = {id_obj1: prep_str(obj1)} result = DeepHashPrep(obj1) - self.assertEqual(result, expected_result) + assert expected_result == result obj2 = "b" result = DeepHashPrep(obj2) - self.assertTrue(id_obj1 not in result) + assert id_obj1 not in result def do_list_or_tuple(self, func, func_str): string1 = "a" @@ -103,7 +86,7 @@ def do_list_or_tuple(self, func, func_str): id(obj): '{}:int:10,int:20,{}'.format(func_str, string1_prepped), } result = DeepHashPrep(obj) - self.assertEqual(result, expected_result) + assert expected_result == result def test_list_and_tuple(self): for func, func_str in ((list, 'list'), (tuple, 'tuple')): @@ -120,16 +103,16 @@ def test_named_tuples(self): obj = Point(x=11) result = DeepHashPrep(obj) if pypy3: - self.assertEqual(result[id(obj)], 
'ntdict:{str:%s:int:11}' % x) + assert result[id(obj)] == 'ntdict:{%s:int:11}' % x else: expected_result = { x_id: x_prep, - id(obj): 'ntdict:{str:%s:int:11}' % x, + id(obj): 'ntdict:{%s:int:11}' % x, id(11): 'int:11', } - self.assertEqual(result, expected_result) + assert expected_result == result - def test_dict(self): + def test_dict_hash(self): string1 = "a" string1_prepped = prep_str(string1) key1 = "key1" @@ -145,7 +128,7 @@ def test_dict(self): id(obj): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % (key1, string1) } result = DeepHashPrep(obj) - self.assertEqual(result, expected_result) + assert expected_result == result def test_dict_in_list(self): string1 = "a" @@ -167,7 +150,7 @@ def test_dict_in_list(self): (key1, string1) } result = DeepHashPrep(obj) - self.assertEqual(result, expected_result) + assert expected_result == result def test_nested_lists_same_hash(self): t1 = [1, 2, [3, 4]] @@ -175,7 +158,7 @@ def test_nested_lists_same_hash(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + assert t1_hash[id(t1)] == t2_hash[id(t2)] def test_nested_lists_same_hash2(self): t1 = [1, 2, [3, [4, 5]]] @@ -183,7 +166,7 @@ def test_nested_lists_same_hash2(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + assert t1_hash[id(t1)] == t2_hash[id(t2)] def test_nested_lists_same_hash3(self): t1 = [{1: [2, 3], 4: [5, [6, 7]]}] @@ -191,7 +174,7 @@ def test_nested_lists_same_hash3(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + assert t1_hash[id(t1)] == t2_hash[id(t2)] def test_nested_lists_in_dictionary_same_hash(self): t1 = [{"c": 4}, {"c": 3}] @@ -199,7 +182,7 @@ def test_nested_lists_in_dictionary_same_hash(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + assert t1_hash[id(t1)] == t2_hash[id(t2)] def 
test_same_sets_same_hash(self): t1 = {1, 3, 2} @@ -207,7 +190,7 @@ def test_same_sets_same_hash(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + assert t1_hash[id(t1)] == t2_hash[id(t2)] def test_similar_sets_with_significant_digits_same_hash(self): t1 = {0.012, 0.98} @@ -215,7 +198,7 @@ def test_similar_sets_with_significant_digits_same_hash(self): t1_hash = DeepHashPrep(t1, significant_digits=1) t2_hash = DeepHashPrep(t2, significant_digits=1) - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + assert t1_hash[id(t1)] == t2_hash[id(t2)] def test_same_sets_in_lists_same_hash(self): t1 = ["a", {1, 3, 2}] @@ -223,7 +206,7 @@ def test_same_sets_in_lists_same_hash(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) + assert t1_hash[id(t1)] == t2_hash[id(t2)] def test_unknown_parameters(self): with self.assertRaises(ValueError): @@ -243,7 +226,7 @@ def __str__(self): result = DeepHashPrep(t1) expected_result = {id(t1): result.unprocessed, 'unprocessed': [t1]} - self.assertEqual(result, expected_result) + assert expected_result == result def test_repetition_by_default_does_not_effect(self): list1 = [3, 4] @@ -259,8 +242,8 @@ def test_repetition_by_default_does_not_effect(self): hash_a = DeepHashPrep(a) hash_b = DeepHashPrep(b) - self.assertEqual(hash_a[list1_id], hash_b[list2_id]) - self.assertEqual(hash_a[a_id], hash_b[b_id]) + assert hash_a[list1_id] == hash_b[list2_id] + assert hash_a[a_id] == hash_b[b_id] def test_setting_repetition_off_unequal_hash(self): list1 = [3, 4] @@ -279,8 +262,7 @@ def test_setting_repetition_off_unequal_hash(self): self.assertNotEqual(hash_a[list1_id], hash_b[list2_id]) self.assertNotEqual(hash_a[a_id], hash_b[b_id]) - self.assertEqual(hash_a[list1_id].replace('3|1', '3|2'), - hash_b[list2_id]) + assert hash_a[list1_id].replace('3|1', '3|2') == hash_b[list2_id] def 
test_already_calculated_hash_wont_be_recalculated(self): hashes = (i for i in range(10)) @@ -291,7 +273,7 @@ def hasher(obj): obj = "a" expected_result = {id(obj): '0'} result = DeepHash(obj, hasher=hasher) - self.assertEqual(result, expected_result) + assert expected_result == result # we simply feed the last result to DeepHash # So it can re-use the results. @@ -299,152 +281,129 @@ def hasher(obj): # if hashes are not cached and re-used, # then the next time hasher runs, it returns # number 1 instead of 0. - self.assertEqual(result2, expected_result) + assert expected_result == result2 result3 = DeepHash(obj, hasher=hasher) expected_result = {id(obj): '1'} - self.assertEqual(result3, expected_result) + assert expected_result == result3 def test_skip_type(self): l1 = logging.getLogger("test") obj = {"log": l1, 2: 1337} result = DeepHashPrep(obj, exclude_types={logging.Logger}) - self.assertEqual(result[id(l1)], result.skipped) + assert result[id(l1)] == result.skipped def test_prep_dic_with_loop(self): obj = {2: 1337} obj[1] = obj result = DeepHashPrep(obj) expected_result = {id(obj): 'dict:{int:2:int:1337}', id(1): 'int:1', id(2): 'int:2', id(1337): 'int:1337'} - self.assertEqual(result, expected_result) + assert expected_result == result def test_prep_iterable_with_loop(self): obj = [1] obj.append(obj) result = DeepHashPrep(obj) expected_result = {id(obj): 'list:int:1', id(1): 'int:1'} - self.assertEqual(result, expected_result) + assert expected_result == result def test_prep_iterable_with_excluded_type(self): l1 = logging.getLogger("test") obj = [1, l1] result = DeepHashPrep(obj, exclude_types={logging.Logger}) - self.assertTrue(id(l1) not in result) - - -class DeepHashSHA1TestCase(unittest.TestCase): - """DeepHash with SHA1 Tests.""" - - def test_prep_str(self): - obj = "a" - expected_result = { - id(obj): '48591f1d794734cabf55f96f5a5a72c084f13ac0' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - def 
test_prep_str_fail_if_mutable(self): - """ - This test fails if ContentHash is getting a mutable copy of hashes - which means each init of the ContentHash will have hashes from - the previous init. - """ - obj1 = "a" - id_obj1 = id(obj1) - expected_result = { - id_obj1: '48591f1d794734cabf55f96f5a5a72c084f13ac0' - } - result = DeepHash(obj1, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - obj2 = "b" - result = DeepHash(obj2, hasher=DeepHash.sha1hex) - self.assertTrue(id_obj1 not in result) - - def test_bytecode(self): - obj = b"a" - if py3: - expected_result = { - id(obj): '1283c61f8aa47c22d22552b742c93f6f6dac83ab' - } - else: - expected_result = { - id(obj): '48591f1d794734cabf55f96f5a5a72c084f13ac0' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - def test_list1(self): - string1 = "a" - obj = [string1, 10, 20] - expected_result = { - id(string1): '48591f1d794734cabf55f96f5a5a72c084f13ac0', - id(obj): 'ad8e2f1479d6a5e1b01304f18f04bbe3ea0673ca', - id(10): DeepHash.sha1hex('int:10'), - id(20): DeepHash.sha1hex('int:20'), - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - def test_dict1(self): - string1 = "a" - key1 = "key1" - obj = {key1: string1, 1: 10, 2: 20} - expected_result = { - id(1): DeepHash.sha1hex('int:1'), - id(10): DeepHash.sha1hex('int:10'), - id(2): DeepHash.sha1hex('int:2'), - id(20): DeepHash.sha1hex('int:20'), - id(key1): '35624f541de8d2cc9c31deba03c7dda9b1da09f7', - id(string1): '48591f1d794734cabf55f96f5a5a72c084f13ac0', - id(obj): '8fa42fa0aa950885c4c1ec95a3d6423fc673bf49' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - -class TestHasher(unittest.TestCase): + assert id(l1) not in result + + +# class TestDeepHashSHA1: +# """DeepHash with SHA1 Tests.""" + +# def test_prep_str_sha1(self): +# obj = "a" +# expected_result = { +# id(obj): 
'86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' +# } +# result = DeepHash(obj, hasher=DeepHash.sha1hex) +# assert expected_result == result + +# def test_prep_str_sha1_fail_if_mutable(self): +# """ +# This test fails if ContentHash is getting a mutable copy of hashes +# which means each init of the ContentHash will have hashes from +# the previous init. +# """ +# obj1 = "a" +# id_obj1 = id(obj1) +# expected_result = { +# id_obj1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' +# } +# result = DeepHash(obj1, hasher=DeepHash.sha1hex) +# assert expected_result == result +# obj2 = "b" +# result = DeepHash(obj2, hasher=DeepHash.sha1hex) +# assert id_obj1 not in result) + +# def test_bytecode(self): +# obj = b"a" +# expected_result = { +# id(obj): '1283c61f8aa47c22d22552b742c93f6f6dac83ab' +# } +# result = DeepHash(obj, hasher=DeepHash.sha1hex) +# assert expected_result == result + +# def test_list1(self): +# string1 = "a" +# obj = [string1, 10, 20] +# expected_result = { +# id(string1): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', +# id(obj): 'ad8e2f1479d6a5e1b01304f18f04bbe3ea0673ca', +# id(10): DeepHash.sha1hex('int:10'), +# id(20): DeepHash.sha1hex('int:20'), +# } +# result = DeepHash(obj, hasher=DeepHash.sha1hex) +# assert expected_result == result + +# def test_dict1(self): +# string1 = "a" +# key1 = "key1" +# obj = {key1: string1, 1: 10, 2: 20} +# expected_result = { +# id(1): DeepHash.sha1hex('int:1'), +# id(10): DeepHash.sha1hex('int:10'), +# id(2): DeepHash.sha1hex('int:2'), +# id(20): DeepHash.sha1hex('int:20'), +# id(key1): '35624f541de8d2cc9c31deba03c7dda9b1da09f7', +# id(string1): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', +# id(obj): '8fa42fa0aa950885c4c1ec95a3d6423fc673bf49' +# } +# result = DeepHash(obj, hasher=DeepHash.sha1hex) +# assert expected_result == result + + +class TestHasher: def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self): - if py3: - a = 'hello' - b = b'hello' - else: - a = u'hello' - b = b'hello' + a = 'hello' + b = b'hello' a_hash 
= DeepHash(a)[id(a)] b_hash = DeepHash(b)[id(b)] - self.assertEqual(a_hash, b_hash) + assert a_hash == b_hash def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self): - if py3: - a = 'hello' - b = b'hello' - else: - a = u'hello' - b = b'hello' + a = 'hello' + b = b'hello' a_hash = DeepHash(a, hasher=DeepHash.sha1hex)[id(a)] b_hash = DeepHash(b, hasher=DeepHash.sha1hex)[id(b)] - self.assertEqual(a_hash, b_hash) - + assert a_hash == b_hash -class TestCleaningString(unittest.TestCase): - - def test_clean_type(self): - - if py3: - params = ( - (b'hello', b'hello', b'bytes:hello'), - ('hello', b'hello', b'str:hello') - ) - else: - params = ( - ('hello', 'hello', 'str:hello'), - (u'hello', 'hello', 'unicode:hello') - ) - for text, text_in_byte, expected_result in params: - result = clean_type(text, include_string_type_changes=True) - assert result == expected_result +class TestCleaningString: - result = clean_type(text, include_string_type_changes=False) - assert result == text_in_byte + @pytest.mark.parametrize("text, include_string_type_changes, expected_result", [ + (b'hello', False, 'hello'), + (b'hello', True, 'bytes:hello'), + ('hello', False, 'hello'), + ('hello', True, 'str:hello'), + ]) + def test_clean_type(self, text, include_string_type_changes, expected_result): + result = prepare_string_for_hashing(text, include_string_type_changes=include_string_type_changes) + assert expected_result == result From c1e5869e40e004dc7db88c9983234333fdd62d61 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Wed, 28 Nov 2018 01:03:37 -0800 Subject: [PATCH 20/76] cleaning up --- .gitignore | 2 ++ deepdiff/__init__.py | 3 ++- deepdiff/contenthash.py | 55 +++++++++++++++++++++++++++-------------- deepdiff/diff.py | 6 +---- deepdiff/helper.py | 1 - deepdiff/model.py | 4 +-- deepdiff/search.py | 15 +++-------- requirements-dev.txt | 1 + requirements.txt | 2 ++ setup.py | 37 +++++++++++++++++++++------ tests/test_hash.py | 32 ++++++++++++------------ 11 files changed, 96 
insertions(+), 62 deletions(-) create mode 100644 requirements-dev.txt create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 538379b8..359eaf68 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,5 @@ target/ .idea/ .~lock* +.python-version + diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index 95ad688c..58b44d4b 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,4 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep and DeepHash classes.""" +# flake8: noqa +__version__ = '4.0.0' import logging if __name__ == '__main__': @@ -7,4 +9,3 @@ from .diff import DeepDiff from .search import DeepSearch, grep from .contenthash import DeepHash -from .helper import py3 diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 5432a773..7fbe796e 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -5,12 +5,16 @@ from collections import defaultdict from decimal import Decimal from hashlib import sha1 +import mmh3 import logging -from deepdiff.helper import strings, numbers, items +from deepdiff.helper import strings, numbers logger = logging.getLogger(__name__) +UNPROCESSED = 'unprocessed' +RESERVED_DICT_KEYS = {UNPROCESSED} + class OtherTypes: def __repr__(self): @@ -31,6 +35,11 @@ class NotHashed(OtherTypes): pass +unprocessed = Unprocessed() +skipped = Skipped() +not_hashed = NotHashed() + + def prepare_string_for_hashing(obj, include_string_type_changes=False): """ Clean type conversions @@ -155,10 +164,7 @@ def __init__(self, self.hasher = hash if hasher is None else hasher hashes = hashes if hashes else {} self.update(hashes) - self['unprocessed'] = [] - self.unprocessed = Unprocessed() - self.skipped = Skipped() - self.not_hashed = NotHashed() + self[UNPROCESSED] = [] self.significant_digits = significant_digits self.include_string_type_changes = include_string_type_changes # makes the hash return constant size result if true @@ -168,16 +174,29 @@ def __init__(self, self._hash(obj, 
parents_ids=frozenset({id(obj)})) - if self['unprocessed']: - logger.warning("Can not hash the following items: {}.".format(self['unprocessed'])) + if self[UNPROCESSED]: + logger.warning("Can not hash the following items: {}.".format(self[UNPROCESSED])) else: - del self['unprocessed'] + del self[UNPROCESSED] @staticmethod def sha1hex(obj): - """Use Sha1 for more accuracy.""" + """Use Sha1 as a cryptographic hash.""" + obj = obj.encode('utf-8') return sha1(obj).hexdigest() + @staticmethod + def murmur3(obj): + """Use Sha1 as a cryptographic hash.""" + obj = obj.encode('utf-8') + return mmh3.hash(obj) + + def __getitem__(self, key): + if not isinstance(key, int) and key not in RESERVED_DICT_KEYS: + key = id(key) + + return super().__getitem__(key) + @staticmethod def _add_to_frozen_set(parents_ids, item_id): parents_ids = set(parents_ids) @@ -195,8 +214,8 @@ def _prep_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): try: obj = {i: getattr(obj, i) for i in obj.__slots__} except AttributeError: - self['unprocessed'].append(obj) - return self.unprocessed + self[UNPROCESSED].append(obj) + return unprocessed result = self._prep_dict(obj, parents_ids) result = "nt{}".format(result) if is_namedtuple else "obj{}".format(result) @@ -254,13 +273,11 @@ def _prep_iterable(self, obj, parents_ids=frozenset({})): if self.ignore_repetition: result = list(result.keys()) else: - # items could be iteritems based on py version so we use getattr result = [ - '{}|{}'.format(i, v) for i, v in getattr(result, items)() + '{}|{}'.format(i, v) for i, v in result.items() ] - result = sorted(map(str, result)) # making sure the result items are string so join command works. - # result.sort() + result = sorted(map(str, result)) # making sure the result items are string and sorted so join command works. 
result = ','.join(result) result = "{}:{}".format(type(obj).__name__, result) @@ -300,10 +317,10 @@ def _hash(self, obj, parents_ids=frozenset({})): if obj_id in self: return self[obj_id] - result = self.not_hashed + result = not_hashed if self._skip_this(obj): - result = self.skipped + result = skipped elif obj is None: result = 'NONE' @@ -329,8 +346,8 @@ def _hash(self, obj, parents_ids=frozenset({})): else: result = self._prep_obj(obj, parents_ids) - if result is self.not_hashed: # pragma: no cover - self['unprocessed'].append(obj) + if result is not_hashed: # pragma: no cover + self[UNPROCESSED].append(obj) elif self.constant_size: if isinstance(obj, strings): diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 506f980c..187e470f 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -7,9 +7,6 @@ # every time you run the docstrings. # However the docstring expects it in a specific order in order to pass! -from __future__ import absolute_import -from __future__ import print_function - import re import difflib import logging @@ -17,8 +14,7 @@ from decimal import Decimal -from collections import Mapping -from collections import Iterable +from collections.abc import Mapping, Iterable from deepdiff.helper import py3, strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 5fdc4401..3d30a1d2 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -28,7 +28,6 @@ unicode_type = str bytes_type = bytes numbers = (int, float, complex, datetime.datetime, datetime.date, datetime.timedelta, Decimal) -items = 'items' IndexedHash = namedtuple('IndexedHash', 'indexes item') diff --git a/deepdiff/model.py b/deepdiff/model.py index 3c549a11..82283a41 100644 --- a/deepdiff/model.py +++ b/deepdiff/model.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from deepdiff.helper import items, RemapDict, strings, 
short_repr, Verbose, notpresent +from deepdiff.helper import RemapDict, strings, short_repr, Verbose, notpresent from ast import literal_eval from copy import copy @@ -33,7 +33,7 @@ def cleanup(self): Remove empty keys from this object. Should always be called after the result is final. :return: """ - empty_keys = [k for k, v in getattr(self, items)() if not v] + empty_keys = [k for k, v in self.items() if not v] for k in empty_keys: del self[k] diff --git a/deepdiff/search.py b/deepdiff/search.py index 25f9061e..f3f33494 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -2,16 +2,11 @@ # -*- coding: utf-8 -*- # In order to run the docstrings: # python3 -m deepdiff.search - -from __future__ import absolute_import -from __future__ import print_function import re -import sys -from collections import Iterable -from collections import MutableMapping +from collections.abc import MutableMapping, Iterable import logging -from deepdiff.helper import py3, strings, numbers, items +from deepdiff.helper import strings, numbers logger = logging.getLogger(__name__) @@ -114,7 +109,7 @@ def __init__(self, self.__search(obj, item, parents_ids=frozenset({id(obj)})) - empty_keys = [k for k, v in getattr(self, items)() if not v] + empty_keys = [k for k, v in self.items() if not v] for k in empty_keys: del self[k] @@ -155,7 +150,7 @@ def __search_obj(self, # Skip magic methods. Slightly hacky, but unless people are defining # new magic methods they want to search, it should work fine. 
obj = {i: getattr(obj, i) for i in dir(obj) - if not (i.startswith('__') and i.endswith('__'))} + if not (i.startswith('__') and i.endswith('__'))} except AttributeError: try: obj = {i: getattr(obj, i) for i in obj.__slots__} @@ -357,7 +352,5 @@ def __ror__(self, other): if __name__ == "__main__": # pragma: no cover - if not py3: - sys.exit("Please run with Python 3 to verify the doc strings.") import doctest doctest.testmod() diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..b957c99f --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1 @@ +pytest==4.0.1 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..2146c923 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +mmh3==2.5.1 +jsonpickle==1.0 diff --git a/setup.py b/setup.py index b318dc60..568b23c5 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ import os +import re from setuptools import setup # if you are not using vagrant, just delete os.link directly, @@ -6,14 +7,36 @@ if os.environ.get('USER', '') == 'vagrant': del os.link + +VERSIONFILE = "deepdiff/__init__.py" +with open(VERSIONFILE, "r") as the_file: + verstrline = the_file.read() +VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" +mo = re.search(VSRE, verstrline, re.M) +if mo: + verstr = mo.group(1) +else: + raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE,)) + + +def get_reqs(filename): + with open(filename, "r") as reqs_file: + reqs = reqs_file.readlines() + reqs = list(map(lambda x: x.replace('==', '>='), reqs)) + return reqs + + +reqs = get_reqs("requirements.txt") + try: - with open('README.txt') as file: + with open('README.rst') as file: long_description = file.read() -except: +except Exception: long_description = "Deep Difference and Search of any Python object/data." 
+ setup(name='deepdiff', - version='3.5.0', + version=verstr, description='Deep Difference and Search of any Python object/data.', url='https://github.com/seperman/deepdiff', download_url='https://github.com/seperman/deepdiff/tarball/master', @@ -21,23 +44,23 @@ author_email='sep@zepworks.com', license='MIT', packages=['deepdiff'], - zip_safe=False, + zip_safe=True, test_suite="tests", tests_require=['mock'], # 'numpy==1.11.2' numpy is needed but comes already installed with travis long_description=long_description, install_requires=[ - 'jsonpickle' + 'mmh3>=2.5.1' ], classifiers=[ "Intended Audience :: Developers", "Operating System :: OS Independent", "Topic :: Software Development", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: Implementation :: PyPy", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: Implementation :: PyPy3", "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License" ], diff --git a/tests/test_hash.py b/tests/test_hash.py index f1aa3235..e3fd77c6 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- import pytest from deepdiff import DeepHash -from deepdiff.contenthash import prepare_string_for_hashing +from deepdiff.contenthash import prepare_string_for_hashing, skipped, unprocessed from deepdiff.helper import pypy3 from collections import namedtuple from functools import partial @@ -83,7 +83,7 @@ def do_list_or_tuple(self, func, func_str): id(10): 'int:10', id(20): 'int:20', id(string1): string1_prepped, - id(obj): '{}:int:10,int:20,{}'.format(func_str, string1_prepped), + id(obj): '{}:{},int:10,int:20'.format(func_str, string1_prepped), } result = DeepHashPrep(obj) assert expected_result == result @@ -125,7 +125,7 @@ def test_dict_hash(self): 
id(20): 'int:20', id(key1): key1_prepped, id(string1): string1_prepped, - id(obj): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % (key1, string1) + id(obj): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % (key1, string1) } result = DeepHashPrep(obj) assert expected_result == result @@ -141,12 +141,12 @@ def test_dict_in_list(self): id(10): 'int:10', id(2): 'int:2', id(20): 'int:20', - id(key1): "str:{}".format(key1), - id(string1): "str:{}".format(string1), - id(dict1): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % + id(key1): key1, + id(string1): string1, + id(dict1): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % (key1, string1), id(obj): - 'list:dict:{int:1:int:10;int:2:int:20;str:%s:str:%s},int:0' % + 'list:dict:{int:1:int:10;int:2:int:20;%s:%s},int:0' % (key1, string1) } result = DeepHashPrep(obj) @@ -209,7 +209,7 @@ def test_same_sets_in_lists_same_hash(self): assert t1_hash[id(t1)] == t2_hash[id(t2)] def test_unknown_parameters(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): DeepHashPrep(1, wrong_param=2) def test_bad_attribute(self): @@ -225,7 +225,7 @@ def __str__(self): t1 = Bad() result = DeepHashPrep(t1) - expected_result = {id(t1): result.unprocessed, 'unprocessed': [t1]} + expected_result = {id(t1): unprocessed, 'unprocessed': [t1]} assert expected_result == result def test_repetition_by_default_does_not_effect(self): @@ -259,8 +259,8 @@ def test_setting_repetition_off_unequal_hash(self): hash_a = DeepHashPrep(a, ignore_repetition=False) hash_b = DeepHashPrep(b, ignore_repetition=False) - self.assertNotEqual(hash_a[list1_id], hash_b[list2_id]) - self.assertNotEqual(hash_a[a_id], hash_b[b_id]) + assert not hash_a[list1_id] == hash_b[list2_id] + assert not hash_a[a_id] == hash_b[b_id] assert hash_a[list1_id].replace('3|1', '3|2') == hash_b[list2_id] @@ -291,7 +291,7 @@ def test_skip_type(self): l1 = logging.getLogger("test") obj = {"log": l1, 2: 1337} result = DeepHashPrep(obj, exclude_types={logging.Logger}) - assert 
result[id(l1)] == result.skipped + assert result[id(l1)] is skipped def test_prep_dic_with_loop(self): obj = {2: 1337} @@ -384,15 +384,15 @@ class TestHasher: def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self): a = 'hello' b = b'hello' - a_hash = DeepHash(a)[id(a)] - b_hash = DeepHash(b)[id(b)] + a_hash = DeepHash(a)[a] + b_hash = DeepHash(b)[b] assert a_hash == b_hash def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self): a = 'hello' b = b'hello' - a_hash = DeepHash(a, hasher=DeepHash.sha1hex)[id(a)] - b_hash = DeepHash(b, hasher=DeepHash.sha1hex)[id(b)] + a_hash = DeepHash(a, hasher=DeepHash.sha1hex)[a] + b_hash = DeepHash(b, hasher=DeepHash.sha1hex)[b] assert a_hash == b_hash From 2599fda82b633237867844e76f78df3ff62fb9b0 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 8 Dec 2018 14:40:57 -0800 Subject: [PATCH 21/76] wip --- deepdiff/contenthash.py | 33 ++--------- deepdiff/diff.py | 11 ++-- deepdiff/helper.py | 31 +++++++++-- deepdiff/search.py | 2 +- tests/test_diff_tree.py | 118 ++++++++++++++++++++-------------------- tests/test_search.py | 5 +- 6 files changed, 95 insertions(+), 105 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 7fbe796e..def9e240 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -8,7 +8,7 @@ import mmh3 import logging -from deepdiff.helper import strings, numbers +from deepdiff.helper import strings, numbers, unprocessed, skipped, not_hashed logger = logging.getLogger(__name__) @@ -16,30 +16,6 @@ RESERVED_DICT_KEYS = {UNPROCESSED} -class OtherTypes: - def __repr__(self): - return "Error: {}".format(self.__class__.__name__) # pragma: no cover - - __str__ = __repr__ - - -class Skipped(OtherTypes): - pass - - -class Unprocessed(OtherTypes): - pass - - -class NotHashed(OtherTypes): - pass - - -unprocessed = Unprocessed() -skipped = Skipped() -not_hashed = NotHashed() - - def prepare_string_for_hashing(obj, include_string_type_changes=False): """ Clean type 
conversions @@ -143,7 +119,7 @@ class DeepHash(dict): def __init__(self, obj, hashes=None, - exclude_types=set(), + exclude_types=None, hasher=None, ignore_repetition=True, significant_digits=None, @@ -156,9 +132,8 @@ def __init__(self, "The valid parameters are obj, hashes, exclude_types." "hasher and ignore_repetition.") % ', '.join(kwargs.keys())) self.obj = obj - self.exclude_types = set(exclude_types) - self.exclude_types_tuple = tuple( - exclude_types) # we need tuple for checking isinstance + exclude_types = set() if exclude_types is None else set(exclude_types) + self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance self.ignore_repetition = ignore_repetition self.hasher = hash if hasher is None else hasher diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 187e470f..7f6bbaab 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -16,16 +16,13 @@ from collections.abc import Mapping, Iterable -from deepdiff.helper import py3, strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose +from deepdiff.helper import py3, strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose, unprocessed from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel -from deepdiff.model import DictRelationship, AttributeRelationship # , REPORT_KEYS +from deepdiff.model import DictRelationship, AttributeRelationship from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship from deepdiff.contenthash import DeepHash -if py3: # pragma: no cover - from itertools import zip_longest -else: # pragma: no cover - from itertools import izip_longest as zip_longest +from itertools import zip_longest logger = logging.getLogger(__name__) @@ -998,7 +995,7 @@ def __create_hashtable(self, t, level): "Not counting this object.\n %s" % (level.path(), e)) else: - if item_hash is hashes_all.unprocessed: # pragma: no cover + if 
item_hash is unprocessed: # pragma: no cover logger.warning("Item %s was not processed while hashing " "thus not counting this object." % level.path()) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 3d30a1d2..15959d9b 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -57,21 +57,40 @@ class ListItemRemovedOrAdded(object): # pragma: no cover pass -class NotPresent(object): # pragma: no cover +class OtherTypes: + def __repr__(self): + return "Error: {}".format(self.__class__.__name__) # pragma: no cover + + __str__ = __repr__ + + +class Skipped(OtherTypes): + pass + + +class Unprocessed(OtherTypes): + pass + + +class NotHashed(OtherTypes): + pass + + +class NotPresent(OtherTypes): # pragma: no cover """ In a change tree, this indicated that a previously existing object has been removed -- or will only be added in the future. We previously used None for this but this caused problem when users actually added and removed None. Srsly guys? :D """ - def __repr__(self): - return "Not Present" - - def __str__(self): - return self.__repr__() + pass +unprocessed = Unprocessed() +skipped = Skipped() +not_hashed = NotHashed() notpresent = NotPresent() + WARNING_NUM = 0 diff --git a/deepdiff/search.py b/deepdiff/search.py index f3f33494..95555296 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -94,7 +94,7 @@ def __init__(self, self.case_sensitive = case_sensitive if isinstance(item, strings) else True item = item if self.case_sensitive else item.lower() self.exclude_paths = set(exclude_paths) - self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in set(exclude_regex_paths)] + self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in exclude_regex_paths] self.exclude_types = set(exclude_types) self.exclude_types_tuple = tuple( exclude_types) # we need tuple for checking isinstance diff --git a/tests/test_diff_tree.py b/tests/test_diff_tree.py index 6e68fc23..0bad2236 100644 --- 
a/tests/test_diff_tree.py +++ b/tests/test_diff_tree.py @@ -25,7 +25,7 @@ logging.disable(logging.CRITICAL) -class DeepDiffTreeTestCase: +class TestDeepDiffTree: """DeepDiff Tests.""" def test_same_objects(self): @@ -33,19 +33,19 @@ def test_same_objects(self): t2 = t1 ddiff = DeepDiff(t1, t2) res = ddiff.tree - self.assertEqual(res, {}) + assert res == {} def test_significant_digits_signed_zero(self): t1 = 0.00001 t2 = -0.0001 ddiff = DeepDiff(t1, t2, significant_digits=2) res = ddiff.tree - self.assertEqual(res, {}) + assert res == {} t1 = 1 * 10**-12 t2 = -1 * 10**-12 ddiff = DeepDiff(t1, t2, significant_digits=10) res = ddiff.tree - self.assertEqual(res, {}) + assert res == {} def test_item_added_extensive(self): t1 = {'one': 1, 'two': 2, 'three': 3, 'four': 4} @@ -53,59 +53,57 @@ def test_item_added_extensive(self): ddiff = DeepDiff(t1, t2) res = ddiff.tree (key, ) = res.keys() - self.assertEqual(key, 'dictionary_item_added') - self.assertEqual(len(res['dictionary_item_added']), 1) + assert key == 'dictionary_item_added' + assert len(res['dictionary_item_added']) == 1 (added1, ) = res['dictionary_item_added'] # assert added1 DiffLevel chain is valid at all - self.assertEqual(added1.up.down, added1) - self.assertIsNone(added1.down) - self.assertIsNone(added1.up.up) - self.assertEqual(added1.all_up, added1.up) - self.assertEqual(added1.up.all_down, added1) - self.assertEqual(added1.report_type, 'dictionary_item_added') + assert added1.up.down == added1 + assert added1.down is None + assert added1.up.up is None + assert added1.all_up == added1.up + assert added1.up.all_down == added1 + assert added1.report_type == 'dictionary_item_added' # assert DiffLevel chain points to the objects we entered - self.assertEqual(added1.up.t1, t1) - self.assertEqual(added1.up.t2, t2) + assert added1.up.t1 == t1 + assert added1.up.t2 == t2 - self.assertEqual(added1.t1, notpresent) - self.assertEqual(added1.t2, 1337) + assert added1.t1 is notpresent + assert added1.t2 == 1337 # 
assert DiffLevel child relationships are correct - self.assertIsNone(added1.up.t1_child_rel) - self.assertIsInstance(added1.up.t2_child_rel, DictRelationship) - self.assertEqual(added1.up.t2_child_rel.parent, added1.up.t2) - self.assertEqual(added1.up.t2_child_rel.child, added1.t2) - self.assertEqual(added1.up.t2_child_rel.param, 'new') + assert added1.up.t1_child_rel is None + assert isinstance(added1.up.t2_child_rel, DictRelationship) + assert added1.up.t2_child_rel.parent == added1.up.t2 + assert added1.up.t2_child_rel.child == added1.t2 + assert added1.up.t2_child_rel.param == 'new' - self.assertEqual(added1.up.path(), "root") - self.assertEqual(added1.path(), "root['new']") + assert added1.up.path() == "root" + assert added1.path() == "root['new']" def test_item_added_and_removed(self): t1 = {'one': 1, 'two': 2, 'three': 3, 'four': 4} t2 = {'one': 1, 'two': 4, 'three': 3, 'five': 5, 'six': 6} ddiff = DeepDiff(t1, t2, view='tree') - self.assertEqual( - set(ddiff.keys()), { - 'dictionary_item_added', 'dictionary_item_removed', - 'values_changed' - }) - self.assertEqual(len(ddiff['dictionary_item_added']), 2) - self.assertEqual(len(ddiff['dictionary_item_removed']), 1) + assert set(ddiff.keys()) == { + 'dictionary_item_added', 'dictionary_item_removed', + 'values_changed' + } + assert len(ddiff['dictionary_item_added']) == 2 + assert len(ddiff['dictionary_item_removed']) == 1 def test_item_added_and_removed2(self): t1 = {2: 2, 4: 4} t2 = {2: "b", 5: 5} ddiff = DeepDiff(t1, t2, view='tree') - self.assertEqual( - set(ddiff.keys()), { - 'dictionary_item_added', 'dictionary_item_removed', - 'type_changes' - }) - self.assertEqual(len(ddiff['dictionary_item_added']), 1) - self.assertEqual(len(ddiff['dictionary_item_removed']), 1) + assert set(ddiff.keys()), { + 'dictionary_item_added', 'dictionary_item_removed', + 'type_changes' + } + assert len(ddiff['dictionary_item_added']) == 1 + assert len(ddiff['dictionary_item_removed']) == 1 def 
test_non_subscriptable_iterable(self): t1 = (i for i in [42, 1337, 31337]) @@ -116,18 +114,18 @@ def test_non_subscriptable_iterable(self): ddiff = DeepDiff(t1, t2, view='tree') (change, ) = ddiff['iterable_item_removed'] - self.assertEqual(set(ddiff.keys()), {'iterable_item_removed'}) - self.assertEqual(len(ddiff['iterable_item_removed']), 1) + assert set(ddiff.keys()) == {'iterable_item_removed'} + assert len(ddiff['iterable_item_removed']) == 1 - self.assertEqual(change.up.t1, t1) - self.assertEqual(change.up.t2, t2) - self.assertEqual(change.report_type, 'iterable_item_removed') - self.assertEqual(change.t1, 31337) - self.assertEqual(change.t2, notpresent) + assert change.up.t1 == t1 + assert change.up.t2 == t2 + assert change.report_type == 'iterable_item_removed' + assert change.t1 == 31337 + assert change.t2 is notpresent - self.assertIsInstance(change.up.t1_child_rel, - NonSubscriptableIterableRelationship) - self.assertIsNone(change.up.t2_child_rel) + assert isinstance(change.up.t1_child_rel, + NonSubscriptableIterableRelationship) + assert change.up.t2_child_rel is None def test_non_subscriptable_iterable_path(self): t1 = (i for i in [42, 1337, 31337]) @@ -136,9 +134,9 @@ def test_non_subscriptable_iterable_path(self): (change, ) = ddiff['iterable_item_removed'] # testing path - self.assertEqual(change.path(), None) - self.assertEqual(change.path(force='yes'), 'root(unrepresentable)') - self.assertEqual(change.path(force='fake'), 'root[2]') + assert change.path() is None + assert change.path(force='yes') == 'root(unrepresentable)' + assert change.path(force='fake') == 'root[2]' def test_significant_digits(self): ddiff = DeepDiff( @@ -146,7 +144,7 @@ def test_significant_digits(self): [0.013, 0.99], significant_digits=1, view='tree') - self.assertEqual(ddiff, {}) + assert ddiff == {} def test_significant_digits_with_sets(self): ddiff = DeepDiff( @@ -154,7 +152,7 @@ def test_significant_digits_with_sets(self): {0.013, 0.99}, significant_digits=1, 
view='tree') - self.assertEqual(ddiff, {}) + assert ddiff == {} def test_significant_digits_with_ignore_order(self): ddiff = DeepDiff( @@ -162,7 +160,7 @@ def test_significant_digits_with_ignore_order(self): significant_digits=1, ignore_order=True, view='tree') - self.assertEqual(ddiff, {}) + assert ddiff == {} def test_repr(self): t1 = {1, 2, 8} @@ -174,7 +172,7 @@ def test_repr(self): self.fail("Converting ddiff to string raised: {}".format(e)) -class DeepDiffTreeWithNumpyTestCase: +class TestDeepDiffTreeWithNumpy: """DeepDiff Tests with Numpy.""" def setUp(self): @@ -185,21 +183,21 @@ def setUp(self): self.d1 = {'np': a1} self.d2 = {'np': a2} - @pytest.mark.skipif(pypy3, "Numpy is not compatible with pypy3") + @pytest.mark.skipif(pypy3, reason="Numpy is not compatible with pypy3") def test_diff_with_numpy(self): ddiff = DeepDiff(self.d1, self.d2) res = ddiff.tree - self.assertEqual(res, {}) + assert res == {} - @pytest.mark.skipif(pypy3, "Numpy is not compatible with pypy3") + @pytest.mark.skipif(pypy3, reason="Numpy is not compatible with pypy3") def test_diff_with_empty_seq(self): a1 = {"empty": []} a2 = {"empty": []} ddiff = DeepDiff(a1, a2) - self.assertEqual(ddiff, {}) + assert ddiff == {} -class DeepAdditionsTestCase: +class TestDeepAdditions: """Tests for Additions and Subtractions.""" @pytest.mark.skip(reason="Not currently implemented") @@ -208,4 +206,4 @@ def test_adding_list_diff(self): t2 = [1, 2, 3, 5] ddiff = DeepDiff(t1, t2, view='tree') addition = ddiff + t1 - self.assertEqual(addition, t2) + assert addition == t2 diff --git a/tests/test_search.py b/tests/test_search.py index 0f2d7e0d..94ae964b 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -243,13 +243,13 @@ def test_skip_type_str(self): def test_skip_regexp(self): obj = [{'a': 1, 'b': "somewhere"}, {'c': 4, 'b': "somewhere"}] - ds = DeepSearch(obj, item, exclude_regex_paths=["root\[\d+\]"]) + ds = DeepSearch(obj, item, exclude_regex_paths=[r"root\[\d+\]"]) result = {} 
self.assertEqual(ds, result) def test_skip_regexp2(self): obj = {'a': [1, 2, [3, [item]]]} - ds = DeepSearch(obj, item, exclude_regex_paths=["\[\d+\]"]) + ds = DeepSearch(obj, item, exclude_regex_paths=[r"\[\d+\]"]) result = {} self.assertEqual(ds, result) @@ -324,6 +324,7 @@ def __eq__(self, other): obj = AlwaysEqual() item = AlwaysEqual() result = {'matched_values': {'root', 'root.some_attr'}} + self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) def test_search_inherited_attributes(self): class Parent(object): From 05fa417d8098134efb8a2c5c03be7c6eaca09489 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 8 Dec 2018 22:31:39 -0800 Subject: [PATCH 22/76] fixing tests --- deepdiff/helper.py | 2 +- requirements-dev.txt | 2 + tests/test_diff_text.py | 250 +++++++++++++++++++--------------------- tests/test_diff_tree.py | 15 +-- 4 files changed, 126 insertions(+), 143 deletions(-) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 15959d9b..0d38a69f 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -19,7 +19,7 @@ py3 = True if py2: # pragma: no cover - sys.exit('Python 2 is not supported. The last version of DeepDiff that supported Py2 was 3.3.0') + sys.exit('Python 2 is not supported anymore. 
The last version of DeepDiff that supported Py2 was 3.3.0') pypy3 = py3 and hasattr(sys, "pypy_translation_info") diff --git a/requirements-dev.txt b/requirements-dev.txt index b957c99f..ac0539d7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1 +1,3 @@ pytest==4.0.1 +pytest-cov==2.6.0 +numpy==1.15.4 diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index eb84faa4..2733efd8 100644 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -19,34 +19,30 @@ or using nosetests: nosetests tests/test_diff_text.py:DeepDiffTestCase.test_diff_when_hash_fails """ -import unittest import datetime +import pytest +import logging from decimal import Decimal from deepdiff import DeepDiff from deepdiff.helper import py3 from tests import CustomClass -if py3: - from unittest import mock -else: - import mock - -import logging +from unittest import mock logging.disable(logging.CRITICAL) -class DeepDiffTextTestCase(unittest.TestCase): +class TestDeepDiffText: """DeepDiff Tests.""" def test_same_objects(self): t1 = {1: 1, 2: 2, 3: 3} t2 = t1 - self.assertEqual(DeepDiff(t1, t2), {}) + assert {} == DeepDiff(t1, t2) def test_item_type_change(self): t1 = {1: 1, 2: 2, 3: 3} t2 = {1: 1, 2: "2", 3: 3} ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff, { + assert { 'type_changes': { "root[2]": { "old_value": 2, @@ -55,20 +51,17 @@ def test_item_type_change(self): "new_type": str } } - }) + } == ddiff def test_item_type_change_less_verbose(self): t1 = {1: 1, 2: 2, 3: 3} t2 = {1: 1, 2: "2", 3: 3} - self.assertEqual( - DeepDiff( - t1, t2, verbose_level=0), - {'type_changes': { + assert {'type_changes': { "root[2]": { "old_type": int, "new_type": str } - }}) + }} == DeepDiff(t1, t2, verbose_level=0) def test_value_change(self): t1 = {1: 1, 2: 2, 3: 3} @@ -81,7 +74,7 @@ def test_value_change(self): } } } - self.assertEqual(DeepDiff(t1, t2), result) + assert result == DeepDiff(t1, t2) def test_item_added_and_removed(self): t1 = {1: 1, 2: 2, 3: 3, 4: 4} @@ -97,7 
+90,7 @@ def test_item_added_and_removed(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_item_added_and_removed_verbose(self): t1 = {1: 1, 3: 3, 4: 4} @@ -112,7 +105,7 @@ def test_item_added_and_removed_verbose(self): 'root[5]': 5 } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_diffs_dates(self): t1 = datetime.date(2016, 8, 8) @@ -126,7 +119,7 @@ def test_diffs_dates(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_diffs_timedeltas(self): t1 = datetime.timedelta(days=1, seconds=12) @@ -141,10 +134,10 @@ def test_diffs_timedeltas(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff ddiff = DeepDiff(t1, t3) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_string_difference(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world"}} @@ -162,7 +155,7 @@ def test_string_difference(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_diffs_equal_strings_when_not_identical(self): t1 = 'hello' @@ -170,7 +163,7 @@ def test_diffs_equal_strings_when_not_identical(self): t2 += 'lo' assert t1 is not t2 ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_string_difference2(self): t1 = { @@ -194,7 +187,7 @@ def test_string_difference2(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_bytes(self): t1 = { @@ -216,6 +209,7 @@ def test_bytes(self): "c": b'\x81', } } + ddiff = DeepDiff(t1, t2) result = { 'values_changed': { @@ -231,7 +225,7 @@ def test_bytes(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_unicode(self): t1 = { @@ -255,7 +249,7 @@ def test_unicode(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_type_change(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} @@ -271,7 +265,7 @@ def test_type_change(self): } } } - self.assertEqual(ddiff, result) + assert result 
== ddiff def test_list_difference(self): t1 = { @@ -291,14 +285,14 @@ def test_list_difference(self): "root[4]['b'][3]": 'to_be_removed2' } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference_add(self): t1 = [1, 2] t2 = [1, 2, 3, 5] ddiff = DeepDiff(t1, t2) result = {'iterable_item_added': {'root[2]': 3, 'root[3]': 5}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference2(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3, 10, 12]}} @@ -320,7 +314,7 @@ def test_list_difference2(self): } } ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference3(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 5]}} @@ -341,7 +335,7 @@ def test_list_difference3(self): "root[4]['b'][3]": 5 } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference4(self): # TODO: Look into Levenshtein algorithm @@ -364,31 +358,31 @@ def test_list_difference4(self): 'root[4]': 'e' } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference_ignore_order(self): t1 = {1: 1, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 4: {"a": "hello", "b": [1, 3, 2, 3]}} ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_dictionary_difference_ignore_order(self): t1 = {"a": [[{"b": 2, "c": 4}, {"b": 2, "c": 3}]]} t2 = {"a": [[{"b": 2, "c": 3}, {"b": 2, "c": 4}]]} ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_nested_list_ignore_order(self): t1 = [1, 2, [3, 4]] t2 = [[4, 3, 3], 2, 1] ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_nested_list_difference_ignore_order(self): t1 = [1, 2, [3, 4]] t2 = [[4, 3], 2, 1] ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def 
test_nested_list_with_dictionarry_difference_ignore_order(self): t1 = [1, 2, [3, 4, {1: 2}]] @@ -397,7 +391,7 @@ def test_nested_list_with_dictionarry_difference_ignore_order(self): ddiff = DeepDiff(t1, t2, ignore_order=True) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference_ignore_order_report_repetition(self): t1 = [1, 3, 1, 4] @@ -424,7 +418,7 @@ def test_list_difference_ignore_order_report_repetition(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff # TODO: fix repeition report def test_nested_list_ignore_order_report_repetition_wrong_currently(self): @@ -442,13 +436,13 @@ def test_nested_list_ignore_order_report_repetition_wrong_currently(self): } } } - self.assertNotEqual(ddiff, result) + assert result != ddiff def test_list_of_unhashable_difference_ignore_order(self): t1 = [{"a": 2}, {"b": [3, 4, {1: 1}]}] t2 = [{"b": [3, 4, {1: 1}]}, {"a": 2}] ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_list_of_unhashable_difference_ignore_order2(self): t1 = [1, {"a": 2}, {"b": [3, 4, {1: 1}]}, "B"] @@ -465,7 +459,7 @@ def test_list_of_unhashable_difference_ignore_order2(self): 'root[0]': 1 } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_unhashable_difference_ignore_order3(self): t1 = [1, {"a": 2}, {"a": 2}, {"b": [3, 4, {1: 1}]}, "B"] @@ -485,7 +479,7 @@ def test_list_of_unhashable_difference_ignore_order3(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_unhashable_difference_ignore_order_report_repetition( self): @@ -509,14 +503,14 @@ def test_list_of_unhashable_difference_ignore_order_report_repetition( } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_unhashable_difference_ignore_order4(self): t1 = [{"a": 2}, {"a": 2}] t2 = [{"a": 2}] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {} - self.assertEqual(ddiff, result) + assert result == 
ddiff def test_list_of_unhashable_difference_ignore_order_report_repetition2( self): @@ -536,14 +530,14 @@ def test_list_of_unhashable_difference_ignore_order_report_repetition2( } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_sets_difference_ignore_order(self): t1 = [{1}, {2}, {3}] t2 = [{4}, {1}, {2}, {3}] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {'iterable_item_added': {'root[0]': {4}}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_sets_difference_ignore_order_when_there_is_duplicate( self): @@ -551,7 +545,7 @@ def test_list_of_sets_difference_ignore_order_when_there_is_duplicate( t2 = [{4}, {1}, {2}, {3}, {3}] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {'iterable_item_added': {'root[0]': {4}}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_sets_difference_ignore_order_when_there_is_duplicate_and_mix_of_hashable_unhashable( self): @@ -559,14 +553,14 @@ def test_list_of_sets_difference_ignore_order_when_there_is_duplicate_and_mix_of t2 = [{4}, 1, {2}, {3}, {3}, 1, 1] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {'iterable_item_added': {'root[0]': {4}}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_set_of_none(self): """ https://github.com/seperman/deepdiff/issues/64 """ ddiff = DeepDiff(set(), set([None])) - self.assertEqual(ddiff, {'set_item_added': {'root[None]'}}) + assert {'set_item_added': {'root[None]'}} == ddiff def test_list_that_contains_dictionary(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, {1: 1, 2: 2}]}} @@ -581,7 +575,7 @@ def test_list_that_contains_dictionary(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_dictionary_of_list_of_dictionary_ignore_order(self): t1 = { @@ -605,7 +599,7 @@ def test_dictionary_of_list_of_dictionary_ignore_order(self): } ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def 
test_comprehensive_ignore_order(self): @@ -638,7 +632,7 @@ def test_comprehensive_ignore_order(self): } ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_ignore_order_when_objects_similar(self): """ @@ -696,7 +690,7 @@ def test_ignore_order_when_objects_similar(self): } ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, { + assert { 'iterable_item_removed': { "root['key2'][1]": { 'key5': 'val5', @@ -709,7 +703,7 @@ def test_ignore_order_when_objects_similar(self): 'key6': 'val6' } } - }) + } == ddiff def test_set_ignore_order_report_repetition(self): """ @@ -724,7 +718,7 @@ def test_set_ignore_order_report_repetition(self): 'set_item_added': {'root[3]', 'root[5]'}, 'set_item_removed': {'root[8]'} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_set(self): t1 = {1, 2, 8} @@ -734,14 +728,14 @@ def test_set(self): 'set_item_added': {'root[3]', 'root[5]'}, 'set_item_removed': {'root[8]'} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_set_strings(self): t1 = {"veggies", "tofu"} t2 = {"veggies", "tofu", "seitan"} ddiff = DeepDiff(t1, t2) result = {'set_item_added': {"root['seitan']"}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_frozenset(self): t1 = frozenset([1, 2, 'B']) @@ -751,7 +745,7 @@ def test_frozenset(self): 'set_item_added': {'root[3]', 'root[5]'}, 'set_item_removed': {"root['B']"} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_tuple(self): t1 = (1, 2, 8) @@ -768,7 +762,7 @@ def test_tuple(self): 'root[3]': 5 } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_named_tuples(self): from collections import namedtuple @@ -784,7 +778,7 @@ def test_named_tuples(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_custom_objects_change(self): t1 = CustomClass(1) @@ -798,7 +792,7 @@ def test_custom_objects_change(self): } } } - self.assertEqual(ddiff, result) + 
assert result == ddiff def test_custom_objects_slot_change(self): class ClassA(object): @@ -819,7 +813,7 @@ def __init__(self, x, y): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_custom_objects_with_single_protected_slot(self): class ClassA(object): @@ -833,8 +827,8 @@ def __str__(self): t1 = ClassA() t2 = ClassA() - diff = DeepDiff(t1, t2) - self.assertEqual(diff, {}) + ddiff = DeepDiff(t1, t2) + assert {} == ddiff def get_custom_objects_add_and_remove(self): class ClassA(object): @@ -863,7 +857,7 @@ def test_custom_objects_add_and_remove(self): }, 'attribute_removed': {'root.d'} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_custom_objects_add_and_remove_verbose(self): t1, t2 = self.get_custom_objects_add_and_remove() @@ -882,7 +876,7 @@ def test_custom_objects_add_and_remove_verbose(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def get_custom_object_with_added_removed_methods(self): class ClassA(object): @@ -910,13 +904,13 @@ def test_custom_objects_add_and_remove_method(self): ddiff = DeepDiff(t1, t2) result = {'attribute_added': {'root.method_a', 'root.method_b'}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_custom_objects_add_and_remove_method_verbose(self): t1, t2 = self.get_custom_object_with_added_removed_methods() ddiff = DeepDiff(t1, t2, verbose_level=2) - self.assertTrue('root.method_a' in ddiff['attribute_added']) - self.assertTrue('root.method_b' in ddiff['attribute_added']) + assert 'root.method_a' in ddiff['attribute_added'] + assert 'root.method_b' in ddiff['attribute_added'] def test_set_of_custom_objects(self): member1 = CustomClass(13, 37) @@ -925,7 +919,7 @@ def test_set_of_custom_objects(self): t2 = {member2} ddiff = DeepDiff(t1, t2) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_dictionary_of_custom_objects(self): member1 = CustomClass(13, 37) @@ -934,7 +928,7 @@ def test_dictionary_of_custom_objects(self): t2 
= {1: member2} ddiff = DeepDiff(t1, t2) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_loop(self): class LoopTest(object): @@ -954,7 +948,7 @@ def __init__(self, a): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_loop2(self): class LoopTestA(object): @@ -979,7 +973,7 @@ def __init__(self, a): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_loop3(self): class LoopTest(object): @@ -999,7 +993,7 @@ def __init__(self, a): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_loop_in_lists(self): t1 = [1, 2, 3] @@ -1017,7 +1011,7 @@ def test_loop_in_lists(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_loop_in_lists2(self): t1 = [1, 2, [3]] @@ -1034,7 +1028,7 @@ def test_loop_in_lists2(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_decimal(self): t1 = {1: Decimal('10.1')} @@ -1048,14 +1042,14 @@ def test_decimal(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_decimal_ignore_order(self): t1 = [{1: Decimal('10.1')}, {2: Decimal('10.2')}] t2 = [{2: Decimal('10.2')}, {1: Decimal('10.1')}] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_unicode_string_type_changes(self): unicode_string = {"hello": u"你好"} @@ -1076,7 +1070,7 @@ def test_unicode_string_type_changes(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_unicode_string_value_changes(self): unicode_string = {"hello": u"你好"} @@ -1100,7 +1094,7 @@ def test_unicode_string_value_changes(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_unicode_string_value_and_type_changes(self): unicode_string = {"hello": u"你好"} @@ -1129,7 +1123,7 @@ def test_unicode_string_value_and_type_changes(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def 
test_int_to_unicode_string(self): t1 = 1 @@ -1160,7 +1154,7 @@ def test_int_to_unicode_string(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_int_to_unicode(self): t1 = 1 @@ -1191,19 +1185,19 @@ def test_int_to_unicode(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_significant_digits_for_decimals(self): t1 = Decimal('2.5') t2 = Decimal('1.5') ddiff = DeepDiff(t1, t2, significant_digits=0) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_significant_digits_for_complex_imaginary_part(self): t1 = 1.23 + 1.222254j t2 = 1.23 + 1.222256j ddiff = DeepDiff(t1, t2, significant_digits=4) - self.assertEqual(ddiff, {}) + assert {} == ddiff result = { 'values_changed': { 'root': { @@ -1213,19 +1207,19 @@ def test_significant_digits_for_complex_imaginary_part(self): } } ddiff = DeepDiff(t1, t2, significant_digits=5) - self.assertEqual(ddiff, result) + assert result == ddiff def test_significant_digits_for_complex_real_part(self): t1 = 1.23446879 + 1.22225j t2 = 1.23446764 + 1.22225j ddiff = DeepDiff(t1, t2, significant_digits=5) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_significant_digits_for_list_of_floats(self): t1 = [1.2344, 5.67881, 6.778879] t2 = [1.2343, 5.67882, 6.778878] ddiff = DeepDiff(t1, t2, significant_digits=3) - self.assertEqual(ddiff, {}) + assert {} == ddiff ddiff = DeepDiff(t1, t2, significant_digits=4) result = { 'values_changed': { @@ -1235,7 +1229,7 @@ def test_significant_digits_for_list_of_floats(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff ddiff = DeepDiff(t1, t2, significant_digits=5) result = { 'values_changed': { @@ -1249,13 +1243,13 @@ def test_significant_digits_for_list_of_floats(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff ddiff = DeepDiff(t1, t2) ddiff2 = DeepDiff(t1, t2, significant_digits=6) - self.assertEqual(ddiff, ddiff2) + assert ddiff2 == ddiff def test_negative_significant_digits(self): - 
with self.assertRaises(ValueError): + with pytest.raises(ValueError): DeepDiff(1, 1, significant_digits=-1) def test_base_level_dictionary_remapping(self): @@ -1268,36 +1262,26 @@ def test_base_level_dictionary_remapping(self): t1 = {1: 1, 2: 2} t2 = {2: 2, 3: 3} ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff['dic_item_added'], - ddiff['dictionary_item_added']) - self.assertEqual(ddiff['dic_item_removed'], - ddiff['dictionary_item_removed']) + assert ddiff['dic_item_added'] == ddiff['dictionary_item_added'] + assert ddiff['dic_item_removed'] == ddiff['dictionary_item_removed'] def test_index_and_repeat_dictionary_remapping(self): t1 = [1, 3, 1, 4] t2 = [4, 4, 1] ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) - self.assertEqual(ddiff['repetition_change']['root[0]']['newindexes'], - ddiff['repetition_change']['root[0]']['new_indexes']) - self.assertEqual(ddiff['repetition_change']['root[0]']['newrepeat'], - ddiff['repetition_change']['root[0]']['new_repeat']) - self.assertEqual(ddiff['repetition_change']['root[0]']['oldindexes'], - ddiff['repetition_change']['root[0]']['old_indexes']) - self.assertEqual(ddiff['repetition_change']['root[0]']['oldrepeat'], - ddiff['repetition_change']['root[0]']['old_repeat']) + assert ddiff['repetition_change']['root[0]']['newindexes'] == ddiff['repetition_change']['root[0]']['new_indexes'] + assert ddiff['repetition_change']['root[0]']['newrepeat'] == ddiff['repetition_change']['root[0]']['new_repeat'] + assert ddiff['repetition_change']['root[0]']['oldindexes'] == ddiff['repetition_change']['root[0]']['old_indexes'] + assert ddiff['repetition_change']['root[0]']['oldrepeat'] == ddiff['repetition_change']['root[0]']['old_repeat'] def test_value_and_type_dictionary_remapping(self): t1 = {1: 1, 2: 2} t2 = {1: 1, 2: '2'} ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff['type_changes']['root[2]']['newtype'], - ddiff['type_changes']['root[2]']['new_type']) - 
self.assertEqual(ddiff['type_changes']['root[2]']['newvalue'], - ddiff['type_changes']['root[2]']['new_value']) - self.assertEqual(ddiff['type_changes']['root[2]']['oldtype'], - ddiff['type_changes']['root[2]']['old_type']) - self.assertEqual(ddiff['type_changes']['root[2]']['oldvalue'], - ddiff['type_changes']['root[2]']['old_value']) + assert ddiff['type_changes']['root[2]']['newtype'] == ddiff['type_changes']['root[2]']['new_type'] + assert ddiff['type_changes']['root[2]']['newvalue'] == ddiff['type_changes']['root[2]']['new_value'] + assert ddiff['type_changes']['root[2]']['oldtype'] == ddiff['type_changes']['root[2]']['old_type'] + assert ddiff['type_changes']['root[2]']['oldvalue'] == ddiff['type_changes']['root[2]']['old_value'] def test_skip_type(self): l1 = logging.getLogger("test") @@ -1305,12 +1289,12 @@ def test_skip_type(self): t1 = {"log": l1, 2: 1337} t2 = {"log": l2, 2: 1337} ddiff = DeepDiff(t1, t2, exclude_types={logging.Logger}) - self.assertEqual(ddiff, {}) + assert {} == ddiff t1 = {"log": "book", 2: 1337} t2 = {"log": l2, 2: 1337} ddiff = DeepDiff(t1, t2, exclude_types={logging.Logger}) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_skip_path1(self): t1 = { @@ -1322,7 +1306,7 @@ def test_skip_path1(self): "ingredients": ["veggies", "tofu", "soy sauce"] } ddiff = DeepDiff(t1, t2, exclude_paths={"root['ingredients']"}) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_skip_path2(self): t1 = { @@ -1331,7 +1315,7 @@ def test_skip_path2(self): } t2 = {"for life": "vegan"} ddiff = DeepDiff(t1, t2, exclude_paths={"root['ingredients']"}) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_skip_path2_reverse(self): t1 = { @@ -1340,7 +1324,7 @@ def test_skip_path2_reverse(self): } t2 = {"for life": "vegan"} ddiff = DeepDiff(t2, t1, exclude_paths={"root['ingredients']"}) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_skip_path4(self): t1 = { @@ -1349,29 +1333,29 @@ def test_skip_path4(self): } t2 = {"for life": 
"vegan", "zutaten": ["veggies", "tofu", "soy sauce"]} ddiff = DeepDiff(t1, t2, exclude_paths={"root['ingredients']"}) - self.assertTrue('dictionary_item_added' in ddiff, {}) - self.assertTrue('dictionary_item_removed' not in ddiff, {}) + assert 'dictionary_item_added' in ddiff, {} + assert 'dictionary_item_removed' not in ddiff, {} def test_skip_custom_object_path(self): t1 = CustomClass(1) t2 = CustomClass(2) ddiff = DeepDiff(t1, t2, exclude_paths=['root.a']) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_skip_list_path(self): t1 = ['a', 'b'] t2 = ['a'] ddiff = DeepDiff(t1, t2, exclude_paths=['root[1]']) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_skip_dictionary_path(self): t1 = {1: {2: "a"}} t2 = {1: {}} ddiff = DeepDiff(t1, t2, exclude_paths=['root[1][2]']) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_skip_dictionary_path_with_custom_object(self): obj1 = CustomClass(1) @@ -1381,28 +1365,28 @@ def test_skip_dictionary_path_with_custom_object(self): t2 = {1: {2: obj2}} ddiff = DeepDiff(t1, t2, exclude_paths=['root[1][2].a']) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_skip_regexp(self): t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] - ddiff = DeepDiff(t1, t2, exclude_regex_paths=["root\[\d+\]\['b'\]"]) + ddiff = DeepDiff(t1, t2, exclude_regex_paths=[r"root\[\d+\]\['b'\]"]) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_skip_str_type_in_dictionary(self): t1 = {1: {2: "a"}} t2 = {1: {}} ddiff = DeepDiff(t1, t2, exclude_types=[str]) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_unknown_parameters(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): DeepDiff(1, 1, wrong_param=2) def test_bad_attribute(self): - class Bad(object): + class Bad: __slots__ = ['x', 'y'] def __getattr__(self, key): @@ -1416,7 
+1400,7 @@ def __str__(self): ddiff = DeepDiff(t1, t2) result = {'unprocessed': ['root: Bad Object and Bad Object']} - self.assertEqual(ddiff, result) + assert result == ddiff def test_dict_none_item_removed(self): t1 = {1: None, 2: 2} @@ -1425,7 +1409,7 @@ def test_dict_none_item_removed(self): result = { 'dictionary_item_removed': {'root[1]'} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_none_item_removed(self): t1 = [1, 2, None] @@ -1434,7 +1418,7 @@ def test_list_none_item_removed(self): result = { 'iterable_item_removed': {'root[2]': None} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_non_subscriptable_iterable(self): def gen1(): @@ -1453,7 +1437,7 @@ def gen2(): result = {'iterable_item_removed': {'root[2]': 31337}} # Note: In text-style results, we currently pretend this stuff is subscriptable for readability - self.assertEqual(ddiff, result) + assert result == ddiff @mock.patch('deepdiff.diff.logger') @mock.patch('deepdiff.diff.DeepHash') diff --git a/tests/test_diff_tree.py b/tests/test_diff_tree.py index 0bad2236..ca4e63ee 100644 --- a/tests/test_diff_tree.py +++ b/tests/test_diff_tree.py @@ -175,17 +175,14 @@ def test_repr(self): class TestDeepDiffTreeWithNumpy: """DeepDiff Tests with Numpy.""" - def setUp(self): - if not pypy3: - import numpy as np - a1 = np.array([1.23, 1.66, 1.98]) - a2 = np.array([1.23, 1.66, 1.98]) - self.d1 = {'np': a1} - self.d2 = {'np': a2} - @pytest.mark.skipif(pypy3, reason="Numpy is not compatible with pypy3") def test_diff_with_numpy(self): - ddiff = DeepDiff(self.d1, self.d2) + import numpy as np + a1 = np.array([1.23, 1.66, 1.98]) + a2 = np.array([1.23, 1.66, 1.98]) + d1 = {'np': a1} + d2 = {'np': a2} + ddiff = DeepDiff(d1, d2) res = ddiff.tree assert res == {} From ae26d26d9f1cb2e6664dfbca63ff8474858b1137 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 8 Dec 2018 22:44:28 -0800 Subject: [PATCH 23/76] bumping version --- README.md | 6 +- README.txt | 306 
----------------------------------------------- deepdiff/diff.py | 8 +- docs/conf.py | 4 +- docs/index.rst | 2 +- setup.py | 12 +- 6 files changed, 13 insertions(+), 325 deletions(-) delete mode 100644 README.txt diff --git a/README.md b/README.md index 48730adb..78c209e2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 3.5.0 +# DeepDiff v 4.0.0 ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) @@ -8,7 +8,9 @@ [![Coverage Status](https://coveralls.io/repos/github/seperman/deepdiff/badge.svg?branch=master)](https://coveralls.io/github/seperman/deepdiff?branch=master) Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes. -Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6, Pypy, Pypy3 +Tested on Python 3.3, 3.4, 3.5, 3.6, 3.7, Pypy3 + +**NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to support Python 2** ## Table of Contents diff --git a/README.txt b/README.txt deleted file mode 100644 index 5ca7b687..00000000 --- a/README.txt +++ /dev/null @@ -1,306 +0,0 @@ -**DeepDiff v 3.5.0** - -Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes. - -Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6, Pypy, Pypy3 - -Note: Checkout the github repo's readme for complete coverage of features: -https://github.com/seperman/deepdiff - -**Parameters** - -In addition to the 2 objects being compared: - -- ignore_order -- report_repetition -- verbose_level - -**Returns** - - A DeepDiff object that has already calculated the difference of the 2 items. - -**Supported data types** - -int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects! 
- -**Examples** - - -Importing - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> from __future__ import print_function # In case running on Python 2 - -Same object returns empty - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = t1 - >>> print(DeepDiff(t1, t2)) - {} - -Type of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:"2", 3:3} - >>> pprint(DeepDiff(t1, t2), indent=2) - { 'type_changes': { 'root[2]': { 'new_type': , - 'new_value': '2', - 'old_type': , - 'old_value': 2}}} - -Value of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> pprint(DeepDiff(t1, t2), indent=2) - {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} - -Item added and/or removed - >>> t1 = {1:1, 2:2, 3:3, 4:4} - >>> t2 = {1:1, 2:4, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff) - {'dictionary_item_added': ['root[5]', 'root[6]'], - 'dictionary_item_removed': ['root[4]'], - 'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} - -String difference - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} - >>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { 'root[2]': {'new_value': 4, 'old_value': 2}, - "root[4]['b']": { 'new_value': 'world!', - 'old_value': 'world'}}} - - -String difference 2 - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { "root[4]['b']": { 'diff': '--- \n' - '+++ \n' - '@@ -1,5 +1,4 @@\n' - '-world!\n' - '-Goodbye!\n' - '+world\n' - ' 1\n' - ' 2\n' - ' End', - 'new_value': 'world\n1\n2\nEnd', - 'old_value': 'world!\n' - 'Goodbye!\n' - '1\n' - '2\n' - 'End'}}} - - >>> - >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) - --- - +++ - @@ -1,5 +1,4 @@ - -world! - -Goodbye! 
- +world - 1 - 2 - End - -Type change - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'type_changes': { "root[4]['b']": { 'new_type': , - 'new_value': 'world\n\n\nEnd', - 'old_type': , - 'old_value': [1, 2, 3]}}} - -List difference - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} - -List difference 2: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'iterable_item_added': {"root[4]['b'][3]": 3}, - 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 'old_value': 2}, - "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} - -List difference ignoring order or duplicates: (with the same dictionaries as above) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2, ignore_order=True) - >>> print (ddiff) - {} - -List that contains dictionary: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': ["root[4]['b'][2][2]"], - 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} - -Sets: - >>> t1 = {1, 2, 8} - >>> t2 = {1, 2, 3, 5} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (DeepDiff(t1, t2)) - {'set_item_added': ['root[3]', 'root[5]'], 'set_item_removed': ['root[8]']} - -Named Tuples: - >>> from collections import namedtuple - >>> Point = namedtuple('Point', ['x', 'y']) - >>> t1 = Point(x=11, y=22) - 
>>> t2 = Point(x=11, y=23) - >>> pprint (DeepDiff(t1, t2)) - {'values_changed': {'root.y': {'new_value': 23, 'old_value': 22}}} - -Custom objects: - >>> class ClassA(object): - ... a = 1 - ... def __init__(self, b): - ... self.b = b - ... - >>> t1 = ClassA(1) - >>> t2 = ClassA(2) - >>> - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - -Object attribute added: - >>> t2.c = "new attribute" - >>> pprint(DeepDiff(t1, t2)) - {'attribute_added': ['root.c'], - 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - -Exclude certain types from comparison: - >>> l1 = logging.getLogger("test") - >>> l2 = logging.getLogger("test2") - >>> t1 = {"log": l1, 2: 1337} - >>> t2 = {"log": l2, 2: 1337} - >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) - {} - -Exclude part of your object tree from comparison: - >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} - >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} - >>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) - {} - -You can also exclude regular expression : - >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] - >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] - >>> print (DeepDiff(t1, t2, exclude_regex_paths={"root\[\d+\]\['b'\]"})) - {} - -Using DeepDiff in unit tests -result is the output of the function that is being tests. -expected is the expected output of the function. - >>> assertEqual(DeepDiff(result, expected), {}) - - -**Difference with Json Patch** - -Unlike Json Patch https://tools.ietf.org/html/rfc6902 which is designed only for Json objects, DeepDiff is designed specifically for almost all Python types. In addition to that, DeepDiff checks for type changes and attribute value changes that Json Patch does not cover since there are no such things in Json. Last but not least, DeepDiff gives you the exact path of the item(s) that were changed in Python syntax. 
- -Example in Json Patch for replacing: - { "op": "replace", "path": "/a/b/c", "value": 42 } - -Example in DeepDiff for the same operation: - >>> item1 = {'a':{'b':{'c':'foo'}}} - >>> item2 = {'a':{'b':{'c':42}}} - >>> DeepDiff(item1, item2) - {'type_changes': {"root['a']['b']['c']": {'old_type': , 'new_value': 42, 'old_value': 'foo', 'new_type': >> t1 = {1: 1, 2: 2, 3: 3} - >>> t2 = {1: 1, 2: "2", 3: 3} - >>> ddiff = DeepDiff(t1, t2) - >>> jsoned = ddiff.json - >>> jsoned - '{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' - >>> ddiff_new = DeepDiff.from_json(jsoned) - >>> ddiff == ddiff_new - True - - -**Pycon 2016** - -I was honored to give a talk about how DeepDiff does what it does at Pycon 2016. Please check out the video and let me know what you think: - -Diff It To Dig It Video -https://www.youtube.com/watch?v=J5r99eJIxF4 -And here is more info: -http://zepworks.com/blog/diff-it-to-digg-it/ - - -**Changelog** - -- v3-5-0: Exclude regex path -- v3-3-0: Searching for objects and class attributes -- v3-2-2: Adding help(deepdiff) -- v3-2-1: Fixing hash of None -- v3-2-0: Adding grep for search: object | grep(item) -- v3-1-3: Unicode vs. Bytes default fix -- v3-1-2: NotPresent Fix when item is added or removed. -- v3-1-1: Bug fix when item value is None (#58) -- v3-1-0: Serialization to/from json -- v3-0-0: Introducing Tree View -- v2-5-3: Bug fix on logging for content hash. -- v2-5-2: Bug fixes on content hash. -- v2-5-0: Adding ContentHash module to fix ignore_order once and for all. -- v2-1-0: Adding Deep Search. Now you can search for item in an object. -- v2-0-0: Exclusion patterns better coverage. Updating docs. -- v1-8-0: Exclusion patterns. -- v1-7-0: Deep Set comparison. -- v1-6-0: Unifying key names. i.e newvalue is new_value now. For backward compatibility, newvalue still works. 
-- v1-5-0: Fixing ignore order containers with unordered items. Adding significant digits when comparing decimals. Changes property is deprecated. -- v1-1-0: Changing Set, Dictionary and Object Attribute Add/Removal to be reported as Set instead of List. Adding Pypy compatibility. -- v1-0-2: Checking for ImmutableMapping type instead of dict -- v1-0-1: Better ignore order support -- v1-0-0: Restructuring output to make it more useful. This is NOT backward compatible. -- v0-6-1: Fixiing iterables with unhashable when order is ignored -- v0-6-0: Adding unicode support -- v0-5-9: Adding decimal support -- v0-5-8: Adding ignore order of unhashables support -- v0-5-7: Adding ignore order support -- v0-5-6: Adding slots support -- v0-5-5: Adding loop detection - -**Authors** -Sep Dehpour - -Github: https://github.com/seperman -Linkedin: http://www.linkedin.com/in/sepehr -ZepWorks: http://www.zepworks.com -Article about Deepdiff: http://zepworks.com/blog/diff-it-to-digg-it/ - -Victor Hahn Castell - -- [hahncastell.de](http://hahncastell.de) -- [flexoptix.net](http://www.flexoptix.net) - -Also thanks to: - -- nfvs for Travis-CI setup script. -- brbsix for initial Py3 porting. -- WangFenjin for unicode support. -- timoilya for comparing list of sets when ignoring order. -- Bernhard10 for significant digits comparison. -- b-jazz for PEP257 cleanup, Standardize on full names, fixing line endings. -- finnhughes for fixing __slots__ -- moloney for Unicode vs. 
Bytes default -- serv-inc for adding help(deepdiff) -- movermeyer for updating docs -- maxrothman for search in inherited class attributes -- maxrothman for search for types/objects -- MartyHub for exclude regex paths diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 7f6bbaab..8bc94f53 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -16,7 +16,8 @@ from collections.abc import Mapping, Iterable -from deepdiff.helper import py3, strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose, unprocessed +from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, + IndexedHash, Verbose, unprocessed) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel from deepdiff.model import DictRelationship, AttributeRelationship from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship @@ -1162,10 +1163,5 @@ def from_json(self, value): if __name__ == "__main__": # pragma: no cover - if not py3: - import sys - sys.exit( - "Please run with Python 3 to verify the doc strings: python3 -m deepdiff.diff" - ) import doctest doctest.testmod() diff --git a/docs/conf.py b/docs/conf.py index 6cf5431b..5697e04f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '3.5.0' +version = '4.0.0' # The full version, including alpha/beta/rc tags. -release = '3.5.0' +release = '4.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index 4f27625f..1477a3b2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,7 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -DeepDiff 3.5.0 documentation! +DeepDiff 4.0.0 documentation! 
============================= **DeepDiff: Deep Difference of dictionaries, iterables and almost any other object recursively.** diff --git a/setup.py b/setup.py index 568b23c5..e9c1ebc0 100755 --- a/setup.py +++ b/setup.py @@ -28,11 +28,8 @@ def get_reqs(filename): reqs = get_reqs("requirements.txt") -try: - with open('README.rst') as file: - long_description = file.read() -except Exception: - long_description = "Deep Difference and Search of any Python object/data." +with open('README.md') as file: + long_description = file.read() setup(name='deepdiff', @@ -48,9 +45,8 @@ def get_reqs(filename): test_suite="tests", tests_require=['mock'], # 'numpy==1.11.2' numpy is needed but comes already installed with travis long_description=long_description, - install_requires=[ - 'mmh3>=2.5.1' - ], + long_description_content_type='text/markdown', + install_requires=reqs, classifiers=[ "Intended Audience :: Developers", "Operating System :: OS Independent", From c92af1fb87fcbfdf67ad4784b81cb08cc3298e97 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 8 Dec 2018 23:59:13 -0800 Subject: [PATCH 24/76] updating docs --- README.md | 98 ++++++++++++++++++++++++++++++++++++++--- deepdiff/contenthash.py | 73 +++++++++++++++++++++++++----- deepdiff/diff.py | 9 +++- docs/index.rst | 4 +- 4 files changed, 164 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 78c209e2..20a44290 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Tested on Python 3.3, 3.4, 3.5, 3.6, 3.7, Pypy3 - [Significant Digits](#significant-digits) - [Verbose Level](#verbose-level) - [Deep Search](#deep-search) +- [Deep Hash](#deep-hash) - [Using DeepDiff in unit tests](#using-deepdiff-in-unit-tests) - [Difference with Json Patch](#difference-with-json-patch) - [Views](#views) @@ -131,7 +132,7 @@ You can also exclude using regular expressions by using `exclude_regex_paths` an ```python >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] ->>> print 
(DeepDiff(t1, t2, exclude_regex_paths={"root\[\d+\]\['b'\]"})) +>>> print (DeepDiff(t1, t2, exclude_regex_paths={r"root\[\d+\]\['b'\]"})) {} ``` @@ -140,7 +141,7 @@ example 2: ```python >>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} >>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} ->>> DeepDiff(t1, t2, exclude_regex_paths={"\['foo.'\]"}) +>>> DeepDiff(t1, t2, exclude_regex_paths={r"\['foo.'\]"}) {} ``` @@ -252,16 +253,100 @@ And you can pass all the same kwargs as DeepSearch to grep too: {'matched_paths': {"root['somewhere']": 'around'}, 'matched_values': {"root['long']": 'somewhere'}} ``` -## Using DeepDiff in unit tests +# Deep Hash +(New in v4-0-0) + +DeepHash is designed to give you hash of ANY python object based on its contents even if the object is not considered hashable! +DeepHash is supposed to be deterministic in order to make sure 2 objects that contain the same data, produce the same hash. + +Let's say you have a dictionary object. + +```py +>>> from deepdiff import DeepHash +>>> +>>> obj = {1: 2, 'a': 'b'} +``` + +If you try to hash it: + +```py +>>> hash(obj) +Traceback (most recent call last): + File "", line 1, in +TypeError: unhashable type: 'dict' +``` + +But with DeepHash: + +```py +>>> from deepdiff import DeepHash +>>> obj = {1: 2, 'a': 'b'} +>>> DeepHash(obj) +{4355639248: (2468916477072481777, 512283587789292749), 4355639280: (-3578777349255665377, -6377555218122431491), 4358636128: (-8839064797231613815, -1822486391929534118), 4358009664: (8833996863197925870, -419376694314494743), 4357467952: (3415089864575009947, 7987229399128149852)} +``` + +So what is exactly the hash of obj in this case? +DeepHash is calculating the hash of the obj and any other object that obj contains. +The output of DeepHash is a dictionary of object IDs to their hashes. 
+In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: + +```py +>>> hashes = DeepHash(obj) +>>> hashes[obj] +(3415089864575009947, 7987229399128149852) +``` + +Which you can write as: + +```py +>>> hashes = DeepHash(obj)[obj] +``` + +At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. + +The result hash is `(3415089864575009947, 7987229399128149852)`. +In this case the hash of the obj is 128 bit that is divided into 2 64bit integers. +Using Murmur3 128bit for hashing is preferred (and is the default behaviour) +since the chance of hash collision will be minimal and hashing will be deterministic +and will not depend on the version of the Python. + +If you do a deep copy of obj, it should still give you the same hash: + +```py +>>> from copy import deepcopy +>>> obj2 = deepcopy(obj) +>>> DeepHash(obj2)[obj2] +(3415089864575009947, 7987229399128149852) +``` + +Note that by default DeepHash will ignore string type differences. So if your strings were bytes, you would still get the same hash: + >>> obj3 = {1: 2, b'a': b'b'} + >>> DeepHash(obj3)[obj3] + (3415089864575009947, 7987229399128149852) + +But if you want a different hash if string types are different, set include_string_type_changes to True: + >>> DeepHash(obj3, include_string_type_changes=True)[obj3] + (6406752576584602448, -8103933101621212760) + +# Using DeepDiff in unit tests `result` is the output of the function that is being tests. `expected` is the expected output of the function. ```python -assertEqual(DeepDiff(result, expected), {}) +self.assertEqual(DeepDiff(expected, result), {}) ``` -## Difference with Json Patch +or if you are using Pytest: + + +```python +assert not DeepDiff(expected, result) +``` + +In other words, assert that there is no diff between the expected and the result. 
+ +# Difference with Json Patch Unlike [Json Patch](https://tools.ietf.org/html/rfc6902) which is designed only for Json objects, DeepDiff is designed specifically for almost all Python types. In addition to that, DeepDiff checks for type changes and attribute value changes that Json Patch does not cover since there are no such things in Json. Last but not least, DeepDiff gives you the exact path of the item(s) that were changed in Python syntax. @@ -311,8 +396,6 @@ So for example `ddiff['dictionary_item_removed']` is a set if strings thus this ```python >>> from deepdiff import DeepDiff ->>> from pprint import pprint ->>> from __future__ import print_function # In case running on Python 2 ``` ### Same object returns empty @@ -823,6 +906,7 @@ And here is more info: ## Change log +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. - v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index def9e240..697953e3 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -59,12 +59,13 @@ class DeepHash(dict): Note that the deepdiff diffing functionality lets this to be the default at all times. But if you are using DeepHash directly, you can set this parameter. - hasher: function. default = hash - hasher is the hashing function. The default is built-in hash function. + hasher: function. default = DeepHash.murmur3_128bit + hasher is the hashing function. The default is DeepHash.murmur3_128bit. But you can pass another hash function to it if you want. - For example the Murmur3 hash function or a cryptographic hash function. - All it needs is a function that takes the input in string format - and return the hash. + For example the Murmur3 32bit hash function or a cryptographic hash function or Python's builtin hash function. 
+ All it needs is a function that takes the input in string format and returns the hash. + + You can use it by passing: hasher=DeepHash.murmur3 for 32bit hash and hasher=hash for Python's builtin hash. SHA1 is already provided as an alternative to the built-in hash function. You can use it by passing: hasher=DeepHash.sha1hex @@ -96,7 +97,7 @@ class DeepHash(dict): **Returns** A dictionary of {item id: item hash}. - If your object is nested, it will include hashes of all the objects it includes! + If your object is nested, it will build hashes of all the objects it contains! **Examples** @@ -106,14 +107,51 @@ class DeepHash(dict): >>> >>> obj = {1: 2, 'a': 'b'} - If you try to hash itL + If you try to hash it: >>> hash(obj) Traceback (most recent call last): File "", line 1, in TypeError: unhashable type: 'dict' But with DeepHash: - + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + >>> DeepHash(obj) + {4355639248: (2468916477072481777, 512283587789292749), 4355639280: (-3578777349255665377, -6377555218122431491), 4358636128: (-8839064797231613815, -1822486391929534118), 4358009664: (8833996863197925870, -419376694314494743), 4357467952: (3415089864575009947, 7987229399128149852)} + + So what is exactly the hash of obj in this case? + DeepHash is calculating the hash of the obj and any other object that obj contains. + The output of DeepHash is a dictionary of object IDs to their hashes. + In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: + >>> hashes = DeepHash(obj) + >>> hashes[obj] + (3415089864575009947, 7987229399128149852) + + Which you can write as: + >>> hashes = DeepHash(obj)[obj] + + At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. + + The result hash is (3415089864575009947, 7987229399128149852). + In this case the hash of the obj is 128 bit that is divided into 2 64bit integers. 
+ Using Murmur3 128bit for hashing is preferred (and is the default behaviour) + since the chance of hash collision will be minimal and hashing will be deterministic + and will not depend on the version of the Python. + + If you do a deep copy of obj, it should still give you the same hash: + >>> from copy import deepcopy + >>> obj2 = deepcopy(obj) + >>> DeepHash(obj2)[obj2] + (3415089864575009947, 7987229399128149852) + + Note that by default DeepHash will ignore string type differences. So if your strings were bytes, you would still get the same hash: + >>> obj3 = {1: 2, b'a': b'b'} + >>> DeepHash(obj3)[obj3] + (3415089864575009947, 7987229399128149852) + + But if you want a different hash if string types are different, set include_string_type_changes to True: + >>> DeepHash(obj3, include_string_type_changes=True)[obj3] + (6406752576584602448, -8103933101621212760) """ def __init__(self, @@ -136,7 +174,7 @@ def __init__(self, self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance self.ignore_repetition = ignore_repetition - self.hasher = hash if hasher is None else hasher + self.hasher = self.murmur3_128bit if hasher is None else hasher hashes = hashes if hashes else {} self.update(hashes) self[UNPROCESSED] = [] @@ -164,10 +202,23 @@ def sha1hex(obj): def murmur3(obj): """Use Sha1 as a cryptographic hash.""" obj = obj.encode('utf-8') - return mmh3.hash(obj) + return mmh3.hash(obj, 123) + + @staticmethod + def murmur3_128bit(obj): + """Use Sha1 as a cryptographic hash.""" + obj = obj.encode('utf-8') + # hash64 is actually 128bit. Weird. 
+ # 123 is the seed + return mmh3.hash64(obj, 123) def __getitem__(self, key): - if not isinstance(key, int) and key not in RESERVED_DICT_KEYS: + if not isinstance(key, int): + try: + if key in RESERVED_DICT_KEYS: + return super().__getitem__(key) + except Exception: + pass key = id(key) return super().__getitem__(key) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 8bc94f53..bc121569 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -81,6 +81,10 @@ class DeepDiff(ResultDict): exclude_types: list, default = None. List of object types to exclude from the report. + hasher: default = DeepHash.murmur3_128bit + Hash function to be used. If you don't want Murmur3, you can use Python's built-in hash function + by passing hasher=hash. This is for advanced usage and normally you don't need to modify it. + view: string, default = text Starting the version 3 you can choosethe view into the deepdiff results. The default is the text view which has been the only view up until now. @@ -655,6 +659,7 @@ def __init__(self, include_string_type_changes=False, verbose_level=1, view='text', + hasher=DeepHash.murmur3_128bit, **kwargs): if kwargs: raise ValueError(( @@ -670,6 +675,7 @@ def __init__(self, self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance self.include_string_type_changes = include_string_type_changes self.hashes = {} + self.hasher = hasher if significant_digits is not None and significant_digits < 0: raise ValueError( @@ -989,7 +995,8 @@ def __create_hashtable(self, t, level): hashes_all = DeepHash(item, hashes=self.hashes, significant_digits=self.significant_digits, - include_string_type_changes=self.include_string_type_changes) + include_string_type_changes=self.include_string_type_changes, + hasher=self.hasher) item_hash = hashes_all.get(id(item), item) except Exception as e: # pragma: no cover logger.warning("Can not produce a hash for %s." 
diff --git a/docs/index.rst b/docs/index.rst index 1477a3b2..4a4e612a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,7 +8,9 @@ DeepDiff 4.0.0 documentation! **DeepDiff: Deep Difference of dictionaries, iterables and almost any other object recursively.** -DeepDiff works with Python 2.7, 3.3, 3.4, 3.5, 3.6, Pypy, Pypy3 +DeepDiff works with Python 3.3, 3.4, 3.5, 3.6, 3.7, Pypy3 + +NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to support Python 2. ************ Installation From 47beb03675c5223516fb4ca6ca51b48430db8a1e Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 00:30:52 -0800 Subject: [PATCH 25/76] docs --- README.md | 22 +++++++++-- deepdiff/contenthash.py | 10 ++--- deepdiff/search.py | 2 +- docs/dsearch.rst | 3 ++ docs/index.rst | 81 ++++++++++++++++++++++++++++++++++++++--- 5 files changed, 103 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 20a44290..2ebda1f8 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,10 @@ [![Build Status](https://travis-ci.org/seperman/deepdiff.svg?branch=master)](https://travis-ci.org/seperman/deepdiff) [![Coverage Status](https://coveralls.io/repos/github/seperman/deepdiff/badge.svg?branch=master)](https://coveralls.io/github/seperman/deepdiff?branch=master) -Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes. +- DeepDiff: Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes. +- DeepSearch: Search for objects within other objects. +- DeepHash: Hash any object based on their content. + Tested on Python 3.3, 3.4, 3.5, 3.6, 3.7, Pypy3 **NOTE: Python 2 is not supported any more. 
DeepDiff v3.3.0 was the last version to supprt Python 2** @@ -42,16 +45,26 @@ Tested on Python 3.3, 3.4, 3.5, 3.6, 3.7, Pypy3 ```python >>> from deepdiff import DeepDiff # For Deep Difference of 2 objects ->>> from deepdiff import DeepSearch # For finding if item exists in an object +>>> from deepdiff import grep, DeepSearch # For finding if item exists in an object +>>> from deepdiff import DeepHash # For hashing objects based on their contents ``` +# Deep Diff + +DeepDiff gets the difference of 2 objects. + ## Parameters In addition to the 2 objects being compared: - [ignore_order](#ignore-order) - [report_repetition](#report-repetitions) +- [exclude_types](#exclude-types) +- [exclude_paths](#exclude-paths) +- [exclude_regex_paths](#exclude-regex-paths) - [verbose_level](#verbose-level) +- [significant_digits](#significant-digits) +- [view](#views) ## Supported data types @@ -127,6 +140,9 @@ use `exclude_paths` and pass a set or list of paths to exclude: {} ``` +### Exclude Regex Paths + + You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude: ```python @@ -389,7 +405,7 @@ So for example `ddiff['dictionary_item_removed']` is a set if strings thus this The following examples are using the *default text view.* The Tree View is introduced in DeepDiff v3 and provides traversing capabilities through your diffed data and more! - Read more about the Tree View at the bottom of this page. + Read more about the Tree View at the [tree view section](#tree-view) of this page. ### Importing diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 697953e3..1064204f 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -39,10 +39,8 @@ class DeepHash(dict): For example you can use DeepHash to calculate the hash of a set or a dictionary! The core of DeepHash is a deterministic serialization of your object into a string so it - can be passed to a hash function. 
By default it uses Python's built-in hash function + can be passed to a hash function. By default it uses Murmur 3 128 bit hash function. but you can pass another hash function to it if you want. - For example the Murmur3 hash function or a cryptographic hash function. - **Parameters** @@ -67,7 +65,7 @@ class DeepHash(dict): You can use it by passing: hasher=DeepHash.murmur3 for 32bit hash and hasher=hash for Python's builtin hash. - SHA1 is already provided as an alternative to the built-in hash function. + SHA1 is already provided as an alternative too: You can use it by passing: hasher=DeepHash.sha1hex ignore_repetition: Boolean, default = True @@ -200,13 +198,13 @@ def sha1hex(obj): @staticmethod def murmur3(obj): - """Use Sha1 as a cryptographic hash.""" + """Use murmur3 for 32 bit hash.""" obj = obj.encode('utf-8') return mmh3.hash(obj, 123) @staticmethod def murmur3_128bit(obj): - """Use Sha1 as a cryptographic hash.""" + """Use murmur3_128bit for 128 bit hash (default).""" obj = obj.encode('utf-8') # hash64 is actually 128bit. Weird. # 123 is the seed diff --git a/deepdiff/search.py b/deepdiff/search.py index 95555296..0fc8c9ca 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -311,7 +311,7 @@ def __search(self, obj, item, parent="root", parents_ids=frozenset({})): self.__search_obj(obj, item, parent, parents_ids) -class grep(object): +class grep: """ **Grep!** diff --git a/docs/dsearch.rst b/docs/dsearch.rst index f3b07b0a..60e9753f 100644 --- a/docs/dsearch.rst +++ b/docs/dsearch.rst @@ -8,6 +8,9 @@ DeepSearch Reference .. automodule:: deepdiff.search +.. autoclass:: grep + :members: + .. autoclass:: DeepSearch :members: diff --git a/docs/index.rst b/docs/index.rst index 4a4e612a..8f4ba52a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,7 +6,11 @@ DeepDiff 4.0.0 documentation! 
============================= -**DeepDiff: Deep Difference of dictionaries, iterables and almost any other object recursively.** +**DeepDiff: Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes.** + +**DeepSearch: Search for objects within other objects.** + +**DeepHash: Hash any object based on their content even if they are not "hashable".** DeepDiff works with Python 3.3, 3.4, 3.5, 3.6, 3.7, Pypy3 @@ -26,7 +30,7 @@ Importing .. code:: python >>> from deepdiff import DeepDiff # For Deep Difference of 2 objects - >>> from deepdiff import DeepSearch # For finding if item exists in an object + >>> from deepdiff import grep, DeepSearch # For finding if item exists in an object >>> from deepdiff import DeepHash # For hashing objects based on their contents ******** @@ -280,6 +284,10 @@ DeepDiff uses jsonpickle in order to serialize and deserialize its results into True +Read more in + +:doc:`/diff` + *********** Deep Search *********** @@ -294,16 +302,18 @@ Importing .. code:: python - >>> from deepdiff import DeepSearch + >>> from deepdiff import DeepSearch, grep >>> from pprint import pprint +DeepSearch comes with grep function which is easier to remember! + Search in list for string .. 
code:: python >>> obj = ["long somewhere", "string", 0, "somewhere great!"] >>> item = "somewhere" - >>> ds = DeepSearch(obj, item, verbose_level=2) + >>> ds = obj | grep(item, verbose_level=2) >>> print(ds) {'matched_values': {'root[3]': 'somewhere great!', 'root[0]': 'long somewhere'}} @@ -313,12 +323,72 @@ Search in nested data for string >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}] >>> item = "somewhere" - >>> ds = DeepSearch(obj, item, verbose_level=2) + >>> ds = obj | grep(item, verbose_level=2) >>> pprint(ds, indent=2) { 'matched_paths': {"root[1]['somewhere']": 'around'}, 'matched_values': { 'root[0]': 'something somewhere', "root[1]['long']": 'somewhere'}} + +Read more in the Deep Search references: + +:doc:`/dsearch` + + +********* +Deep Hash +********* +DeepHash calculates the hash of objects based on their contents in a deterministic way. +This way 2 objects with the same content should have the same hash. + +The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. +For example you can use DeepHash to calculate the hash of a set or a dictionary! + +The core of DeepHash is a deterministic serialization of your object into a string so it +can be passed to a hash function. By default it uses Murmur 3 128 bit hash function. +but you can pass another hash function to it if you want. + +Let's say you have a dictionary object. + +.. code:: python + + >>> from deepdiff import DeepHash + >>> + >>> obj = {1: 2, 'a': 'b'} + +If you try to hash it: + +.. code:: python + + >>> hash(obj) + Traceback (most recent call last): + File "", line 1, in + TypeError: unhashable type: 'dict' + +But with DeepHash: + +.. 
code:: python + + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + >>> DeepHash(obj) + {4355639248: (2468916477072481777, 512283587789292749), 4355639280: (-3578777349255665377, -6377555218122431491), 4358636128: (-8839064797231613815, -1822486391929534118), 4358009664: (8833996863197925870, -419376694314494743), 4357467952: (3415089864575009947, 7987229399128149852)} + +So what is exactly the hash of obj in this case? +DeepHash is calculating the hash of the obj and any other object that obj contains. +The output of DeepHash is a dictionary of object IDs to their hashes. +In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: + +.. code:: python + + >>> hashes = DeepHash(obj) + >>> hashes[obj] + (3415089864575009947, 7987229399128149852) + +Read more in the Deep Hash reference: + +:doc:`/contenthash` + .. _ignore\_order: #ignore-order .. _report\_repetition: #report-repetitions .. _verbose\_level: #verbose-level @@ -354,6 +424,7 @@ Indices and tables Changelog ========= +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. 
- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) From 5cd96b767aa3a88ada19e90874b3b1f6e8b2dd49 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 00:34:31 -0800 Subject: [PATCH 26/76] fixing tests --- tests/test_search.py | 86 ++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/tests/test_search.py b/tests/test_search.py index 94ae964b..8762cae3 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -13,7 +13,7 @@ To run a specific test, run this from the root of repo: nosetests tests/test_search.py:DeepSearchTestCase.test_case_insensitive_of_str_in_list """ -import unittest +import pytest from deepdiff import DeepSearch, grep from datetime import datetime import logging @@ -34,44 +34,44 @@ def __repr__(self): return self.__str__() -class DeepSearchTestCase(unittest.TestCase): +class TestDeepSearch: """DeepSearch Tests.""" def test_number_in_list(self): obj = ["a", 10, 20] item = 10 result = {"matched_values": {'root[1]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_string_in_root(self): obj = "long string somewhere" result = {"matched_values": {'root'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_string_in_root_verbose(self): obj = "long string somewhere" result = {"matched_values": {'root': "long string somewhere"}} - self.assertEqual(DeepSearch(obj, item, verbose_level=2), result) + assert DeepSearch(obj, item, verbose_level=2) == result def test_string_in_tuple(self): obj = ("long", "string", 0, "somewhere") result = {"matched_values": {'root[3]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_string_in_list(self): obj = ["long", "string", 0, "somewhere"] 
result = {"matched_values": {'root[3]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_string_in_list_verbose(self): obj = ["long", "string", 0, "somewhere"] result = {"matched_values": {'root[3]': "somewhere"}} - self.assertEqual(DeepSearch(obj, item, verbose_level=2), result) + assert DeepSearch(obj, item, verbose_level=2) == result def test_string_in_list_verbose2(self): obj = ["long", "string", 0, "somewhere great!"] result = {"matched_values": {'root[3]': "somewhere great!"}} - self.assertEqual(DeepSearch(obj, item, verbose_level=2), result) + assert DeepSearch(obj, item, verbose_level=2) == result def test_string_in_list_verbose3(self): obj = ["long somewhere", "string", 0, "somewhere great!"] @@ -81,14 +81,14 @@ def test_string_in_list_verbose3(self): 'root[3]': "somewhere great!" } } - self.assertEqual(DeepSearch(obj, item, verbose_level=2), result) + assert DeepSearch(obj, item, verbose_level=2) == result def test_int_in_dictionary(self): obj = {"long": "somewhere", "num": 2, 0: 0, "somewhere": "around"} item = 2 result = {'matched_values': {"root['num']"}} ds = DeepSearch(obj, item, verbose_level=1) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary(self): obj = {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"} @@ -97,7 +97,7 @@ def test_string_in_dictionary(self): 'matched_values': {"root['long']"} } ds = DeepSearch(obj, item, verbose_level=1) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary_case_insensitive(self): obj = {"long": "Somewhere over there!", "string": 2, 0: 0, "SOMEWHERE": "around"} @@ -106,7 +106,7 @@ def test_string_in_dictionary_case_insensitive(self): 'matched_values': {"root['long']"} } ds = DeepSearch(obj, item, verbose_level=1, case_sensitive=False) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary_key_case_insensitive_partial(self): obj 
= {"SOMEWHERE here": "around"} @@ -114,7 +114,7 @@ def test_string_in_dictionary_key_case_insensitive_partial(self): 'matched_paths': {"root['SOMEWHERE here']"} } ds = DeepSearch(obj, item, verbose_level=1, case_sensitive=False) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary_verbose(self): obj = {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"} @@ -127,7 +127,7 @@ def test_string_in_dictionary_verbose(self): } } ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary_in_list_verbose(self): obj = [ @@ -148,25 +148,25 @@ def test_string_in_dictionary_in_list_verbose(self): } } ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def test_custom_object(self): obj = CustomClass('here, something', 'somewhere') result = {'matched_values': {'root.b'}} ds = DeepSearch(obj, item, verbose_level=1) - self.assertEqual(ds, result) + assert ds == result def test_custom_object_verbose(self): obj = CustomClass('here, something', 'somewhere out there') result = {'matched_values': {'root.b': 'somewhere out there'}} ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def test_custom_object_in_dictionary_verbose(self): obj = {1: CustomClass('here, something', 'somewhere out there')} result = {'matched_values': {'root[1].b': 'somewhere out there'}} ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def test_named_tuples_verbose(self): from collections import namedtuple @@ -181,13 +181,13 @@ def test_named_tuples_verbose(self): 'root.somewhere_good': 22 } } - self.assertEqual(ds, result) + assert ds == result def test_string_in_set_verbose(self): obj = {"long", "string", 0, "somewhere"} # result = {"matched_values": {'root[3]': "somewhere"}} ds = DeepSearch(obj, item, verbose_level=2) - 
self.assertEqual(list(ds["matched_values"].values())[0], item) + assert list(ds["matched_values"].values())[0] == item def test_loop(self): class LoopTest(object): @@ -199,7 +199,7 @@ def __init__(self, a): ds = DeepSearch(obj, item, verbose_level=1) result = {'matched_values': {'root.a'}} - self.assertEqual(ds, result) + assert ds == result def test_loop_in_lists(self): obj = [1, 2, 'somewhere'] @@ -207,7 +207,7 @@ def test_loop_in_lists(self): ds = DeepSearch(obj, item, verbose_level=1) result = {'matched_values': {'root[2]'}} - self.assertEqual(ds, result) + assert ds == result def test_skip_path1(self): obj = { @@ -215,46 +215,46 @@ def test_skip_path1(self): "ingredients": ["no meat", "no eggs", "no dairy", "somewhere"] } ds = DeepSearch(obj, item, exclude_paths={"root['ingredients']"}) - self.assertEqual(ds, {}) + assert ds == {} def test_custom_object_skip_path(self): obj = CustomClass('here, something', 'somewhere') result = {} ds = DeepSearch(obj, item, verbose_level=1, exclude_paths=['root.b']) - self.assertEqual(ds, result) + assert ds == result def test_skip_list_path(self): obj = ['a', 'somewhere'] ds = DeepSearch(obj, item, exclude_paths=['root[1]']) result = {} - self.assertEqual(ds, result) + assert ds == result def test_skip_dictionary_path(self): obj = {1: {2: "somewhere"}} ds = DeepSearch(obj, item, exclude_paths=['root[1][2]']) result = {} - self.assertEqual(ds, result) + assert ds == result def test_skip_type_str(self): obj = "long string somewhere" result = {} ds = DeepSearch(obj, item, verbose_level=1, exclude_types=[str]) - self.assertEqual(ds, result) + assert ds == result def test_skip_regexp(self): obj = [{'a': 1, 'b': "somewhere"}, {'c': 4, 'b': "somewhere"}] ds = DeepSearch(obj, item, exclude_regex_paths=[r"root\[\d+\]"]) result = {} - self.assertEqual(ds, result) + assert ds == result def test_skip_regexp2(self): obj = {'a': [1, 2, [3, [item]]]} ds = DeepSearch(obj, item, exclude_regex_paths=[r"\[\d+\]"]) result = {} - 
self.assertEqual(ds, result) + assert ds == result def test_unknown_parameters(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): DeepSearch(1, 1, wrong_param=2) def test_bad_attribute(self): @@ -271,44 +271,44 @@ def __str__(self): ds = DeepSearch(obj, item, verbose_level=1) result = {'unprocessed': ['root']} - self.assertEqual(ds, result) + assert ds == result ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def test_case_insensitive_of_str_in_list(self): obj = ["a", "bb", "BBC", "aBbB"] item = "BB" result = {"matched_values": {'root[1]', 'root[2]', 'root[3]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1, case_sensitive=False), result) + assert DeepSearch(obj, item, verbose_level=1, case_sensitive=False) == result def test_case_sensitive_of_str_in_list(self): obj = ["a", "bb", "BBC", "aBbB"] item = "BB" result = {"matched_values": {'root[2]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1, case_sensitive=True), result) + assert DeepSearch(obj, item, verbose_level=1, case_sensitive=True) == result def test_case_sensitive_of_str_in_one_liner(self): obj = "Hello, what's up?" item = "WHAT" result = {} - self.assertEqual(DeepSearch(obj, item, verbose_level=1, case_sensitive=True), result) + assert DeepSearch(obj, item, verbose_level=1, case_sensitive=True) == result def test_case_insensitive_of_str_in_one_liner(self): obj = "Hello, what's up?" 
item = "WHAT" result = {'matched_values': {'root'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1, case_sensitive=False), result) + assert DeepSearch(obj, item, verbose_level=1, case_sensitive=False) == result def test_none(self): obj = item = None result = {'matched_values': {'root'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_complex_obj(self): obj = datetime(2017, 5, 4, 1, 1, 1) item = datetime(2017, 5, 4, 1, 1, 1) result = {'matched_values': {'root'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_keep_searching_after_obj_match(self): @@ -324,7 +324,7 @@ def __eq__(self, other): obj = AlwaysEqual() item = AlwaysEqual() result = {'matched_values': {'root', 'root.some_attr'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_search_inherited_attributes(self): class Parent(object): @@ -336,10 +336,10 @@ class Child(Parent): obj = Child() item = 1 result = {'matched_values': {'root.a'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result -class GrepTestCase(unittest.TestCase): +class TestGrep: def test_grep_dict(self): obj = { @@ -347,4 +347,4 @@ def test_grep_dict(self): "ingredients": ["no meat", "no eggs", "no dairy", "somewhere"] } ds = obj | grep(item) - self.assertEqual(ds, {'matched_values': {"root['ingredients'][3]"}}) + assert ds == {'matched_values': {"root['ingredients'][3]"}} From 756983ed12ceac36163ace2e18663c56d07e81ad Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 00:36:39 -0800 Subject: [PATCH 27/76] fixing tests --- tests/test_serialization.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 
a0862215..f2b04207 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -19,21 +19,20 @@ or using nosetests: nosetests tests/test_serialization.py:DeepDiffTestCase.test_diff_when_hash_fails """ -import unittest from deepdiff import DeepDiff import logging logging.disable(logging.CRITICAL) -class DeepAdditionsTestCase(unittest.TestCase): +class TestDeepAdditions: """Tests for Additions and Subtractions.""" def test_serialization_text(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2) - self.assertTrue("deepdiff.helper.RemapDict" in ddiff.json) + assert "deepdiff.helper.RemapDict" in ddiff.json def test_deserialization(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} @@ -41,14 +40,14 @@ def test_deserialization(self): ddiff = DeepDiff(t1, t2) jsoned = ddiff.json ddiff2 = DeepDiff.from_json(jsoned) - self.assertEqual(ddiff, ddiff2) + assert ddiff == ddiff2 def test_serialization_tree(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2, view='tree') jsoned = ddiff.json - self.assertTrue("world" in jsoned) + assert "world" in jsoned def test_deserialization_tree(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} @@ -56,14 +55,14 @@ def test_deserialization_tree(self): ddiff = DeepDiff(t1, t2, view='tree') jsoned = ddiff.json ddiff2 = DeepDiff.from_json(jsoned) - self.assertTrue('type_changes' in ddiff2) + assert 'type_changes' in ddiff2 def test_deleting_serialization_cache(self): t1 = {1: 1} t2 = {1: 2} ddiff = DeepDiff(t1, t2) - self.assertFalse(hasattr(ddiff, '_json')) + assert hasattr(ddiff, '_json') is False ddiff.json - self.assertTrue(hasattr(ddiff, '_json')) + assert hasattr(ddiff, '_json') del ddiff.json - self.assertFalse(hasattr(ddiff, '_json')) + assert hasattr(ddiff, '_json') is False From 
098191092d5cbd7ff62fb916da65789c36071593 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 00:39:24 -0800 Subject: [PATCH 28/76] fix --- tests/test_helper.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/test_helper.py b/tests/test_helper.py index dffd1a2d..7b1aefe5 100644 --- a/tests/test_helper.py +++ b/tests/test_helper.py @@ -1,21 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - -import unittest from deepdiff.helper import short_repr -class HelperTestCase(unittest.TestCase): +class TestHelper: """Helper Tests.""" def test_short_repr_when_short(self): item = {1: 2} output = short_repr(item) - self.assertEqual(output, '{1: 2}') + assert output == '{1: 2}' def test_short_repr_when_long(self): item = {'Eat more': 'burritos'} output = short_repr(item) - self.assertEqual(output, "{'Eat more':...}") + assert output == "{'Eat more':...}" From e099615caef7e47e996ce46273c1236ab6eb3e8f Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 00:50:14 -0800 Subject: [PATCH 29/76] fix --- tests/__init__.py | 2 - tests/test_diff_text.py | 19 ----- tests/test_diff_tree.py | 16 ---- tests/test_model.py | 160 ++++++++++++++++++---------------------- 4 files changed, 71 insertions(+), 126 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index f23f9dc2..a0010f30 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -# To run all the tests: -# python -m unittest discover class CustomClass(object): diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 2733efd8..184935ad 100644 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1,24 +1,5 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_diff_text - -Or to run all the tests: - python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests 
--with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - python -m unittest tests.test_diff_text.DeepDiffTextTestCase.test_same_objects - -or using nosetests: - nosetests tests/test_diff_text.py:DeepDiffTestCase.test_diff_when_hash_fails -""" import datetime import pytest import logging diff --git a/tests/test_diff_tree.py b/tests/test_diff_tree.py index ca4e63ee..8c748087 100644 --- a/tests/test_diff_tree.py +++ b/tests/test_diff_tree.py @@ -1,21 +1,5 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_diff_tree - -Or to run all the tests: - python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - python -m unittest tests.test_diff_tree.DeepDiffTreeTestCase.test_same_objects -""" import pytest from deepdiff import DeepDiff from deepdiff.helper import pypy3, notpresent diff --git a/tests/test_model.py b/tests/test_model.py index f193840b..59b7c6a6 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,24 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_diff_ref - -Or to run all the tests: - python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - python -m unittest tests.test_model.DiffLevelTestCase.test_path_when_both_children_empty -""" - -from unittest import TestCase import logging +import pytest from tests import CustomClass, CustomClassMisleadingRepr from deepdiff.model import (DiffLevel, ChildRelationship, DictRelationship, SubscriptableIterableRelationship, @@ -33,20 +16,20 @@ def 
get_param_from_obj(self, obj): return obj -class DictRelationshipTestCase(TestCase): - def setUp(self): - self.customkey = CustomClass(a=13, b=37) - self.customkey_misleading = CustomClassMisleadingRepr(a=11, b=20) - self.d = { +class TestDictRelationship: + def setup_class(cls): + cls.customkey = CustomClass(a=13, b=37) + cls.customkey_misleading = CustomClassMisleadingRepr(a=11, b=20) + cls.d = { 42: 'answer', 'vegan': 'for life', - self.customkey: 1337, - self.customkey_misleading: 'banana' + cls.customkey: 1337, + cls.customkey_misleading: 'banana' } def test_numkey(self): rel = DictRelationship(parent=self.d, child=self.d[42], param=42) - self.assertEqual(rel.get_param_repr(), "[42]") + assert rel.get_param_repr() == "[42]" def test_strkey(self): rel = ChildRelationship.create( @@ -55,136 +38,136 @@ def test_strkey(self): child=self.d['vegan'], param='vegan') result = rel.get_param_repr() - self.assertEqual(result, "['vegan']") + assert result == "['vegan']" def test_objkey(self): rel = DictRelationship( parent=self.d, child=self.d[self.customkey], param=self.customkey) - self.assertIsNone(rel.get_param_repr()) + assert rel.get_param_repr() is None def test_objkey_misleading_repr(self): rel = DictRelationship( parent=self.d, child=self.d[self.customkey_misleading], param=self.customkey_misleading) - self.assertIsNone(rel.get_param_repr()) + assert rel.get_param_repr() is None def test_get_param_from_dict(self): param = 42 rel = DictRelationship(parent=self.d, child=self.d[param], param=param) obj = {10: 10, param: 123} - self.assertEqual(rel.get_param_from_obj(obj), 123) + assert rel.get_param_from_obj(obj) == 123 -class ListRelationshipTestCase(TestCase): - def setUp(self): - self.custom = CustomClass(13, 37) - self.l = [1337, 'vegan', self.custom] +class TestListRelationship: + def setup_class(cls): + cls.custom = CustomClass(13, 37) + cls.l = [1337, 'vegan', cls.custom] def test_min(self): rel = SubscriptableIterableRelationship(self.l, self.l[0], 0) result 
= rel.get_param_repr() - self.assertEqual(result, "[0]") + assert result == "[0]" def test_max(self): rel = ChildRelationship.create(SubscriptableIterableRelationship, self.l, self.custom, 2) - self.assertEqual(rel.get_param_repr(), "[2]") + assert rel.get_param_repr() == "[2]" def test_get_param_from_obj(self): param = 0 rel = SubscriptableIterableRelationship(parent=self.l, child=self.l[param], param=param) obj = ['a', 'b', 'c'] - self.assertEqual(rel.get_param_from_obj(obj), 'a') + assert rel.get_param_from_obj(obj) == 'a' -class AttributeRelationshipTestCase(TestCase): - def setUp(self): - self.custom = CustomClass(13, 37) +class TestAttributeRelationship: + def setup_class(cls): + cls.custom = CustomClass(13, 37) def test_a(self): rel = AttributeRelationship(self.custom, 13, "a") result = rel.get_param_repr() - self.assertEqual(result, ".a") + assert result == ".a" def test_get_param_from_obj(self): rel = AttributeRelationship(self.custom, 13, "a") - self.assertEqual(rel.get_param_from_obj(self.custom), 13) + assert rel.get_param_from_obj(self.custom) == 13 -class DiffLevelTestCase(TestCase): - def setUp(self): +class TestDiffLevel: + def setup_class(cls): # Test data - self.custom1 = CustomClass(a='very long text here', b=37) - self.custom2 = CustomClass(a=313, b=37) - self.t1 = {42: 'answer', 'vegan': 'for life', 1337: self.custom1} - self.t2 = { + cls.custom1 = CustomClass(a='very long text here', b=37) + cls.custom2 = CustomClass(a=313, b=37) + cls.t1 = {42: 'answer', 'vegan': 'for life', 1337: cls.custom1} + cls.t2 = { 42: 'answer', 'vegan': 'for the animals', - 1337: self.custom2 + 1337: cls.custom2 } # Manually build diff, bottom up - self.lowest = DiffLevel( - self.custom1.a, self.custom2.a, report_type='values_changed') + cls.lowest = DiffLevel( + cls.custom1.a, cls.custom2.a, report_type='values_changed') # Test manual child relationship rel_int_low_t1 = AttributeRelationship( - parent=self.custom1, child=self.custom1.a, param="a") + 
parent=cls.custom1, child=cls.custom1.a, param="a") rel_int_low_t2 = AttributeRelationship( - parent=self.custom2, child=self.custom2.a, param="a") - self.intermediate = DiffLevel( - self.custom1, - self.custom2, - down=self.lowest, + parent=cls.custom2, child=cls.custom2.a, param="a") + cls.intermediate = DiffLevel( + cls.custom1, + cls.custom2, + down=cls.lowest, child_rel1=rel_int_low_t1, child_rel2=rel_int_low_t2) - self.lowest.up = self.intermediate + cls.lowest.up = cls.intermediate # Test automatic child relationship t1_child_rel = ChildRelationship.create( klass=DictRelationship, - parent=self.t1, - child=self.intermediate.t1, + parent=cls.t1, + child=cls.intermediate.t1, param=1337) t2_child_rel = ChildRelationship.create( klass=DictRelationship, - parent=self.t2, - child=self.intermediate.t2, + parent=cls.t2, + child=cls.intermediate.t2, param=1337) - self.highest = DiffLevel( - self.t1, - self.t2, - down=self.intermediate, + cls.highest = DiffLevel( + cls.t1, + cls.t2, + down=cls.intermediate, child_rel1=t1_child_rel, child_rel2=t2_child_rel) - self.intermediate.up = self.highest + cls.intermediate.up = cls.highest def test_all_up(self): - self.assertEqual(self.lowest.all_up, self.highest) + assert self.lowest.all_up == self.highest def test_all_down(self): - self.assertEqual(self.highest.all_down, self.lowest) + assert self.highest.all_down == self.lowest def test_automatic_child_rel(self): - self.assertIsInstance(self.highest.t1_child_rel, DictRelationship) - self.assertIsInstance(self.highest.t2_child_rel, DictRelationship) + assert isinstance(self.highest.t1_child_rel, DictRelationship) + assert isinstance(self.highest.t2_child_rel, DictRelationship) - self.assertEqual(self.highest.t1_child_rel.parent, self.highest.t1) - self.assertEqual(self.highest.t2_child_rel.parent, self.highest.t2) - self.assertEqual(self.highest.t1_child_rel.parent, self.highest.t1) - self.assertEqual(self.highest.t2_child_rel.parent, self.highest.t2) + assert 
self.highest.t1_child_rel.parent == self.highest.t1 + assert self.highest.t2_child_rel.parent == self.highest.t2 + assert self.highest.t1_child_rel.parent == self.highest.t1 + assert self.highest.t2_child_rel.parent == self.highest.t2 # Provides textual relationship from t1 to t1[1337] - self.assertEqual('[1337]', self.highest.t2_child_rel.get_param_repr()) + assert '[1337]' == self.highest.t2_child_rel.get_param_repr() def test_path(self): # Provides textual path all the way through - self.assertEqual(self.lowest.path("self.t1"), "self.t1[1337].a") + assert self.lowest.path("self.t1") == "self.t1[1337].a" def test_change_of_path_root(self): - self.assertEqual(self.lowest.path("root"), "root[1337].a") - self.assertEqual(self.lowest.path(""), "[1337].a") + assert self.lowest.path("root") == "root[1337].a" + assert self.lowest.path("") == "[1337].a" def test_path_when_both_children_empty(self): """ @@ -198,22 +181,21 @@ def test_path_when_both_children_empty(self): up = DiffLevel(t1, t2) down = up.down = DiffLevel(child_t1, child_t2) path = down.path() - self.assertEqual(path, 'root') + assert path == 'root' def test_repr_short(self): level = Verbose.level Verbose.level = 0 item_repr = repr(self.lowest) Verbose.level = level - self.assertEqual(item_repr, '') + assert item_repr == '' def test_repr_long(self): level = Verbose.level Verbose.level = 1 item_repr = repr(self.lowest) Verbose.level = level - self.assertEqual(item_repr, - "") + assert item_repr == "" def test_repetition_attribute_and_repr(self): t1 = [1, 1] @@ -221,20 +203,20 @@ def test_repetition_attribute_and_repr(self): some_repetition = 'some repetition' node = DiffLevel(t1, t2) node.additional['repetition'] = some_repetition - self.assertEqual(node.repetition, some_repetition) - self.assertEqual(repr(node), "") + assert node.repetition == some_repetition + assert repr(node) == "" -class ChildRelationshipTestCase(TestCase): +class TestChildRelationship: def test_create_invalid_klass(self): - with 
self.assertRaises(TypeError): + with pytest.raises(TypeError): ChildRelationship.create(DiffLevel, "hello", 42) def test_rel_repr_short(self): rel = WorkingChildRelationship(parent="that parent", child="this child", param="some param") rel_repr = repr(rel) expected = "" - self.assertEqual(rel_repr, expected) + assert rel_repr == expected def test_rel_repr_long(self): rel = WorkingChildRelationship( @@ -243,4 +225,4 @@ def test_rel_repr_long(self): param="some param") rel_repr = repr(rel) expected = "" - self.assertEqual(rel_repr, expected) + assert rel_repr == expected From e6a94a31680b831d75226d6f17a921d8d725cc9c Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 00:54:28 -0800 Subject: [PATCH 30/76] travis --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5ca01f81..84a55ba2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,9 +11,9 @@ python: sudo: false install: - - pip install coveralls + - pip install -r requirements-dev.txt -script: coverage run --source deepdiff setup.py test +script: pytest --cov=deepdiff tests/ after_success: - coveralls From e95d22846f9fd4b69fc5f0064e26b57d0ce5fba7 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 01:06:57 -0800 Subject: [PATCH 31/76] travis --- .travis.yml | 18 +++++++++--------- README.md | 2 +- docs/index.rst | 2 +- requirements-dev.txt | 1 + 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index 84a55ba2..02959988 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,14 @@ language: python -python: - - "3.3" - - "3.4" - - "3.5" - - "3.6" - - "3.7" - - "pypy3" - -sudo: false +matrix: + include: + - python: 2.7 + - python: 3.4 + - python: 3.5 + - python: 3.6 + - python: 3.7 + dist: xenial + sudo: true install: - pip install -r requirements-dev.txt diff --git a/README.md b/README.md index 2ebda1f8..c3d3df8f 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ - DeepSearch: Search 
for objects within other objects. - DeepHash: Hash any object based on their content. -Tested on Python 3.3, 3.4, 3.5, 3.6, 3.7, Pypy3 +Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 **NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to supprt Python 2** diff --git a/docs/index.rst b/docs/index.rst index 8f4ba52a..0f831308 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,7 +12,7 @@ DeepDiff 4.0.0 documentation! **DeepHash: Hash any object based on their content even if they are not "hashable".** -DeepDiff works with Python 3.3, 3.4, 3.5, 3.6, 3.7, Pypy3 +DeepDiff works with Python 3.4, 3.5, 3.6, 3.7, Pypy3 NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to supprt Python 2. diff --git a/requirements-dev.txt b/requirements-dev.txt index ac0539d7..94563262 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,4 @@ +-r requirements.txt pytest==4.0.1 pytest-cov==2.6.0 numpy==1.15.4 From 3d8f15916768ee576372face5d73e81b428a9e61 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 01:11:16 -0800 Subject: [PATCH 32/76] py2 bye bye --- .travis.yml | 2 +- setup.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 02959988..bcbb06e6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,10 +2,10 @@ language: python matrix: include: - - python: 2.7 - python: 3.4 - python: 3.5 - python: 3.6 + - python: pypy3 - python: 3.7 dist: xenial sudo: true diff --git a/setup.py b/setup.py index e9c1ebc0..a8f99c38 100755 --- a/setup.py +++ b/setup.py @@ -1,7 +1,11 @@ import os import re +import sys from setuptools import setup +if sys.version[0] == '2': # pragma: no cover + sys.exit('Python 2 is not supported anymore. 
The last version of DeepDiff that supported Py2 was 3.3.0') + # if you are not using vagrant, just delete os.link directly, # The hard link only saves a little disk space, so you should not care if os.environ.get('USER', '') == 'vagrant': @@ -51,7 +55,6 @@ def get_reqs(filename): "Intended Audience :: Developers", "Operating System :: OS Independent", "Topic :: Software Development", - "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", From 82b09a01639a1ae6b7f7e8e829b634db7ee02f14 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 01:24:42 -0800 Subject: [PATCH 33/76] fixing ttest --- AUTHORS | 2 ++ README.md | 1 + tests/test_diff_text.py | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 23034d5d..d394827f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -16,3 +16,5 @@ Also thanks to: - maxrothman for search in inherited class attributes - maxrothman for search for types/objects - MartyHub for exclude regex paths +- sreecodeslayer for DeepSearch match_string +- Brian Maissy (brianmaissy) for weakref fix and enum tests diff --git a/README.md b/README.md index c3d3df8f..a35f65c3 100644 --- a/README.md +++ b/README.md @@ -983,3 +983,4 @@ Also thanks to: - maxrothman for search for types/objects - MartyHub for exclude regex paths - sreecodeslayer for DeepSearch match_string +- Brian Maissy (brianmaissy) for weakref fix and enum tests diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index ffaf78ae..6ac1d76b 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -829,7 +829,7 @@ def __init__(self, a): } }, } - self.assertEqual(diff, result) + assert result == diff def get_custom_objects_add_and_remove(self): class ClassA(object): From 6e6a658f9cbe36c689b15a44a19df512fc41dd96 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 9 Dec 2018 23:46:30 -0800 Subject: [PATCH 34/76] Adding 
Boba-2 changes for excluding type --- AUTHORS | 1 + README.md | 1 + deepdiff/contenthash.py | 16 ++--- deepdiff/diff.py | 1 + deepdiff/helper.py | 56 +++++---------- docs/index.rst | 22 ++++-- tests/test_diff_text.py | 17 +++++ tests/test_hash.py | 138 +++++++++++++++++++----------------- tests/test_serialization.py | 2 +- 9 files changed, 136 insertions(+), 118 deletions(-) diff --git a/AUTHORS b/AUTHORS index d394827f..48dd21df 100644 --- a/AUTHORS +++ b/AUTHORS @@ -18,3 +18,4 @@ Also thanks to: - MartyHub for exclude regex paths - sreecodeslayer for DeepSearch match_string - Brian Maissy (brianmaissy) for weakref fix and enum tests +- Bartosz Borowik (boba-2) for Exclude types fix when ignoring order diff --git a/README.md b/README.md index a35f65c3..535981aa 100644 --- a/README.md +++ b/README.md @@ -984,3 +984,4 @@ Also thanks to: - MartyHub for exclude regex paths - sreecodeslayer for DeepSearch match_string - Brian Maissy (brianmaissy) for weakref fix and enum tests +- Bartosz Borowik (boba-2) for Exclude types fix when ignoring order diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 1064204f..85be0472 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -54,8 +54,6 @@ class DeepHash(dict): exclude_types: list, default = None. List of object types to exclude from hashing. - Note that the deepdiff diffing functionality lets this to be the default at all times. - But if you are using DeepHash directly, you can set this parameter. hasher: function. default = DeepHash.murmur3_128bit hasher is the hashing function. The default is DeepHash.murmur3_128bit. 
@@ -255,13 +253,11 @@ def _skip_this(self, obj): def _prep_dict(self, obj, parents_ids=frozenset({})): result = [] - obj_keys = set(obj.keys()) - for key in obj_keys: + for key, item in obj.items(): key_hash = self._hash(key) - item = obj[key] item_id = id(item) - if parents_ids and item_id in parents_ids: + if (parents_ids and item_id in parents_ids) or self._skip_this(item): continue parents_ids_added = self._add_to_frozen_set(parents_ids, item_id) hashed = self._hash(item, parents_ids_added) @@ -281,16 +277,16 @@ def _prep_iterable(self, obj, parents_ids=frozenset({})): result = defaultdict(int) - for i, x in enumerate(obj): - if self._skip_this(x): + for item in obj: + if self._skip_this(item): continue - item_id = id(x) + item_id = id(item) if parents_ids and item_id in parents_ids: continue parents_ids_added = self._add_to_frozen_set(parents_ids, item_id) - hashed = self._hash(x, parents_ids_added) + hashed = self._hash(item, parents_ids_added) # counting repetitions result[hashed] += 1 diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 1445a15a..9dd0e0d4 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -994,6 +994,7 @@ def __create_hashtable(self, t, level): try: hashes_all = DeepHash(item, hashes=self.hashes, + exclude_types=self.exclude_types, significant_digits=self.significant_digits, include_string_type_changes=self.include_string_type_changes, hasher=self.hasher) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 0d38a69f..8f2125c9 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -31,18 +31,6 @@ IndexedHash = namedtuple('IndexedHash', 'indexes item') -EXPANDED_KEY_MAP = { # pragma: no cover - 'dic_item_added': 'dictionary_item_added', - 'dic_item_removed': 'dictionary_item_removed', - 'newindexes': 'new_indexes', - 'newrepeat': 'new_repeat', - 'newtype': 'new_type', - 'newvalue': 'new_value', - 'oldindexes': 'old_indexes', - 'oldrepeat': 'old_repeat', - 'oldtype': 'old_type', - 'oldvalue': 'old_value'} - def 
short_repr(item, max_length=15): """Short representation of item if it is too long""" @@ -91,35 +79,29 @@ class NotPresent(OtherTypes): # pragma: no cover notpresent = NotPresent() -WARNING_NUM = 0 - - -def warn(*args, **kwargs): - global WARNING_NUM +# Disabling remapping from old to new keys since the mapping is deprecated. +RemapDict = dict - if WARNING_NUM < 10: - WARNING_NUM += 1 - logger.warning(*args, **kwargs) +# class RemapDict(dict): +# """ +# DISABLED +# Remap Dictionary. -class RemapDict(dict): - """ - Remap Dictionary. - - For keys that have a new, longer name, remap the old key to the new key. - Other keys that don't have a new name are handled as before. - """ +# For keys that have a new, longer name, remap the old key to the new key. +# Other keys that don't have a new name are handled as before. +# """ - def __getitem__(self, old_key): - new_key = EXPANDED_KEY_MAP.get(old_key, old_key) - if new_key != old_key: - warn( - "DeepDiff Deprecation: %s is renamed to %s. Please start using " - "the new unified naming convention.", old_key, new_key) - if new_key in self: - return self.get(new_key) - else: # pragma: no cover - raise KeyError(new_key) +# def __getitem__(self, old_key): +# new_key = EXPANDED_KEY_MAP.get(old_key, old_key) +# if new_key != old_key: +# logger.warning( +# "DeepDiff Deprecation: %s is renamed to %s. Please start using " +# "the new unified naming convention.", old_key, new_key) +# if new_key in self: +# return self.get(new_key) +# else: # pragma: no cover +# raise KeyError(new_key) class Verbose(object): diff --git a/docs/index.rst b/docs/index.rst index 0f831308..48834d44 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -475,10 +475,20 @@ Victor Hahn Castell ALso thanks to: -- nfvs for Travis-CI setup script -- brbsix for initial Py3 porting -- WangFenjin for unicode support -- timoilya for comparing list of sets when ignoring order -- Bernhard10 for significant digits comparison +- nfvs for Travis-CI setup script. 
+- brbsix for initial Py3 porting. +- WangFenjin for unicode support. +- timoilya for comparing list of sets when ignoring order. +- Bernhard10 for significant digits comparison. - b-jazz for PEP257 cleanup, Standardize on full names, fixing line endings. -- Victor Hahn Castell @ Flexoptix for deep set comparison +- finnhughes for fixing __slots__ +- moloney for Unicode vs. Bytes default +- serv-inc for adding help(deepdiff) +- movermeyer for updating docs +- maxrothman for search in inherited class attributes +- maxrothman for search for types/objects +- MartyHub for exclude regex paths +- sreecodeslayer for DeepSearch match_string +- Brian Maissy (brianmaissy) for weakref fix and enum tests +- Bartosz Borowik (boba-2) for Exclude types fix when ignoring order + diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 6ac1d76b..34f3bf3a 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1253,6 +1253,7 @@ def test_negative_significant_digits(self): with pytest.raises(ValueError): DeepDiff(1, 1, significant_digits=-1) + @pytest.mark.skip(reason="REMAPPING DISABLED UNTIL KEY NAMES CHANGE AGAIN IN FUTURE") def test_base_level_dictionary_remapping(self): """ Since subclassed dictionaries that override __getitem__ treat newdict.get(key) @@ -1266,6 +1267,7 @@ def test_base_level_dictionary_remapping(self): assert ddiff['dic_item_added'] == ddiff['dictionary_item_added'] assert ddiff['dic_item_removed'] == ddiff['dictionary_item_removed'] + @pytest.mark.skip(reason="REMAPPING DISABLED UNTIL KEY NAMES CHANGE AGAIN IN FUTURE") def test_index_and_repeat_dictionary_remapping(self): t1 = [1, 3, 1, 4] t2 = [4, 4, 1] @@ -1275,6 +1277,7 @@ def test_index_and_repeat_dictionary_remapping(self): assert ddiff['repetition_change']['root[0]']['oldindexes'] == ddiff['repetition_change']['root[0]']['old_indexes'] assert ddiff['repetition_change']['root[0]']['oldrepeat'] == ddiff['repetition_change']['root[0]']['old_repeat'] + 
@pytest.mark.skip(reason="REMAPPING DISABLED UNTIL KEY NAMES CHANGE AGAIN IN FUTURE") def test_value_and_type_dictionary_remapping(self): t1 = {1: 1, 2: 2} t2 = {1: 1, 2: '2'} @@ -1382,6 +1385,20 @@ def test_skip_str_type_in_dictionary(self): result = {} assert result == ddiff + def test_skip_str_type_in_dict_on_list(self): + t1 = [{1: "a"}] + t2 = [{}] + ddiff = DeepDiff(t1, t2, exclude_types=[str]) + result = {} + assert result == ddiff + + def test_skip_str_type_in_dict_on_list_when_ignored_order(self): + t1 = [{1: "a"}] + t2 = [{}] + ddiff = DeepDiff(t1, t2, exclude_types=[str], ignore_order=True) + result = {} + assert result == ddiff + def test_unknown_parameters(self): with pytest.raises(ValueError): DeepDiff(1, 1, wrong_param=2) diff --git a/tests/test_hash.py b/tests/test_hash.py index e3fd77c6..4d754de8 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -291,7 +291,7 @@ def test_skip_type(self): l1 = logging.getLogger("test") obj = {"log": l1, 2: 1337} result = DeepHashPrep(obj, exclude_types={logging.Logger}) - assert result[id(l1)] is skipped + assert id(l1) not in result def test_prep_dic_with_loop(self): obj = {2: 1337} @@ -313,70 +313,80 @@ def test_prep_iterable_with_excluded_type(self): result = DeepHashPrep(obj, exclude_types={logging.Logger}) assert id(l1) not in result + def test_skip_str_type_in_dict_on_list(self): + dic1 = {1: "a"} + t1 = [dic1] + dic2 = {} + t2 = [dic2] + t1_hash = DeepHashPrep(t1, exclude_types=[str]) + t2_hash = DeepHashPrep(t2, exclude_types=[str]) + assert id(1) in t1_hash + assert t1_hash[dic1] == t2_hash[dic2] + + +class TestDeepHashSHA1: + """DeepHash with SHA1 Tests.""" + + def test_prep_str_sha1(self): + obj = "a" + expected_result = { + id(obj): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + } + result = DeepHash(obj, hasher=DeepHash.sha1hex) + assert expected_result == result + + def test_prep_str_sha1_fail_if_mutable(self): + """ + This test fails if ContentHash is getting a mutable copy of hashes + which 
means each init of the ContentHash will have hashes from + the previous init. + """ + obj1 = "a" + id_obj1 = id(obj1) + expected_result = { + id_obj1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + } + result = DeepHash(obj1, hasher=DeepHash.sha1hex) + assert expected_result == result + obj2 = "b" + result = DeepHash(obj2, hasher=DeepHash.sha1hex) + assert id_obj1 not in result + + def test_bytecode(self): + obj = b"a" + expected_result = { + id(obj): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + } + result = DeepHash(obj, hasher=DeepHash.sha1hex) + assert expected_result == result + + def test_list1(self): + string1 = "a" + obj = [string1, 10, 20] + expected_result = { + id(string1): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', + id(obj): 'eac61cbd194e5e03c210a3dce67b9bfd6a7b7acb', + id(10): DeepHash.sha1hex('int:10'), + id(20): DeepHash.sha1hex('int:20'), + } + result = DeepHash(obj, hasher=DeepHash.sha1hex) + assert expected_result == result -# class TestDeepHashSHA1: -# """DeepHash with SHA1 Tests.""" - -# def test_prep_str_sha1(self): -# obj = "a" -# expected_result = { -# id(obj): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' -# } -# result = DeepHash(obj, hasher=DeepHash.sha1hex) -# assert expected_result == result - -# def test_prep_str_sha1_fail_if_mutable(self): -# """ -# This test fails if ContentHash is getting a mutable copy of hashes -# which means each init of the ContentHash will have hashes from -# the previous init. 
-# """ -# obj1 = "a" -# id_obj1 = id(obj1) -# expected_result = { -# id_obj1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' -# } -# result = DeepHash(obj1, hasher=DeepHash.sha1hex) -# assert expected_result == result -# obj2 = "b" -# result = DeepHash(obj2, hasher=DeepHash.sha1hex) -# assert id_obj1 not in result) - -# def test_bytecode(self): -# obj = b"a" -# expected_result = { -# id(obj): '1283c61f8aa47c22d22552b742c93f6f6dac83ab' -# } -# result = DeepHash(obj, hasher=DeepHash.sha1hex) -# assert expected_result == result - -# def test_list1(self): -# string1 = "a" -# obj = [string1, 10, 20] -# expected_result = { -# id(string1): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', -# id(obj): 'ad8e2f1479d6a5e1b01304f18f04bbe3ea0673ca', -# id(10): DeepHash.sha1hex('int:10'), -# id(20): DeepHash.sha1hex('int:20'), -# } -# result = DeepHash(obj, hasher=DeepHash.sha1hex) -# assert expected_result == result - -# def test_dict1(self): -# string1 = "a" -# key1 = "key1" -# obj = {key1: string1, 1: 10, 2: 20} -# expected_result = { -# id(1): DeepHash.sha1hex('int:1'), -# id(10): DeepHash.sha1hex('int:10'), -# id(2): DeepHash.sha1hex('int:2'), -# id(20): DeepHash.sha1hex('int:20'), -# id(key1): '35624f541de8d2cc9c31deba03c7dda9b1da09f7', -# id(string1): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', -# id(obj): '8fa42fa0aa950885c4c1ec95a3d6423fc673bf49' -# } -# result = DeepHash(obj, hasher=DeepHash.sha1hex) -# assert expected_result == result + def test_dict1(self): + string1 = "a" + key1 = "key1" + obj = {key1: string1, 1: 10, 2: 20} + expected_result = { + id(1): DeepHash.sha1hex('int:1'), + id(10): DeepHash.sha1hex('int:10'), + id(2): DeepHash.sha1hex('int:2'), + id(20): DeepHash.sha1hex('int:20'), + id(key1): '1073ab6cda4b991cd29f9e83a307f34004ae9327', + id(string1): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', + id(obj): '11e23f096df81b1ccab0c309cdf8b4ba5a0a6895' + } + result = DeepHash(obj, hasher=DeepHash.sha1hex) + assert expected_result == result class TestHasher: diff --git 
a/tests/test_serialization.py b/tests/test_serialization.py index f2b04207..db9a542f 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -32,7 +32,7 @@ def test_serialization_text(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2) - assert "deepdiff.helper.RemapDict" in ddiff.json + assert "builtins.list" in ddiff.json def test_deserialization(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} From 96b2c8696d3a1285f70859a02eff4d4557a4b63f Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 10 Dec 2018 00:07:21 -0800 Subject: [PATCH 35/76] ading some serialization options --- deepdiff/diff.py | 42 +++++++++++++++++++++++++++++++++++-- deepdiff/helper.py | 6 ++++++ tests/test_serialization.py | 10 ++++----- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 9dd0e0d4..9f0c708f 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -10,14 +10,16 @@ import re import difflib import logging +import json import jsonpickle +import warnings from decimal import Decimal from collections.abc import Mapping, Iterable from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, - IndexedHash, Verbose, unprocessed) + IndexedHash, Verbose, unprocessed, json_convertor_default) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel from deepdiff.model import DictRelationship, AttributeRelationship from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship @@ -26,6 +28,7 @@ from itertools import zip_longest logger = logging.getLogger(__name__) +warnings.simplefilter('once', DeprecationWarning) class DeepDiff(ResultDict): @@ -1154,6 +1157,10 @@ def __diff(self, level, parents_ids=frozenset({})): @property def json(self): + warnings.warn( + "json property will be 
deprecated. Instead use: to_json_pickle() to get the json pickle or to_json() for bare-bone json.", + DeprecationWarning + ) if not hasattr(self, '_json'): # copy of self removes all the extra attributes since it assumes # we have only a simple dictionary. @@ -1161,14 +1168,45 @@ def json(self): self._json = jsonpickle.encode(copied) return self._json + def to_json_pickle(self): + """ + Get the json pickle of the diff object. Unless you need all the attributes and functionality of DeepDiff, doing to_json is the safer option. + """ + copied = self.copy() + return jsonpickle.encode(copied) + @json.deleter def json(self): del self._json @classmethod - def from_json(self, value): + def from_json(cls, value): + warnings.warn( + "from_json is renamed to from_json_pickle", + DeprecationWarning + ) + return cls.from_json_pickle(value) + + @classmethod + def from_json_pickle(cls, value): + """ + Load DeepDiff object with all the bells and whistles from the json pickle dump. + Note that json pickle dump comes from to_json_pickle + """ return jsonpickle.decode(value) + def to_json(self): + """ + Dump json of the text view + """ + return json.dumps(self, default=json_convertor_default) + + def to_dict(self): + """ + Dump dictionary of the text view + """ + return dict(self) + if __name__ == "__main__": # pragma: no cover import doctest diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 8f2125c9..38a97a5c 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -109,3 +109,9 @@ class Verbose(object): Global verbose level """ level = 1 + + +def json_convertor_default(obj): + if isinstance(obj, Decimal): + return float(obj) + raise TypeError diff --git a/tests/test_serialization.py b/tests/test_serialization.py index db9a542f..4b1d113e 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -32,13 +32,13 @@ def test_serialization_text(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": 
"world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2) - assert "builtins.list" in ddiff.json + assert "builtins.list" in ddiff.to_json_pickle() def test_deserialization(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2) - jsoned = ddiff.json + jsoned = ddiff.to_json_pickle() ddiff2 = DeepDiff.from_json(jsoned) assert ddiff == ddiff2 @@ -46,18 +46,18 @@ def test_serialization_tree(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2, view='tree') - jsoned = ddiff.json + jsoned = ddiff.to_json_pickle() assert "world" in jsoned def test_deserialization_tree(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2, view='tree') - jsoned = ddiff.json + jsoned = ddiff.to_json_pickle() ddiff2 = DeepDiff.from_json(jsoned) assert 'type_changes' in ddiff2 - def test_deleting_serialization_cache(self): + def test_deleting_serialization_cache_when_using_the_property(self): t1 = {1: 1} t2 = {1: 2} ddiff = DeepDiff(t1, t2) From d39d6b8004f05c50a4b9c05fb112eeb6567b9522 Mon Sep 17 00:00:00 2001 From: Brian Maissy Date: Thu, 8 Nov 2018 10:55:21 +0200 Subject: [PATCH 36/76] add tests to make sure hashing and comparing enums works --- tests/test_diff_text.py | 26 ++++++++++++++++++++++++++ tests/test_hash.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 1 deletion(-) mode change 100644 => 100755 tests/test_hash.py diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 34f3bf3a..01d77664 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -761,6 +761,32 @@ def test_named_tuples(self): } assert result == ddiff + def test_enums(self): + from enum import Enum + + class MyEnum(Enum): + A = 1 + B = 2 + + ddiff = DeepDiff(MyEnum.A, 
MyEnum(1)) + result = {} + assert ddiff == result + + ddiff = DeepDiff(MyEnum.A, MyEnum.B) + result = { + 'values_changed': { + 'root._name_': { + 'old_value': 'A', + 'new_value': 'B' + }, + 'root._value_': { + 'old_value': 1, + 'new_value': 2 + } + } + } + assert ddiff == result + def test_custom_objects_change(self): t1 = CustomClass(1) t2 = CustomClass(2) diff --git a/tests/test_hash.py b/tests/test_hash.py old mode 100644 new mode 100755 index 4d754de8..0cf9e984 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -7,6 +7,7 @@ from collections import namedtuple from functools import partial import logging +from enum import Enum logging.disable(logging.CRITICAL) @@ -94,7 +95,7 @@ def test_list_and_tuple(self): def test_named_tuples(self): # checking if pypy3 is running the test - # in that case due to a pypy3 bug or something + # in that case due to a difference of string interning implementation # the id of x inside the named tuple changes. x = "x" x_id = id(x) @@ -112,6 +113,38 @@ def test_named_tuples(self): } assert expected_result == result + def test_enum(self): + class MyEnum(Enum): + A = 1 + B = 2 + + # checking if pypy3 is running the test + # in that case due to a difference of string interning implementation + # the ids of strings change + if pypy3: + # only compare the hashes for the enum instances themselves + assert DeepHashPrep(MyEnum.A)[id(MyEnum.A)] == ( + 'objdict:{' + '__objclass__:EnumMeta:objdict:{_name_:B;_value_:int:2};' + '_name_:A;_value_:int:1}' + ) + assert DeepHashPrep(MyEnum.B)[id(MyEnum.B)] == ( + 'objdict:{' + '__objclass__:EnumMeta:objdict:{_name_:A;_value_:int:1};' + '_name_:B;_value_:int:2}' + ) + assert DeepHashPrep(MyEnum(1))[id(MyEnum.A)] == ( + 'objdict:{' + '__objclass__:EnumMeta:objdict:{_name_:B;_value_:int:2};' + '_name_:A;_value_:int:1}' + ) + else: + assert DeepHashPrep(MyEnum.A) == DeepHashPrep(MyEnum.A) + assert DeepHashPrep(MyEnum.A) == DeepHashPrep(MyEnum(1)) + assert DeepHashPrep(MyEnum.A) != 
DeepHashPrep(MyEnum.A.name) + assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.A.value) + assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.B) + def test_dict_hash(self): string1 = "a" string1_prepped = prep_str(string1) From 3c1f50307f5049a84f73b498ddd5c52eca0d4aea Mon Sep 17 00:00:00 2001 From: Devipriya Sarkar Date: Thu, 31 Jan 2019 23:27:13 +0530 Subject: [PATCH 37/76] Update README.md to fix a broken link Update broken link in section "Verbose Level" to point to correct example link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7fb5dae3..7a07f27e 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,7 @@ Approximate float comparison: Verbose level by default is 1. The possible values are 0, 1 and 2. -- Verbose level 0: won't report values when type changed. [Example](##type-of-an-item-has-changed) +- Verbose level 0: won't report values when type changed. [Example](#type-of-an-item-has-changed) - Verbose level 1: default - Verbose level 2: will report values when custom objects or dictionaries have items added or removed. 
[Example](#items-added-or-removed-verbose) From bfbdf860b57d3907040e77882bccc3133c49e6b2 Mon Sep 17 00:00:00 2001 From: Brian Maissy Date: Mon, 11 Feb 2019 07:56:22 +0200 Subject: [PATCH 38/76] fix bugs involving child classes with __slots__ and objects with bad attributes --- deepdiff/contenthash.py | 3 +++ deepdiff/diff.py | 21 ++++++++++++++++----- tests/test_diff_text.py | 24 ++++++++++++++++++++++++ tests/test_hash.py | 16 ++++++++++++++++ 4 files changed, 59 insertions(+), 5 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 85be0472..2a235713 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -369,6 +369,9 @@ def _hash(self, obj, parents_ids=frozenset({})): if result is not_hashed: # pragma: no cover self[UNPROCESSED].append(obj) + elif result is unprocessed: + pass + elif self.constant_size: if isinstance(obj, strings): result_cleaned = result diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 9dd0e0d4..954b7691 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -23,7 +23,7 @@ from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship from deepdiff.contenthash import DeepHash -from itertools import zip_longest +from itertools import zip_longest, chain logger = logging.getLogger(__name__) @@ -752,10 +752,21 @@ def unmangle(attribute): ) return attribute - slots = object.__slots__ - if isinstance(slots, strings): - return {slots: getattr(object, unmangle(slots))} - return {i: getattr(object, unmangle(i)) for i in slots} + all_slots = [] + + if isinstance(object, type): + mro = object.__mro__ + else: + mro = object.__class__.__mro__ + + for type_in_mro in mro: + slots = getattr(type_in_mro, '__slots__', ()) + if isinstance(slots, strings): + all_slots.append(slots) + else: + all_slots.extend(slots) + + return {i: getattr(object, unmangle(i)) for i in all_slots} def __diff_obj(self, level, parents_ids=frozenset({}), is_namedtuple=False): diff 
--git a/tests/test_diff_text.py b/tests/test_diff_text.py index 34f3bf3a..50eeaa22 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -796,6 +796,30 @@ def __init__(self, x, y): } assert result == ddiff + def test_custom_objects_slot_in_parent_class_change(self): + class ClassA(object): + __slots__ = ['x'] + + class ClassB(ClassA): + __slots__ = ['y'] + + def __init__(self, x, y): + self.x = x + self.y = y + + t1 = ClassB(1, 1) + t2 = ClassB(2, 1) + ddiff = DeepDiff(t1, t2) + result = { + 'values_changed': { + 'root.x': { + 'old_value': 1, + 'new_value': 2 + } + } + } + assert result == ddiff + def test_custom_objects_with_single_protected_slot(self): class ClassA(object): __slots__ = '__a' diff --git a/tests/test_hash.py b/tests/test_hash.py index 4d754de8..2914de87 100644 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -47,6 +47,22 @@ def test_list_of_sets(self): expected_result = {id(1), id(2), id(a), id(b), id(obj)} assert set(result.keys()) == expected_result + def test_bad_attribute(self): + class Bad(object): + __slots__ = ['x', 'y'] + + def __getattr__(self, key): + raise AttributeError("Bad item") + + def __str__(self): + return "Bad Object" + + t1 = Bad() + + result = DeepHash(t1) + expected_result = {id(t1): unprocessed, 'unprocessed': [t1]} + assert expected_result == result + class TestDeepHashPrep: """DeepHashPrep Tests covering object serialization.""" From 675c94eaf3d0b5237100ec2e8b7c4669d51d7aaf Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 10 Feb 2019 22:59:39 -0800 Subject: [PATCH 39/76] using orderedset --- deepdiff/contenthash.py | 14 +- deepdiff/diff.py | 664 ++---------------------------------- deepdiff/diff_doc.rst | 636 ++++++++++++++++++++++++++++++++++ deepdiff/helper.py | 40 ++- deepdiff/model.py | 11 +- requirements.txt | 1 + tests/test_serialization.py | 4 +- 7 files changed, 718 insertions(+), 652 deletions(-) create mode 100644 deepdiff/diff_doc.rst diff --git a/deepdiff/contenthash.py 
b/deepdiff/contenthash.py index 85be0472..de61d27f 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -58,10 +58,10 @@ class DeepHash(dict): hasher: function. default = DeepHash.murmur3_128bit hasher is the hashing function. The default is DeepHash.murmur3_128bit. But you can pass another hash function to it if you want. - For example the Murmur3 32bit hash function or a cryptographic hash function or Python's builtin hash function. + For example a cryptographic hash function or Python's builtin hash function. All it needs is a function that takes the input in string format and returns the hash. - You can use it by passing: hasher=DeepHash.murmur3 for 32bit hash and hasher=hash for Python's builtin hash. + You can use it by passing: hasher=hash for Python's builtin hash. SHA1 is already provided as an alternative too: You can use it by passing: hasher=DeepHash.sha1hex @@ -194,19 +194,13 @@ def sha1hex(obj): obj = obj.encode('utf-8') return sha1(obj).hexdigest() - @staticmethod - def murmur3(obj): - """Use murmur3 for 32 bit hash.""" - obj = obj.encode('utf-8') - return mmh3.hash(obj, 123) - @staticmethod def murmur3_128bit(obj): """Use murmur3_128bit for 128 bit hash (default).""" obj = obj.encode('utf-8') # hash64 is actually 128bit. Weird. - # 123 is the seed - return mmh3.hash64(obj, 123) + # 1203 is the seed + return mmh3.hash64(obj, 1203) def __getitem__(self, key): if not isinstance(key, int): diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 9f0c708f..0ff6dcdb 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -8,6 +8,7 @@ # However the docstring expects it in a specific order in order to pass! 
import re +import os import difflib import logging import json @@ -15,9 +16,11 @@ import warnings from decimal import Decimal - +from itertools import zip_longest from collections.abc import Mapping, Iterable +from ordered_set import OrderedSet + from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose, unprocessed, json_convertor_default) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel @@ -25,630 +28,22 @@ from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship from deepdiff.contenthash import DeepHash -from itertools import zip_longest logger = logging.getLogger(__name__) warnings.simplefilter('once', DeprecationWarning) +TREE_VIEW = 'tree' +TEXT_VIEW = 'text' -class DeepDiff(ResultDict): - r""" - **DeepDiff** - - Deep Difference of dictionaries, iterables, strings and almost any other object. - It will recursively look for all the changes. - - DeepDiff 3.0 added the concept of views. - There is a default "text" view and a "tree" view. - - **Parameters** - - t1 : A dictionary, list, string or any python object that has __dict__ or __slots__ - This is the first item to be compared to the second item - - t2 : dictionary, list, string or almost any python object that has __dict__ or __slots__ - The second item is to be compared to the first one - - ignore_order : Boolean, defalt=False ignores orders for iterables. - Note that if you have iterables contatining any unhashable, ignoring order can be expensive. - Normally ignore_order does not report duplicates and repetition changes. - In order to report repetitions, set report_repetition=True in addition to ignore_order=True - report_repetition : Boolean, default=False reports repetitions when set True - ONLY when ignore_order is set True too. This works for iterables. - This feature currently is experimental and is not production ready. 
+current_dir = os.path.dirname(os.path.abspath(__file__)) - significant_digits : int >= 0, default=None. - If it is a non negative integer, it compares only that many digits AFTER - the decimal point. +with open(os.path.join(current_dir, 'diff_doc.rst'), 'r') as doc_file: + doc = doc_file.read() - This only affects floats, decimal.Decimal and complex. - Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits - - Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 - - For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) - - verbose_level : int >= 0, default = 1. - Higher verbose level shows you more details. - For example verbose level 1 shows what dictionary item are added or removed. - And verbose level 2 shows the value of the items that are added or removed too. - - exclude_paths: list, default = None. - List of paths to exclude from the report. - - exclude_regex_paths: list, default = None. - List of regex paths to exclude from the report. - - exclude_types: list, default = None. - List of object types to exclude from the report. - - hasher: default = DeepHash.murmur3_128bit - Hash function to be used. If you don't want Murmur3, you can use Python's built-in hash function - by passing hasher=hash. This is for advanced usage and normally you don't need to modify it. - - view: string, default = text - Starting the version 3 you can choosethe view into the deepdiff results. - The default is the text view which has been the only view up until now. - The new view is called the tree view which allows you to traverse through - the tree of changed items. - - **Returns** - - A DeepDiff object that has already calculated the difference of the 2 items. - - **Supported data types** - - int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects! 
- - **Text View** - - Text view is the original and currently the default view of DeepDiff. - - It is called text view because the results contain texts that represent the path to the data: - - Example of using the text view. - >>> from deepdiff import DeepDiff - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> print(ddiff) - {'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}} - - So for example ddiff['dictionary_item_removed'] is a set if strings thus this is called the text view. - - .. seealso:: - The following examples are using the *default text view.* - The Tree View is introduced in DeepDiff v3 and provides - traversing capabilitie through your diffed data and more! - Read more about the Tree View at the bottom of this page. - - Importing - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - - Same object returns empty - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = t1 - >>> print(DeepDiff(t1, t2)) - {} - - Type of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:"2", 3:3} - >>> pprint(DeepDiff(t1, t2), indent=2) - { 'type_changes': { 'root[2]': { 'new_type': , - 'new_value': '2', - 'old_type': , - 'old_value': 2}}} - - Value of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) - {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} - - Item added and/or removed - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff) - {'dictionary_item_added': {'root[5]', 'root[6]'}, - 'dictionary_item_removed': {'root[4]'}} - - Set verbose level to 2 in order to see the added or removed items with their values - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2, verbose_level=2) - >>> pprint(ddiff, indent=2) - { 'dictionary_item_added': {'root[5]': 5, 'root[6]': 6}, - 
'dictionary_item_removed': {'root[4]': 4}} - - String difference - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} - >>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { 'root[2]': {'new_value': 4, 'old_value': 2}, - "root[4]['b']": { 'new_value': 'world!', - 'old_value': 'world'}}} - - - String difference 2 - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { "root[4]['b']": { 'diff': '--- \n' - '+++ \n' - '@@ -1,5 +1,4 @@\n' - '-world!\n' - '-Goodbye!\n' - '+world\n' - ' 1\n' - ' 2\n' - ' End', - 'new_value': 'world\n1\n2\nEnd', - 'old_value': 'world!\n' - 'Goodbye!\n' - '1\n' - '2\n' - 'End'}}} - - >>> - >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) - --- - +++ - @@ -1,5 +1,4 @@ - -world! - -Goodbye! 
- +world - 1 - 2 - End - - - Type change - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'type_changes': { "root[4]['b']": { 'new_type': , - 'new_value': 'world\n\n\nEnd', - 'old_type': , - 'old_value': [1, 2, 3]}}} - - And if you don't care about the value of items that have changed type, please set verbose level to 0 - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:"2", 3:3} - >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) - { 'type_changes': { 'root[2]': { 'new_type': , - 'old_type': }}} - - List difference - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} - - List difference 2: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'iterable_item_added': {"root[4]['b'][3]": 3}, - 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 'old_value': 2}, - "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} - - List difference ignoring order or duplicates: (with the same dictionaries as above) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2, ignore_order=True) - >>> print (ddiff) - {} - - List difference ignoring order but reporting repetitions: - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = [1, 3, 1, 4] - >>> t2 = [4, 4, 1] - >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) - >>> pprint(ddiff, indent=2) - { 'iterable_item_removed': {'root[1]': 3}, - 'repetition_change': { 'root[0]': { 'new_indexes': [2], - 'new_repeat': 1, - 
'old_indexes': [0, 2], - 'old_repeat': 2, - 'value': 1}, - 'root[3]': { 'new_indexes': [0, 1], - 'new_repeat': 2, - 'old_indexes': [3], - 'old_repeat': 1, - 'value': 4}}} - - List that contains dictionary: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': {"root[4]['b'][2][2]"}, - 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} - - Sets: - >>> t1 = {1, 2, 8} - >>> t2 = {1, 2, 3, 5} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff) - {'set_item_added': {'root[5]', 'root[3]'}, 'set_item_removed': {'root[8]'}} - - Named Tuples: - >>> from collections import namedtuple - >>> Point = namedtuple('Point', ['x', 'y']) - >>> t1 = Point(x=11, y=22) - >>> t2 = Point(x=11, y=23) - >>> pprint (DeepDiff(t1, t2)) - {'values_changed': {'root.y': {'new_value': 23, 'old_value': 22}}} - - Custom objects: - >>> class ClassA(object): - ... a = 1 - ... def __init__(self, b): - ... self.b = b - ... 
- >>> t1 = ClassA(1) - >>> t2 = ClassA(2) - >>> - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - - Object attribute added: - >>> t2.c = "new attribute" - >>> pprint(DeepDiff(t1, t2)) - {'attribute_added': {'root.c'}, - 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - - Approximate decimals comparison (Significant digits after the point): - >>> t1 = Decimal('1.52') - >>> t2 = Decimal('1.57') - >>> DeepDiff(t1, t2, significant_digits=0) - {} - >>> DeepDiff(t1, t2, significant_digits=1) - {'values_changed': {'root': {'old_value': Decimal('1.52'), 'new_value': Decimal('1.57')}}} - - Approximate float comparison (Significant digits after the point): - >>> t1 = [ 1.1129, 1.3359 ] - >>> t2 = [ 1.113, 1.3362 ] - >>> pprint(DeepDiff(t1, t2, significant_digits=3)) - {} - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, - 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} - >>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) - {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} - - - .. note:: - All the examples for the text view work for the tree view too. - You just need to set view='tree' to get it in tree form. - - - **Tree View** - - Starting the version 3 You can chooe the view into the deepdiff results. - The tree view provides you with tree objects that you can traverse through to find - the parents of the objects that are diffed and the actual objects that are being diffed. - This view is very useful when dealing with nested objects. - Note that tree view always returns results in the form of Python sets. - - You can traverse through the tree elements! - - .. note:: - The Tree view is just a different representation of the diffed data. - Behind the scene, DeepDiff creates the tree view first and then converts it to textual - representation for the text view. - - .. 
code:: text - - +---------------------------------------------------------------+ - | | - | parent(t1) parent node parent(t2) | - | + ^ + | - +------|--------------------------|---------------------|-------+ - | | | up | - | Child | | | ChildRelationship - | Relationship | | | - | down | | | - +------|----------------------|-------------------------|-------+ - | v v v | - | child(t1) child node child(t2) | - | | - +---------------------------------------------------------------+ - - - :up: Move up to the parent node - :down: Move down to the child node - :path(): Get the path to the current node - :t1: The first item in the current node that is being diffed - :t2: The second item in the current node that is being diffed - :additional: Additional information about the node i.e. repetition - :repetition: Shortcut to get the repetition report - - - The tree view allows you to have more than mere textual representaion of the diffed objects. - It gives you the actual objects (t1, t2) throughout the tree of parents and children. - - **Examples Tree View** - - .. note:: - The Tree View is introduced in DeepDiff 3. - Set view='tree' in order to use this view. - - Value of an item has changed (Tree View) - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') - >>> ddiff_verbose0 - {'values_changed': {}} - >>> - >>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') - >>> ddiff_verbose1 - {'values_changed': {}} - >>> set_of_values_changed = ddiff_verbose1['values_changed'] - >>> # since set_of_values_changed includes only one item in a set - >>> # in order to get that one item we can: - >>> (changed,) = set_of_values_changed - >>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] - - >>> changed.t1 - 2 - >>> changed.t2 - 4 - >>> # You can traverse through the tree, get to the parents! 
- >>> changed.up - - - List difference (Tree View) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> ddiff - {'iterable_item_removed': {, }} - >>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. - >>> # One way to get one item from the set is to convert it to a list - >>> # And then get the first item of the list: - >>> removed = list(ddiff['iterable_item_removed'])[0] - >>> removed - - >>> - >>> parent = removed.up - >>> parent - - >>> parent.path() - "root[4]['b']" - >>> parent.t1 - [1, 2, 3, 4] - >>> parent.t2 - [1, 2] - >>> parent.up - - >>> parent.up.up - - >>> parent.up.up.t1 - {1: 1, 2: 2, 3: 3, 4: {'a': 'hello', 'b': [1, 2, 3, 4]}} - >>> parent.up.up.t1 == t1 # It is holding the original t1 that we passed to DeepDiff - True - - List difference 2 (Tree View) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> pprint(ddiff, indent = 2) - { 'iterable_item_added': {}, - 'values_changed': { , - }} - >>> - >>> # Note that iterable_item_added is a set with one item. - >>> # So in order to get that one item from it, we can do: - >>> - >>> (added,) = ddiff['iterable_item_added'] - >>> added - - >>> added.up.up - - >>> added.up.up.path() - 'root[4]' - >>> added.up.up.down - - >>> - >>> # going up twice and then down twice gives you the same node in the tree: - >>> added.up.up.down.down == added - True - - List difference ignoring order but reporting repetitions (Tree View) - >>> t1 = [1, 3, 1, 4] - >>> t2 = [4, 4, 1] - >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree') - >>> pprint(ddiff, indent=2) - { 'iterable_item_removed': {}, - 'repetition_change': { , - }} - >>> - >>> # repetition_change is a set with 2 items. 
- >>> # in order to get those 2 items, we can do the following. - >>> # or we can convert the set to list and get the list items. - >>> # or we can iterate through the set items - >>> - >>> (repeat1, repeat2) = ddiff['repetition_change'] - >>> repeat1 # the default verbosity is set to 1. - - >>> # The actual data regarding the repetitions can be found in the repetition attribute: - >>> repeat1.repetition - {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} - >>> - >>> # If you change the verbosity, you will see less: - >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree', verbose_level=0) - >>> ddiff - {'repetition_change': {, }, 'iterable_item_removed': {}} - >>> (repeat1, repeat2) = ddiff['repetition_change'] - >>> repeat1 - - >>> - >>> # But the verbosity level does not change the actual report object. - >>> # It only changes the textual representaion of the object. We get the actual object here: - >>> repeat1.repetition - {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} - >>> repeat1.t1 - 4 - >>> repeat1.t2 - 4 - >>> repeat1.up - - - List that contains dictionary (Tree View) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': {}, - 'values_changed': {}} - - Sets (Tree View): - >>> t1 = {1, 2, 8} - >>> t2 = {1, 2, 3, 5} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> print(ddiff) - {'set_item_removed': {}, 'set_item_added': {, }} - >>> # grabbing one item from set_item_removed set which has one item only - >>> (item,) = ddiff['set_item_removed'] - >>> item.up - - >>> item.up.t1 == t1 - True - - Named Tuples (Tree View): - >>> from collections import namedtuple - >>> Point = namedtuple('Point', ['x', 'y']) - >>> t1 = Point(x=11, y=22) - >>> t2 = Point(x=11, y=23) - >>> print(DeepDiff(t1, t2, 
view='tree')) - {'values_changed': {}} - - Custom objects (Tree View): - >>> class ClassA(object): - ... a = 1 - ... def __init__(self, b): - ... self.b = b - ... - >>> t1 = ClassA(1) - >>> t2 = ClassA(2) - >>> - >>> print(DeepDiff(t1, t2, view='tree')) - {'values_changed': {}} - - Object attribute added (Tree View): - >>> t2.c = "new attribute" - >>> pprint(DeepDiff(t1, t2, view='tree')) - {'attribute_added': {}, - 'values_changed': {}} - - Approximate decimals comparison (Significant digits after the point) (Tree View): - >>> t1 = Decimal('1.52') - >>> t2 = Decimal('1.57') - >>> DeepDiff(t1, t2, significant_digits=0, view='tree') - {} - >>> ddiff = DeepDiff(t1, t2, significant_digits=1, view='tree') - >>> ddiff - {'values_changed': {}} - >>> (change1,) = ddiff['values_changed'] - >>> change1 - - >>> change1.t1 - Decimal('1.52') - >>> change1.t2 - Decimal('1.57') - >>> change1.path() - 'root' - - Approximate float comparison (Significant digits after the point) (Tree View): - >>> t1 = [ 1.1129, 1.3359 ] - >>> t2 = [ 1.113, 1.3362 ] - >>> ddiff = DeepDiff(t1, t2, significant_digits=3, view='tree') - >>> ddiff - {} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> pprint(ddiff, indent=2) - { 'values_changed': { , - }} - >>> ddiff = DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1, view='tree') - >>> ddiff - {'values_changed': {}} - - - **Exclude types** - - Exclude certain types from comparison: - >>> l1 = logging.getLogger("test") - >>> l2 = logging.getLogger("test2") - >>> t1 = {"log": l1, 2: 1337} - >>> t2 = {"log": l2, 2: 1337} - >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) - {} - - **Exclude paths** - - Exclude part of your object tree from comparison - use `exclude_paths` and pass a set or list of paths to exclude: - >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} - >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} - >>> print (DeepDiff(t1, t2, 
exclude_paths={"root['ingredients']"})) - {} - - You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude: - >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] - >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] - >>> print (DeepDiff(t1, t2, exclude_regex_paths={"root\[\d+\]\['b'\]"})) - {} - - example 2: - >>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} - >>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} - >>> DeepDiff(t1, t2, exclude_regex_paths={"\['foo.'\]"}) - {} - - Tip: DeepDiff is using re.search on the path. So if you want to force it to match from the beginning of the path, add `^` to the beginning of regex. - - - - .. note:: - All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. - - **Serialization** - - DeepDiff uses jsonpickle in order to serialize and deserialize its results into json. - - Serialize and then deserialize back to deepdiff - >>> t1 = {1: 1, 2: 2, 3: 3} - >>> t2 = {1: 1, 2: "2", 3: 3} - >>> ddiff = DeepDiff(t1, t2) - >>> jsoned = ddiff.json - >>> jsoned - '{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' - >>> ddiff_new = DeepDiff.from_json(jsoned) - >>> ddiff == ddiff_new - True - - **Pycon 2016 Talk** - I gave a talk about how DeepDiff does what it does at Pycon 2016. 
- `Diff it to Dig it Pycon 2016 video `_ - - And here is more info: http://zepworks.com/blog/diff-it-to-digg-it/ - - - """ +class DeepDiff(ResultDict): + __doc__ = doc def __init__(self, t1, @@ -661,7 +56,7 @@ def __init__(self, exclude_types=set(), include_string_type_changes=False, verbose_level=1, - view='text', + view=TEXT_VIEW, hasher=DeepHash.murmur3_128bit, **kwargs): if kwargs: @@ -694,15 +89,20 @@ def __init__(self, self.tree.cleanup() - if view == 'tree': - self.update(self.tree) - del self.tree + self.view = view + view_results = self._get_view_results(view) + self.update(view_results) + + def _get_view_results(self, view): + """ + Get the results based on the view + """ + if view == TREE_VIEW: + result = self.tree else: - result_text = TextResult(tree_results=self.tree) - result_text.cleanup() # clean up text-style result dictionary - self.update( - result_text - ) # be compatible to DeepDiff 2.x if user didn't specify otherwise + result = TextResult(tree_results=self.tree) + result.cleanup() # clean up text-style result dictionary + return result # TODO: adding adding functionality # def __add__(self, other): @@ -1141,7 +541,7 @@ def __diff(self, level, parents_ids=frozenset({})): elif isinstance(level.t1, tuple): self.__diff_tuple(level, parents_ids) - elif isinstance(level.t1, (set, frozenset)): + elif isinstance(level.t1, (set, frozenset, OrderedSet)): self.__diff_set(level) elif isinstance(level.t1, Iterable): @@ -1170,7 +570,7 @@ def json(self): def to_json_pickle(self): """ - Get the json pickle of the diff object. Unless you need all the attributes and functionality of DeepDiff, doing to_json is the safer option. + Get the json pickle of the diff object. Unless you need all the attributes and functionality of DeepDiff, doing to_json is the safer option than json pickle.
""" copied = self.copy() return jsonpickle.encode(copied) @@ -1195,17 +595,21 @@ def from_json_pickle(cls, value): """ return jsonpickle.decode(value) - def to_json(self): + def to_json(self, default_mapping=None): """ Dump json of the text view """ - return json.dumps(self, default=json_convertor_default) + return json.dumps(self.to_dict(), default=json_convertor_default(default_mapping=default_mapping)) def to_dict(self): """ Dump dictionary of the text view """ - return dict(self) + if self.view == TREE_VIEW: + result = dict(self._get_view_results(view=TEXT_VIEW)) + else: + result = dict(self) + return result if __name__ == "__main__": # pragma: no cover diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst new file mode 100644 index 00000000..e81972ca --- /dev/null +++ b/deepdiff/diff_doc.rst @@ -0,0 +1,636 @@ +**DeepDiff** + +Deep Difference of dictionaries, iterables, strings and almost any other object. +It will recursively look for all the changes. + +DeepDiff 3.0 added the concept of views. +There is a default "text" view and a "tree" view. + +**Parameters** + +t1 : A dictionary, list, string or any python object that has __dict__ or __slots__ + This is the first item to be compared to the second item + +t2 : dictionary, list, string or almost any python object that has __dict__ or __slots__ + The second item is to be compared to the first one + +ignore_order : Boolean, defalt=False ignores orders for iterables. + Note that if you have iterables contatining any unhashable, ignoring order can be expensive. + Normally ignore_order does not report duplicates and repetition changes. + In order to report repetitions, set report_repetition=True in addition to ignore_order=True + +report_repetition : Boolean, default=False reports repetitions when set True + ONLY when ignore_order is set True too. This works for iterables. + This feature currently is experimental and is not production ready. + +significant_digits : int >= 0, default=None. 
+ If it is a non negative integer, it compares only that many digits AFTER + the decimal point. + + This only affects floats, decimal.Decimal and complex. + + Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits + + Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 + + For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) + +verbose_level : int >= 0, default = 1. + Higher verbose level shows you more details. + For example verbose level 1 shows which dictionary items are added or removed. + And verbose level 2 shows the value of the items that are added or removed too. + +exclude_paths: list, default = None. + List of paths to exclude from the report. + +exclude_regex_paths: list, default = None. + List of regex paths to exclude from the report. + +exclude_types: list, default = None. + List of object types to exclude from the report. + +hasher: default = DeepHash.murmur3_128bit + Hash function to be used. If you don't want Murmur3, you can use Python's built-in hash function + by passing hasher=hash. This is for advanced usage and normally you don't need to modify it. + +view: string, default = text + Starting with version 3 you can choose the view into the deepdiff results. + The default is the text view which has been the only view up until now. + The new view is called the tree view which allows you to traverse through + the tree of changed items. + +**Returns** + + A DeepDiff object that has already calculated the difference of the 2 items. + +**Supported data types** + +int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects! + +**Text View** + +Text view is the original and currently the default view of DeepDiff. + +It is called text view because the results contain texts that represent the path to the data: + +Example of using the text view.
+ >>> from deepdiff import DeepDiff + >>> t1 = {1:1, 3:3, 4:4} + >>> t2 = {1:1, 3:3, 5:5, 6:6} + >>> ddiff = DeepDiff(t1, t2) + >>> print(ddiff) + {'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}} + +So for example ddiff['dictionary_item_removed'] is a set if strings thus this is called the text view. + +.. seealso:: + The following examples are using the *default text view.* + The Tree View is introduced in DeepDiff v3 and provides + traversing capabilitie through your diffed data and more! + Read more about the Tree View at the bottom of this page. + +Importing + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + +Same object returns empty + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = t1 + >>> print(DeepDiff(t1, t2)) + {} + +Type of an item has changed + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:"2", 3:3} + >>> pprint(DeepDiff(t1, t2), indent=2) + { 'type_changes': { 'root[2]': { 'new_type': , + 'new_value': '2', + 'old_type': , + 'old_value': 2}}} + +Value of an item has changed + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:4, 3:3} + >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) + {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} + +Item added and/or removed + >>> t1 = {1:1, 3:3, 4:4} + >>> t2 = {1:1, 3:3, 5:5, 6:6} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff) + {'dictionary_item_added': {'root[5]', 'root[6]'}, + 'dictionary_item_removed': {'root[4]'}} + +Set verbose level to 2 in order to see the added or removed items with their values + >>> t1 = {1:1, 3:3, 4:4} + >>> t2 = {1:1, 3:3, 5:5, 6:6} + >>> ddiff = DeepDiff(t1, t2, verbose_level=2) + >>> pprint(ddiff, indent=2) + { 'dictionary_item_added': {'root[5]': 5, 'root[6]': 6}, + 'dictionary_item_removed': {'root[4]': 4}} + +String difference + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} + >>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 
'values_changed': { 'root[2]': {'new_value': 4, 'old_value': 2}, + "root[4]['b']": { 'new_value': 'world!', + 'old_value': 'world'}}} + + +String difference 2 + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'values_changed': { "root[4]['b']": { 'diff': '--- \n' + '+++ \n' + '@@ -1,5 +1,4 @@\n' + '-world!\n' + '-Goodbye!\n' + '+world\n' + ' 1\n' + ' 2\n' + ' End', + 'new_value': 'world\n1\n2\nEnd', + 'old_value': 'world!\n' + 'Goodbye!\n' + '1\n' + '2\n' + 'End'}}} + + >>> + >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) + --- + +++ + @@ -1,5 +1,4 @@ + -world! + -Goodbye! + +world + 1 + 2 + End + + +Type change + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'type_changes': { "root[4]['b']": { 'new_type': , + 'new_value': 'world\n\n\nEnd', + 'old_type': , + 'old_value': [1, 2, 3]}}} + +And if you don't care about the value of items that have changed type, please set verbose level to 0 + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:"2", 3:3} + >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) + { 'type_changes': { 'root[2]': { 'new_type': , + 'old_type': }}} + +List difference + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} + +List difference 2: + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'iterable_item_added': {"root[4]['b'][3]": 3}, + 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 
'old_value': 2}, + "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} + +List difference ignoring order or duplicates: (with the same dictionaries as above) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} + >>> ddiff = DeepDiff(t1, t2, ignore_order=True) + >>> print (ddiff) + {} + +List difference ignoring order but reporting repetitions: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [1, 3, 1, 4] + >>> t2 = [4, 4, 1] + >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) + >>> pprint(ddiff, indent=2) + { 'iterable_item_removed': {'root[1]': 3}, + 'repetition_change': { 'root[0]': { 'new_indexes': [2], + 'new_repeat': 1, + 'old_indexes': [0, 2], + 'old_repeat': 2, + 'value': 1}, + 'root[3]': { 'new_indexes': [0, 1], + 'new_repeat': 2, + 'old_indexes': [3], + 'old_repeat': 1, + 'value': 4}}} + +List that contains dictionary: + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'dictionary_item_removed': {"root[4]['b'][2][2]"}, + 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} + +Sets: + >>> t1 = {1, 2, 8} + >>> t2 = {1, 2, 3, 5} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff) + {'set_item_added': {'root[5]', 'root[3]'}, 'set_item_removed': {'root[8]'}} + +Named Tuples: + >>> from collections import namedtuple + >>> Point = namedtuple('Point', ['x', 'y']) + >>> t1 = Point(x=11, y=22) + >>> t2 = Point(x=11, y=23) + >>> pprint (DeepDiff(t1, t2)) + {'values_changed': {'root.y': {'new_value': 23, 'old_value': 22}}} + +Custom objects: + >>> class ClassA(object): + ... a = 1 + ... def __init__(self, b): + ... self.b = b + ... 
+ >>> t1 = ClassA(1) + >>> t2 = ClassA(2) + >>> + >>> pprint(DeepDiff(t1, t2)) + {'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} + +Object attribute added: + >>> t2.c = "new attribute" + >>> pprint(DeepDiff(t1, t2)) + {'attribute_added': {'root.c'}, + 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} + +Approximate decimals comparison (Significant digits after the point): + >>> t1 = Decimal('1.52') + >>> t2 = Decimal('1.57') + >>> DeepDiff(t1, t2, significant_digits=0) + {} + >>> DeepDiff(t1, t2, significant_digits=1) + {'values_changed': {'root': {'old_value': Decimal('1.52'), 'new_value': Decimal('1.57')}}} + +Approximate float comparison (Significant digits after the point): + >>> t1 = [ 1.1129, 1.3359 ] + >>> t2 = [ 1.113, 1.3362 ] + >>> pprint(DeepDiff(t1, t2, significant_digits=3)) + {} + >>> pprint(DeepDiff(t1, t2)) + {'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, + 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} + >>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) + {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} + + +.. note:: + All the examples for the text view work for the tree view too. + You just need to set view='tree' to get it in tree form. + + +**Tree View** + +Starting the version 3 You can chooe the view into the deepdiff results. +The tree view provides you with tree objects that you can traverse through to find +the parents of the objects that are diffed and the actual objects that are being diffed. +This view is very useful when dealing with nested objects. +Note that tree view always returns results in the form of Python sets. + +You can traverse through the tree elements! + +.. note:: + The Tree view is just a different representation of the diffed data. + Behind the scene, DeepDiff creates the tree view first and then converts it to textual + representation for the text view. + +.. 
code:: text + + +---------------------------------------------------------------+ + | | + | parent(t1) parent node parent(t2) | + | + ^ + | + +------|--------------------------|---------------------|-------+ + | | | up | + | Child | | | ChildRelationship + | Relationship | | | + | down | | | + +------|----------------------|-------------------------|-------+ + | v v v | + | child(t1) child node child(t2) | + | | + +---------------------------------------------------------------+ + + +:up: Move up to the parent node +:down: Move down to the child node +:path(): Get the path to the current node +:t1: The first item in the current node that is being diffed +:t2: The second item in the current node that is being diffed +:additional: Additional information about the node i.e. repetition +:repetition: Shortcut to get the repetition report + + +The tree view allows you to have more than mere textual representaion of the diffed objects. +It gives you the actual objects (t1, t2) throughout the tree of parents and children. + +**Examples Tree View** + +.. note:: + The Tree View is introduced in DeepDiff 3. + Set view='tree' in order to use this view. + +Value of an item has changed (Tree View) + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:4, 3:3} + >>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') + >>> ddiff_verbose0 + {'values_changed': {}} + >>> + >>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') + >>> ddiff_verbose1 + {'values_changed': {}} + >>> set_of_values_changed = ddiff_verbose1['values_changed'] + >>> # since set_of_values_changed includes only one item in a set + >>> # in order to get that one item we can: + >>> (changed,) = set_of_values_changed + >>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] + + >>> changed.t1 + 2 + >>> changed.t2 + 4 + >>> # You can traverse through the tree, get to the parents! 
+ >>> changed.up + + +List difference (Tree View) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff + {'iterable_item_removed': {, }} + >>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. + >>> # One way to get one item from the set is to convert it to a list + >>> # And then get the first item of the list: + >>> removed = list(ddiff['iterable_item_removed'])[0] + >>> removed + + >>> + >>> parent = removed.up + >>> parent + + >>> parent.path() + "root[4]['b']" + >>> parent.t1 + [1, 2, 3, 4] + >>> parent.t2 + [1, 2] + >>> parent.up + + >>> parent.up.up + + >>> parent.up.up.t1 + {1: 1, 2: 2, 3: 3, 4: {'a': 'hello', 'b': [1, 2, 3, 4]}} + >>> parent.up.up.t1 == t1 # It is holding the original t1 that we passed to DeepDiff + True + +List difference 2 (Tree View) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> pprint(ddiff, indent = 2) + { 'iterable_item_added': {}, + 'values_changed': { , + }} + >>> + >>> # Note that iterable_item_added is a set with one item. + >>> # So in order to get that one item from it, we can do: + >>> + >>> (added,) = ddiff['iterable_item_added'] + >>> added + + >>> added.up.up + + >>> added.up.up.path() + 'root[4]' + >>> added.up.up.down + + >>> + >>> # going up twice and then down twice gives you the same node in the tree: + >>> added.up.up.down.down == added + True + +List difference ignoring order but reporting repetitions (Tree View) + >>> t1 = [1, 3, 1, 4] + >>> t2 = [4, 4, 1] + >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree') + >>> pprint(ddiff, indent=2) + { 'iterable_item_removed': {}, + 'repetition_change': { , + }} + >>> + >>> # repetition_change is a set with 2 items. 
+ >>> # in order to get those 2 items, we can do the following. + >>> # or we can convert the set to list and get the list items. + >>> # or we can iterate through the set items + >>> + >>> (repeat1, repeat2) = ddiff['repetition_change'] + >>> repeat1 # the default verbosity is set to 1. + + >>> # The actual data regarding the repetitions can be found in the repetition attribute: + >>> repeat1.repetition + {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} + >>> + >>> # If you change the verbosity, you will see less: + >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree', verbose_level=0) + >>> ddiff + {'repetition_change': {, }, 'iterable_item_removed': {}} + >>> (repeat1, repeat2) = ddiff['repetition_change'] + >>> repeat1 + + >>> + >>> # But the verbosity level does not change the actual report object. + >>> # It only changes the textual representaion of the object. We get the actual object here: + >>> repeat1.repetition + {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} + >>> repeat1.t1 + 4 + >>> repeat1.t2 + 4 + >>> repeat1.up + + +List that contains dictionary (Tree View) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> pprint (ddiff, indent = 2) + { 'dictionary_item_removed': {}, + 'values_changed': {}} + +Sets (Tree View): + >>> t1 = {1, 2, 8} + >>> t2 = {1, 2, 3, 5} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> print(ddiff) + {'set_item_removed': {}, 'set_item_added': {, }} + >>> # grabbing one item from set_item_removed set which has one item only + >>> (item,) = ddiff['set_item_removed'] + >>> item.up + + >>> item.up.t1 == t1 + True + +Named Tuples (Tree View): + >>> from collections import namedtuple + >>> Point = namedtuple('Point', ['x', 'y']) + >>> t1 = Point(x=11, y=22) + >>> t2 = Point(x=11, y=23) + >>> print(DeepDiff(t1, t2, 
view='tree')) + {'values_changed': {}} + +Custom objects (Tree View): + >>> class ClassA(object): + ... a = 1 + ... def __init__(self, b): + ... self.b = b + ... + >>> t1 = ClassA(1) + >>> t2 = ClassA(2) + >>> + >>> print(DeepDiff(t1, t2, view='tree')) + {'values_changed': {}} + +Object attribute added (Tree View): + >>> t2.c = "new attribute" + >>> pprint(DeepDiff(t1, t2, view='tree')) + {'attribute_added': {}, + 'values_changed': {}} + +Approximate decimals comparison (Significant digits after the point) (Tree View): + >>> t1 = Decimal('1.52') + >>> t2 = Decimal('1.57') + >>> DeepDiff(t1, t2, significant_digits=0, view='tree') + {} + >>> ddiff = DeepDiff(t1, t2, significant_digits=1, view='tree') + >>> ddiff + {'values_changed': {}} + >>> (change1,) = ddiff['values_changed'] + >>> change1 + + >>> change1.t1 + Decimal('1.52') + >>> change1.t2 + Decimal('1.57') + >>> change1.path() + 'root' + +Approximate float comparison (Significant digits after the point) (Tree View): + >>> t1 = [ 1.1129, 1.3359 ] + >>> t2 = [ 1.113, 1.3362 ] + >>> ddiff = DeepDiff(t1, t2, significant_digits=3, view='tree') + >>> ddiff + {} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> pprint(ddiff, indent=2) + { 'values_changed': { , + }} + >>> ddiff = DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1, view='tree') + >>> ddiff + {'values_changed': {}} + + +**Exclude types** + +Exclude certain types from comparison: + >>> l1 = logging.getLogger("test") + >>> l2 = logging.getLogger("test2") + >>> t1 = {"log": l1, 2: 1337} + >>> t2 = {"log": l2, 2: 1337} + >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) + {} + +**Exclude paths** + +Exclude part of your object tree from comparison +use `exclude_paths` and pass a set or list of paths to exclude: + >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} + >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} + >>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) 
+ {} + +You can also exclude using regular expressions by using `exclude_regex_paths` and passing a set or list of path regexes to exclude: + >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] + >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] + >>> print (DeepDiff(t1, t2, exclude_regex_paths={r"root\[\d+\]\['b'\]"})) + {} + +example 2: + >>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} + >>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} + >>> DeepDiff(t1, t2, exclude_regex_paths={r"\['foo.'\]"}) + {} + +Tip: DeepDiff is using re.search on the path. So if you want to force it to match from the beginning of the path, add `^` to the beginning of the regex. + + + +.. note:: + All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. + +**Serialization** + +In order to convert the DeepDiff object into a normal Python dictionary, use the to_dict() method. +Note that to_dict will use the text view even if you did the diff in tree view. + +Example: + >>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} + >>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff.to_dict() + {'type_changes': {"root[4]['b']": {'old_type': <class 'list'>, 'new_type': <class 'str'>, 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} + + +In order to do safe json serialization, use the to_json() method. + +Example: + >>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} + >>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff.to_json() + '{"type_changes": {"root[4][\'b\']": {"old_type": "list", "new_type": "str", "old_value": [1, 2, 3], "new_value": "world\\n\\n\\nEnd"}}}' + + +If you want the original DeepDiff object to be serialized with all the bells and whistles, you can use the to_json_pickle() and from_json_pickle() in order to serialize and deserialize its results into json.
+ +Serialize and then deserialize back to deepdiff + >>> t1 = {1: 1, 2: 2, 3: 3} + >>> t2 = {1: 1, 2: "2", 3: 3} + >>> ddiff = DeepDiff(t1, t2) + >>> jsoned = ddiff.to_json_pickle() + >>> jsoned + '{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' + >>> ddiff_new = DeepDiff.from_json_pickle(jsoned) + >>> ddiff == ddiff_new + True + +**Pycon 2016 Talk** +I gave a talk about how DeepDiff does what it does at Pycon 2016. +`Diff it to Dig it Pycon 2016 video `_ + +And here is more info: http://zepworks.com/blog/diff-it-to-digg-it/ + + diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 38a97a5c..404d7d00 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -3,6 +3,7 @@ import datetime from decimal import Decimal from collections import namedtuple +from ordered_set import OrderedSet import logging logger = logging.getLogger(__name__) @@ -40,7 +41,7 @@ def short_repr(item, max_length=15): return item -class ListItemRemovedOrAdded(object): # pragma: no cover +class ListItemRemovedOrAdded: # pragma: no cover """Class of conditions to be checked""" pass @@ -104,14 +105,41 @@ class NotPresent(OtherTypes): # pragma: no cover # raise KeyError(new_key) -class Verbose(object): +class Verbose: """ Global verbose level """ level = 1 -def json_convertor_default(obj): - if isinstance(obj, Decimal): - return float(obj) - raise TypeError +class indexed_set(set): + """ + A set class that lets you get an item by index + + >>> a = indexed_set() + >>> a.add(10) + >>> a.add(20) + >>> a[0] + 10 + """ + + +JSON_CONVERTOR = { + Decimal: float, + OrderedSet: list, + type: lambda x: x.__name__, +} + + +def json_convertor_default(default_mapping=None): + _convertor_mapping = JSON_CONVERTOR.copy() + if default_mapping: + _convertor_mapping.update(default_mapping) + + def _convertor(obj): + for original_type, convert_to in 
_convertor_mapping.items(): + if isinstance(obj, original_type): + return convert_to(obj) + raise TypeError('We do not know how to convert {} of type {} for json serialization. Please pass the default_mapping parameter with proper mapping of the object to a basic python type.'.format(obj, type(obj))) + + return _convertor diff --git a/deepdiff/model.py b/deepdiff/model.py index 82283a41..763c81d9 100644 --- a/deepdiff/model.py +++ b/deepdiff/model.py @@ -3,6 +3,7 @@ from deepdiff.helper import RemapDict, strings, short_repr, Verbose, notpresent from ast import literal_eval from copy import copy +from ordered_set import OrderedSet FORCE_DEFAULT = 'fake' UP_DOWN = {'up': 'down', 'down': 'up'} @@ -42,7 +43,7 @@ def cleanup(self): class TreeResult(ResultDict): def __init__(self): for key in REPORT_KEYS: - self[key] = set() + self[key] = OrderedSet() class TextResult(ResultDict): @@ -59,8 +60,8 @@ def __init__(self, tree_results=None): "iterable_item_removed": {}, "attribute_added": self.__set_or_dict(), "attribute_removed": self.__set_or_dict(), - "set_item_removed": set(), - "set_item_added": set(), + "set_item_removed": OrderedSet(), + "set_item_added": OrderedSet(), "repetition_change": {} }) @@ -68,7 +69,7 @@ def __init__(self, tree_results=None): self._from_tree_results(tree_results) def __set_or_dict(self): - return {} if Verbose.level >= 2 else set() + return {} if Verbose.level >= 2 else OrderedSet() def _from_tree_results(self, tree): """ @@ -102,7 +103,7 @@ def _from_tree_default(self, tree, report_type): # do the reporting report = self[report_type] - if isinstance(report, set): + if isinstance(report, OrderedSet): report.add(change.path(force=FORCE_DEFAULT)) elif isinstance(report, dict): report[change.path(force=FORCE_DEFAULT)] = item diff --git a/requirements.txt b/requirements.txt index 2146c923..38a19407 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ mmh3==2.5.1 jsonpickle==1.0 +ordered-set==3.1 diff --git 
a/tests/test_serialization.py b/tests/test_serialization.py index 4b1d113e..08b61cba 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -46,7 +46,9 @@ def test_serialization_tree(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2, view='tree') - jsoned = ddiff.to_json_pickle() + pickle_jsoned = ddiff.to_json_pickle() + assert "world" in pickle_jsoned + jsoned = ddiff.to_json() assert "world" in jsoned def test_deserialization_tree(self): From 16821043531a67128c3c585285893e814d742e6a Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 10 Feb 2019 23:04:09 -0800 Subject: [PATCH 40/76] updating not present --- deepdiff/helper.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 404d7d00..578f0e41 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -65,13 +65,16 @@ class NotHashed(OtherTypes): pass -class NotPresent(OtherTypes): # pragma: no cover +class NotPresent: # pragma: no cover """ In a change tree, this indicated that a previously existing object has been removed -- or will only be added in the future. We previously used None for this but this caused problem when users actually added and removed None. Srsly guys? 
:D """ - pass + def __repr__(self): + return 'not present' # pragma: no cover + + __str__ = __repr__ unprocessed = Unprocessed() From 1ec830bd9b91b79493fc9bfd543d0b827db53834 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 11 Feb 2019 00:22:19 -0800 Subject: [PATCH 41/76] fixing docs --- README.md | 5 ++++- deepdiff/diff_doc.rst | 9 ++++++--- deepdiff/model.py | 19 ++++++++++++++----- docs/index.rst | 22 +++++++++------------- tests/test_diff_tree.py | 5 +---- 5 files changed, 34 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 716ee35f..36919ade 100644 --- a/README.md +++ b/README.md @@ -143,13 +143,16 @@ use `exclude_paths` and pass a set or list of paths to exclude: ### Exclude Regex Paths -You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude: +You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude. The items in the list could be raw regex strings or compiled regex objects. ```python >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] >>> print(DeepDiff(t1, t2, exclude_regex_paths={r"root\[\d+\]\['b'\]"})) {} +>>> exclude_path = re.compile(r"root\[\d+\]\['b'\]") +>>> print(DeepDiff(t1, t2, exclude_regex_paths=[exclude_path])) +{} ``` example 2: diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index e81972ca..e9a8777d 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -44,7 +44,7 @@ exclude_paths: list, default = None. List of paths to exclude from the report. exclude_regex_paths: list, default = None. - List of regex paths to exclude from the report. + List of string regex paths or compiled regex paths objects to exclude from the report. exclude_types: list, default = None. List of object types to exclude from the report. 
@@ -572,10 +572,13 @@ use `exclude_paths` and pass a set or list of paths to exclude: >>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) {} -You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude: +You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude. The items in the list could be raw regex strings or compiled regex objects. >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] - >>> print (DeepDiff(t1, t2, exclude_regex_paths={"root\[\d+\]\['b'\]"})) + >>> print(DeepDiff(t1, t2, exclude_regex_paths={r"root\[\d+\]\['b'\]"})) + {} + >>> exclude_path = re.compile(r"root\[\d+\]\['b'\]") + >>> print(DeepDiff(t1, t2, exclude_regex_paths=[exclude_path])) {} example 2: diff --git a/deepdiff/model.py b/deepdiff/model.py index 763c81d9..2b7a00da 100644 --- a/deepdiff/model.py +++ b/deepdiff/model.py @@ -40,10 +40,19 @@ def cleanup(self): del self[k] +class PrettyOrderedSet(OrderedSet): + """ + From the perspective of the users of the library, they are dealing with lists. + Behind the scene, we have ordered sets. 
+ """ + def __repr__(self): + return '[{}]'.format(", ".join(map(str, self))) + + class TreeResult(ResultDict): def __init__(self): for key in REPORT_KEYS: - self[key] = OrderedSet() + self[key] = PrettyOrderedSet() class TextResult(ResultDict): @@ -60,8 +69,8 @@ def __init__(self, tree_results=None): "iterable_item_removed": {}, "attribute_added": self.__set_or_dict(), "attribute_removed": self.__set_or_dict(), - "set_item_removed": OrderedSet(), - "set_item_added": OrderedSet(), + "set_item_removed": PrettyOrderedSet(), + "set_item_added": PrettyOrderedSet(), "repetition_change": {} }) @@ -69,7 +78,7 @@ def __init__(self, tree_results=None): self._from_tree_results(tree_results) def __set_or_dict(self): - return {} if Verbose.level >= 2 else OrderedSet() + return {} if Verbose.level >= 2 else PrettyOrderedSet() def _from_tree_results(self, tree): """ @@ -103,7 +112,7 @@ def _from_tree_default(self, tree, report_type): # do the reporting report = self[report_type] - if isinstance(report, OrderedSet): + if isinstance(report, PrettyOrderedSet): report.add(change.path(force=FORCE_DEFAULT)) elif isinstance(report, dict): report[change.path(force=FORCE_DEFAULT)] = item diff --git a/docs/index.rst b/docs/index.rst index 48834d44..d7cea390 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,6 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. + DeepDiff 4.0.0 documentation! ============================= @@ -396,22 +397,17 @@ Read more in the Deep Hash reference: .. _significant\_digits: #significant-digits .. _views: #views -DeepDiff Reference -================== - -:doc:`/diff` - - -DeepSearch Reference -==================== -:doc:`/dsearch` +References +========== +.. 
toctree:: + :maxdepth: 2 -DeepHash Reference -==================== + diff + dsearch + contenthash -:doc:`/contenthash` Indices and tables ================== @@ -424,7 +420,7 @@ Indices and tables Changelog ========= -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. - v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) diff --git a/tests/test_diff_tree.py b/tests/test_diff_tree.py index 8c748087..fa9f299a 100644 --- a/tests/test_diff_tree.py +++ b/tests/test_diff_tree.py @@ -150,10 +150,7 @@ def test_repr(self): t1 = {1, 2, 8} t2 = {1, 2, 3, 5} ddiff = DeepDiff(t1, t2, view='tree') - try: - str(ddiff) - except Exception as e: - self.fail("Converting ddiff to string raised: {}".format(e)) + str(ddiff) class TestDeepDiffTreeWithNumpy: From 1042dffe0359a1eccf68052e1b11c950c2026bcf Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 11 Feb 2019 00:30:40 -0800 Subject: [PATCH 42/76] updating docs --- README.md | 33 +++++++++++++++++++++++++++++---- docs/index.rst | 1 + 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2f9259aa..7804b74c 100644 --- a/README.md +++ b/README.md @@ -895,18 +895,43 @@ Decimal('1.57') ## Serialization -DeepDiff uses jsonpickle in order to serialize and deserialize its results into json. This works for both tree view and text view. +In order to convert the DeepDiff object into a normal Python dictionary, use the to_dict() method. +Note that to_dict will use the text view even if you did the diff in tree view. 
-### Serialize and then deserialize back to deepdiff +Example: + +```python +>>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} +>>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} +>>> ddiff = DeepDiff(t1, t2, view='tree') +>>> ddiff.to_dict() +{'type_changes': {"root[4]['b']": {'old_type': <class 'list'>, 'new_type': <class 'str'>, 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} +``` + +In order to do safe json serialization, use the to_json() method. + +Example: + +```python +>>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} +>>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} +>>> ddiff = DeepDiff(t1, t2, view='tree') +>>> ddiff.to_json() +'{"type_changes": {"root[4][\'b\']": {"old_type": "list", "new_type": "str", "old_value": [1, 2, 3], "new_value": "world\\n\\n\\nEnd"}}}' +``` + +If you want the original DeepDiff object to be serialized with all the bells and whistles, you can use the to_json_pickle() and from_json_pickle() in order to serialize and deserialize its results into json. 
+ +Serialize and then deserialize back to deepdiff ```python >>> t1 = {1: 1, 2: 2, 3: 3} >>> t2 = {1: 1, 2: "2", 3: 3} >>> ddiff = DeepDiff(t1, t2) ->>> jsoned = ddiff.json +>>> jsoned = ddiff.to_json_pickle() >>> jsoned '{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' ->>> ddiff_new = DeepDiff.from_json(jsoned) +>>> ddiff_new = DeepDiff.from_json_pickle(jsoned) >>> ddiff == ddiff_new True ``` diff --git a/docs/index.rst b/docs/index.rst index d7cea390..2adc25df 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -487,4 +487,5 @@ ALso thanks to: - sreecodeslayer for DeepSearch match_string - Brian Maissy (brianmaissy) for weakref fix and enum tests - Bartosz Borowik (boba-2) for Exclude types fix when ignoring order +- Brian Maissy (brianmaissy) for fixing classes which inherit from classes with slots didn't have all of their slots compared From ab45e857f7319fb8acb876b774618f04d1eccb9c Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 11 Feb 2019 23:42:10 -0800 Subject: [PATCH 43/76] making add_to_frozen_set into its own function --- deepdiff/contenthash.py | 12 +++--------- deepdiff/diff.py | 38 ++++++++++++++++++-------------------- deepdiff/helper.py | 4 ++++ deepdiff/search.py | 12 +++--------- 4 files changed, 28 insertions(+), 38 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index c53349b3..4c531595 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -8,7 +8,7 @@ import mmh3 import logging -from deepdiff.helper import strings, numbers, unprocessed, skipped, not_hashed +from deepdiff.helper import strings, numbers, unprocessed, skipped, not_hashed, add_to_frozen_set logger = logging.getLogger(__name__) @@ -213,12 +213,6 @@ def __getitem__(self, key): return super().__getitem__(key) - @staticmethod - def _add_to_frozen_set(parents_ids, item_id): - parents_ids = 
set(parents_ids) - parents_ids.add(item_id) - return frozenset(parents_ids) - def _prep_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): """Difference of 2 objects""" try: @@ -253,7 +247,7 @@ def _prep_dict(self, obj, parents_ids=frozenset({})): item_id = id(item) if (parents_ids and item_id in parents_ids) or self._skip_this(item): continue - parents_ids_added = self._add_to_frozen_set(parents_ids, item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) hashed = self._hash(item, parents_ids_added) hashed = "{}:{}".format(key_hash, hashed) result.append(hashed) @@ -279,7 +273,7 @@ def _prep_iterable(self, obj, parents_ids=frozenset({})): if parents_ids and item_id in parents_ids: continue - parents_ids_added = self._add_to_frozen_set(parents_ids, item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) hashed = self._hash(item, parents_ids_added) # counting repetitions result[hashed] += 1 diff --git a/deepdiff/diff.py b/deepdiff/diff.py index ce73bab8..abb30562 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -22,7 +22,7 @@ from ordered_set import OrderedSet from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, - IndexedHash, Verbose, unprocessed, json_convertor_default) + IndexedHash, Verbose, unprocessed, json_convertor_default, add_to_frozen_set) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel from deepdiff.model import DictRelationship, AttributeRelationship from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship @@ -50,26 +50,27 @@ def __init__(self, ignore_order=False, report_repetition=False, significant_digits=None, - exclude_paths=set(), - exclude_regex_paths=set(), - exclude_types=set(), + exclude_paths=None, + exclude_regex_paths=None, + exclude_types=None, include_string_type_changes=False, verbose_level=1, view=TEXT_VIEW, hasher=DeepHash.murmur3_128bit, + 
transformer=None, **kwargs): if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" "The valid parameters are ignore_order, report_repetition, significant_digits," - "exclude_paths, exclude_types, exclude_regex_paths, verbose_level and view.") % ', '.join(kwargs.keys())) + "exclude_paths, exclude_types, exclude_regex_paths, transformer, verbose_level and view.") % ', '.join(kwargs.keys())) self.ignore_order = ignore_order self.report_repetition = report_repetition - self.exclude_paths = set(exclude_paths) - self.exclude_regex_paths = [i if isinstance(i, re.Pattern) else re.compile(i) for i in exclude_regex_paths] - self.exclude_types = set(exclude_types) - self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance + self.exclude_paths = set(exclude_paths) if exclude_paths else None + self.exclude_regex_paths = [i if isinstance(i, re.Pattern) else re.compile(i) for i in exclude_regex_paths] if exclude_regex_paths else None + self.exclude_types = set(exclude_types) if exclude_types else None + self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance self.include_string_type_changes = include_string_type_changes self.hashes = {} self.hasher = hasher @@ -83,6 +84,10 @@ def __init__(self, Verbose.level = verbose_level + if transformer: + t1 = transformer(t1) + t2 = transformer(t2) + root = DiffLevel(t1, t2) self.__diff(root, parents_ids=frozenset({id(t1)})) @@ -138,12 +143,6 @@ def __report_result(self, report_type, level): level.report_type = report_type self.tree[report_type].add(level) - @staticmethod - def __add_to_frozen_set(parents_ids, item_id): - parents_ids = set(parents_ids) - parents_ids.add(item_id) - return frozenset(parents_ids) - @staticmethod def __dict_from_slots(object): def unmangle(attribute): @@ -208,8 +207,8 @@ def __skip_this(self, level): [exclude_regex_path.search(level.path()) for exclude_regex_path in self.exclude_regex_paths]): skip 
= True else: - if isinstance(level.t1, self.exclude_types_tuple) or isinstance( - level.t2, self.exclude_types_tuple): + if self.exclude_types_tuple and (isinstance(level.t1, self.exclude_types_tuple) or + isinstance(level.t2, self.exclude_types_tuple)): skip = True return skip @@ -268,7 +267,7 @@ def __diff_dict(self, item_id = id(t1[key]) if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) # Go one level deeper next_level = level.branch_deeper( @@ -344,8 +343,7 @@ def __diff_iterable(self, level, parents_ids=frozenset({})): item_id = id(x) if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, - item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) # Go one level deeper next_level = level.branch_deeper( diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 578f0e41..a5972ebf 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -146,3 +146,7 @@ def _convertor(obj): raise TypeError('We do not know how to convert {} of type {} for json serialization. 
Please pass the default_mapping parameter with proper mapping of the object to a basic python type.'.format(obj, type(obj))) return _convertor + + +def add_to_frozen_set(parents_ids, item_id): + return parents_ids | {item_id} diff --git a/deepdiff/search.py b/deepdiff/search.py index 0fc8c9ca..4f07de34 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -6,7 +6,7 @@ from collections.abc import MutableMapping, Iterable import logging -from deepdiff.helper import strings, numbers +from deepdiff.helper import strings, numbers, add_to_frozen_set logger = logging.getLogger(__name__) @@ -123,12 +123,6 @@ def __report(self, report_key, key, value): else: self[report_key].add(key) - @staticmethod - def __add_to_frozen_set(parents_ids, item_id): - parents_ids = set(parents_ids) - parents_ids.add(item_id) - return frozenset(parents_ids) - def __search_obj(self, obj, item, @@ -203,7 +197,7 @@ def __search_dict(self, if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) new_parent = parent_text % (parent, item_key_str) new_parent_cased = new_parent if self.case_sensitive else new_parent.lower() @@ -246,7 +240,7 @@ def __search_iterable(self, item_id = id(thing) if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) self.__search(thing, item, "%s[%s]" % (parent, i), parents_ids_added) From 7d4b0af2001aadb96296e9d5ef56924be44cff25 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 17 Feb 2019 18:57:15 -0800 Subject: [PATCH 44/76] wip adding path stuff to content hash --- deepdiff/contenthash.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 4c531595..8478c9ea 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py 
@@ -5,6 +5,7 @@ from collections import defaultdict from decimal import Decimal from hashlib import sha1 +import re import mmh3 import logging @@ -154,6 +155,8 @@ def __init__(self, obj, hashes=None, exclude_types=None, + exclude_paths=None, + exclude_regex_paths=None, hasher=None, ignore_repetition=True, significant_digits=None, @@ -169,6 +172,8 @@ def __init__(self, exclude_types = set() if exclude_types is None else set(exclude_types) self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance self.ignore_repetition = ignore_repetition + self.exclude_paths = set(exclude_paths) if exclude_paths else None + self.exclude_regex_paths = [i if isinstance(i, re.Pattern) else re.compile(i) for i in exclude_regex_paths] if exclude_regex_paths else None self.hasher = self.murmur3_128bit if hasher is None else hasher hashes = hashes if hashes else {} @@ -231,13 +236,38 @@ def _prep_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): result = "nt{}".format(result) if is_namedtuple else "obj{}".format(result) return result + # def _skip_this(self, obj): + # skip = False + # if isinstance(obj, self.exclude_types_tuple): + # skip = True + + # return skip + + def _skip_this(self, obj): + # TODO: we need to have access to the path of this object in order to be able to exclude it when needed. 
skip = False if isinstance(obj, self.exclude_types_tuple): skip = True return skip + + skip = False + if self.exclude_paths and level.path() in self.exclude_paths: + skip = True + elif self.exclude_regex_paths and any( + [exclude_regex_path.search(level.path()) for exclude_regex_path in self.exclude_regex_paths]): + skip = True + else: + if self.exclude_types_tuple and (isinstance(level.t1, self.exclude_types_tuple) or + isinstance(level.t2, self.exclude_types_tuple)): + skip = True + + return skip + + + def _prep_dict(self, obj, parents_ids=frozenset({})): result = [] From ed8ec586012b4fcfdc73ca8ad492cf834417ca74 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 17 Feb 2019 22:12:23 -0800 Subject: [PATCH 45/76] adding path skipping to the content hash --- deepdiff/contenthash.py | 109 +++++++++++++++++++++------------------- deepdiff/helper.py | 24 ++++++++- tests/test_hash.py | 11 ++++ 3 files changed, 90 insertions(+), 54 deletions(-) diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py index 8478c9ea..fe09d8ff 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/contenthash.py @@ -5,16 +5,20 @@ from collections import defaultdict from decimal import Decimal from hashlib import sha1 -import re import mmh3 import logging -from deepdiff.helper import strings, numbers, unprocessed, skipped, not_hashed, add_to_frozen_set +from deepdiff.helper import (strings, numbers, unprocessed, skipped, not_hashed, add_to_frozen_set, + convert_item_or_items_into_set_else_none, + convert_item_or_items_into_compiled_regexes_else_none) logger = logging.getLogger(__name__) UNPROCESSED = 'unprocessed' RESERVED_DICT_KEYS = {UNPROCESSED} +EMPTY_FROZENSET = frozenset({}) + +INDEX_VS_ATTRIBUTE = ('[%s]', '.%s') def prepare_string_for_hashing(obj, include_string_type_changes=False): @@ -166,14 +170,14 @@ def __init__(self, if kwargs: raise ValueError( ("The following parameter(s) are not valid: %s\n" - "The valid parameters are obj, hashes, exclude_types." 
- "hasher and ignore_repetition.") % ', '.join(kwargs.keys())) + "The valid parameters are obj, hashes, exclude_types," + "exclude_paths, exclude_regex_paths, hasher and ignore_repetition.") % ', '.join(kwargs.keys())) self.obj = obj exclude_types = set() if exclude_types is None else set(exclude_types) self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance self.ignore_repetition = ignore_repetition - self.exclude_paths = set(exclude_paths) if exclude_paths else None - self.exclude_regex_paths = [i if isinstance(i, re.Pattern) else re.compile(i) for i in exclude_regex_paths] if exclude_regex_paths else None + self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths) + self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) self.hasher = self.murmur3_128bit if hasher is None else hasher hashes = hashes if hashes else {} @@ -186,7 +190,7 @@ def __init__(self, # testing the individual hash functions for different types of objects. 
self.constant_size = constant_size - self._hash(obj, parents_ids=frozenset({id(obj)})) + self._hash(obj, parent="root", parents_ids=frozenset({id(obj)})) if self[UNPROCESSED]: logger.warning("Can not hash the following items: {}.".format(self[UNPROCESSED])) @@ -207,7 +211,20 @@ def murmur3_128bit(obj): # 1203 is the seed return mmh3.hash64(obj, 1203) + def _get_item(self, key, changed_to_id=False): + try: + value = super().__getitem__(key) + except KeyError: + if changed_to_id: + raise KeyError('{} is not one of the hashed items.'.format(key)) from None + else: + key = id(key) + value = self._get_item(key, changed_to_id=True) + else: + return value + def __getitem__(self, key): + changed_to_id = False if not isinstance(key, int): try: if key in RESERVED_DICT_KEYS: @@ -215,10 +232,11 @@ def __getitem__(self, key): except Exception: pass key = id(key) + changed_to_id = True - return super().__getitem__(key) + return self._get_item(key, changed_to_id=changed_to_id) - def _prep_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): + def _prep_obj(self, obj, parent, parents_ids=EMPTY_FROZENSET, is_namedtuple=False): """Difference of 2 objects""" try: if is_namedtuple: @@ -232,53 +250,38 @@ def _prep_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): self[UNPROCESSED].append(obj) return unprocessed - result = self._prep_dict(obj, parents_ids) + result = self._prep_dict(obj, parent, parents_ids, print_as_attribute=True) result = "nt{}".format(result) if is_namedtuple else "obj{}".format(result) return result - # def _skip_this(self, obj): - # skip = False - # if isinstance(obj, self.exclude_types_tuple): - # skip = True - - # return skip - - - def _skip_this(self, obj): - # TODO: we need to have access to the path of this object in order to be able to exclude it when needed. 
+ def _skip_this(self, obj, parent): skip = False - if isinstance(obj, self.exclude_types_tuple): - skip = True - - return skip - - - skip = False - if self.exclude_paths and level.path() in self.exclude_paths: + if self.exclude_paths and parent in self.exclude_paths: skip = True elif self.exclude_regex_paths and any( - [exclude_regex_path.search(level.path()) for exclude_regex_path in self.exclude_regex_paths]): + [exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]): skip = True else: - if self.exclude_types_tuple and (isinstance(level.t1, self.exclude_types_tuple) or - isinstance(level.t2, self.exclude_types_tuple)): + if self.exclude_types_tuple and isinstance(obj, self.exclude_types_tuple): skip = True return skip - - - def _prep_dict(self, obj, parents_ids=frozenset({})): + def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribute=False): result = [] + key_text = "%s{}".format(INDEX_VS_ATTRIBUTE[print_as_attribute]) for key, item in obj.items(): - key_hash = self._hash(key) + key_formatted = "'%s'" % key if not print_as_attribute and isinstance(key, strings) else key + key_in_report = key_text % (parent, key_formatted) + + key_hash = self._hash(key, parent=key_in_report, parents_ids=parents_ids) item_id = id(item) - if (parents_ids and item_id in parents_ids) or self._skip_this(item): + if (parents_ids and item_id in parents_ids) or self._skip_this(item, parent=key_in_report): continue parents_ids_added = add_to_frozen_set(parents_ids, item_id) - hashed = self._hash(item, parents_ids_added) + hashed = self._hash(item, parent=key_in_report, parents_ids=parents_ids_added) hashed = "{}:{}".format(key_hash, hashed) result.append(hashed) @@ -288,15 +291,15 @@ def _prep_dict(self, obj, parents_ids=frozenset({})): return result - def _prep_set(self, obj): - return "set:{}".format(self._prep_iterable(obj)) + def _prep_set(self, obj, parent, parents_ids=EMPTY_FROZENSET): + return 
"set:{}".format(self._prep_iterable(obj, parent, parents_ids)) - def _prep_iterable(self, obj, parents_ids=frozenset({})): + def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): result = defaultdict(int) - for item in obj: - if self._skip_this(item): + for i, item in enumerate(obj): + if self._skip_this(item, parent="{}[{}]".format(parent, i)): continue item_id = id(item) @@ -304,7 +307,7 @@ def _prep_iterable(self, obj, parents_ids=frozenset({})): continue parents_ids_added = add_to_frozen_set(parents_ids, item_id) - hashed = self._hash(item, parents_ids_added) + hashed = self._hash(item, parent=parent, parents_ids=parents_ids_added) # counting repetitions result[hashed] += 1 @@ -335,20 +338,20 @@ def _prep_number(self, obj): result = "{}:{}".format(type(obj).__name__, obj) return result - def _prep_tuple(self, obj, parents_ids): + def _prep_tuple(self, obj, parent, parents_ids): # Checking to see if it has _fields. Which probably means it is a named # tuple. try: obj._asdict # It must be a normal tuple except AttributeError: - result = self._prep_iterable(obj, parents_ids) + result = self._prep_iterable(obj, parent, parents_ids) # We assume it is a namedtuple then else: - result = self._prep_obj(obj, parents_ids, is_namedtuple=True) + result = self._prep_obj(obj, parent, parents_ids=parents_ids, is_namedtuple=True) return result - def _hash(self, obj, parents_ids=frozenset({})): + def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): """The main diff method""" obj_id = id(obj) @@ -357,7 +360,7 @@ def _hash(self, obj, parents_ids=frozenset({})): result = not_hashed - if self._skip_this(obj): + if self._skip_this(obj, parent): result = skipped elif obj is None: @@ -370,19 +373,19 @@ def _hash(self, obj, parents_ids=frozenset({})): result = self._prep_number(obj) elif isinstance(obj, MutableMapping): - result = self._prep_dict(obj, parents_ids) + result = self._prep_dict(obj, parent, parents_ids) elif isinstance(obj, tuple): - result = 
self._prep_tuple(obj, parents_ids) + result = self._prep_tuple(obj, parent, parents_ids) elif isinstance(obj, (set, frozenset)): - result = self._prep_set(obj) + result = self._prep_set(obj, parent, parents_ids) elif isinstance(obj, Iterable): - result = self._prep_iterable(obj, parents_ids) + result = self._prep_iterable(obj, parent, parents_ids) else: - result = self._prep_obj(obj, parents_ids) + result = self._prep_obj(obj, parent, parents_ids) if result is not_hashed: # pragma: no cover self[UNPROCESSED].append(obj) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index a5972ebf..8a4dbcdb 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -1,10 +1,11 @@ # -*- coding: utf-8 -*- import sys import datetime +import re +import logging from decimal import Decimal from collections import namedtuple from ordered_set import OrderedSet -import logging logger = logging.getLogger(__name__) @@ -150,3 +151,24 @@ def _convertor(obj): def add_to_frozen_set(parents_ids, item_id): return parents_ids | {item_id} + + +def convert_item_or_items_into_set_else_none(items): + if items: + if isinstance(items, strings): + items = set([items]) + else: + items = set(items) + else: + items = None + return items + + +def convert_item_or_items_into_compiled_regexes_else_none(items): + if items: + if isinstance(items, (strings, re.Pattern)): + items = [items] + items = [i if isinstance(i, re.Pattern) else re.compile(i) for i in items] + else: + items = None + return items diff --git a/tests/test_hash.py b/tests/test_hash.py index e749fe6d..fd64e15a 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -372,6 +372,17 @@ def test_skip_str_type_in_dict_on_list(self): assert id(1) in t1_hash assert t1_hash[dic1] == t2_hash[dic2] + def test_skip_path(self): + dic1 = {1: "a"} + t1 = [dic1, 2] + dic2 = {} + t2 = [dic2, 2] + t1_hash = DeepHashPrep(t1, exclude_paths=['root[0]']) + t2_hash = DeepHashPrep(t2, exclude_paths=['root[0]']) + # assert id(1) not in t1_hash + # assert id(2) 
in t1_hash + assert t1_hash[2] == t2_hash[2] + class TestDeepHashSHA1: """DeepHash with SHA1 Tests.""" From f3b8badd2379701ace292a48e1c2a70b2791d99d Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 17 Feb 2019 22:27:24 -0800 Subject: [PATCH 46/76] Renaming contenthash to deephash --- README.md | 2 +- deepdiff/__init__.py | 2 +- deepdiff/{contenthash.py => deephash.py} | 134 ++--------------------- deepdiff/deephash_doc.rst | 118 ++++++++++++++++++++ deepdiff/diff.py | 16 +-- deepdiff/helper.py | 3 + docs/contenthash.rst | 2 +- docs/index.rst | 6 +- tests/test_hash.py | 16 +-- 9 files changed, 153 insertions(+), 146 deletions(-) rename deepdiff/{contenthash.py => deephash.py} (64%) create mode 100644 deepdiff/deephash_doc.rst diff --git a/README.md b/README.md index 7804b74c..695d944d 100644 --- a/README.md +++ b/README.md @@ -950,7 +950,7 @@ And here is more info: ## Change log -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. 
- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index 58b44d4b..a64b76ab 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -8,4 +8,4 @@ from .diff import DeepDiff from .search import DeepSearch, grep -from .contenthash import DeepHash +from .deephash import DeepHash diff --git a/deepdiff/contenthash.py b/deepdiff/deephash.py similarity index 64% rename from deepdiff/contenthash.py rename to deepdiff/deephash.py index fe09d8ff..cf2134f9 100644 --- a/deepdiff/contenthash.py +++ b/deepdiff/deephash.py @@ -1,15 +1,16 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import os +import mmh3 +import logging from collections import Iterable from collections import MutableMapping from collections import defaultdict from decimal import Decimal from hashlib import sha1 -import mmh3 -import logging from deepdiff.helper import (strings, numbers, unprocessed, skipped, not_hashed, add_to_frozen_set, - convert_item_or_items_into_set_else_none, + convert_item_or_items_into_set_else_none, current_dir, convert_item_or_items_into_compiled_regexes_else_none) logger = logging.getLogger(__name__) @@ -33,127 +34,14 @@ def prepare_string_for_hashing(obj, include_string_type_changes=False): return obj -class DeepHash(dict): - r""" - **DeepHash** - - DeepHash calculates the hash of objects based on their contents in a deterministic way. - This way 2 objects with the same content should have the same hash. - - The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. - For example you can use DeepHash to calculate the hash of a set or a dictionary! - - The core of DeepHash is a deterministic serialization of your object into a string so it - can be passed to a hash function. By default it uses Murmur 3 128 bit hash function. - but you can pass another hash function to it if you want. 
- - **Parameters** - - obj : any object, The object to be hashed based on its content. - - hashes : dictionary, default = empty dictionary. - A dictionary of {object id: object hash} to start with. - Any object that is encountered and its id is already in the hashes dictionary, - will re-use the hash that is provided by this dictionary instead of re-calculating - its hash. - - exclude_types: list, default = None. - List of object types to exclude from hashing. - - hasher: function. default = DeepHash.murmur3_128bit - hasher is the hashing function. The default is DeepHash.murmur3_128bit. - But you can pass another hash function to it if you want. - For example a cryptographic hash function or Python's builtin hash function. - All it needs is a function that takes the input in string format and returns the hash. - - You can use it by passing: hasher=hash for Python's builtin hash. - - SHA1 is already provided as an alternative too: - You can use it by passing: hasher=DeepHash.sha1hex - - ignore_repetition: Boolean, default = True - If repetitions in an iterable should cause the hash of iterable to be different. - Note that the deepdiff diffing functionality lets this to be the default at all times. - But if you are using DeepHash directly, you can set this parameter. - - significant_digits : int >= 0, default=None. - If it is a non negative integer, it compares only that many digits AFTER - the decimal point. +with open(os.path.join(current_dir, 'deephash_doc.rst'), 'r') as doc_file: + doc = doc_file.read() - This only affects floats, decimal.Decimal and complex. - Takse a look at DeepDiff.diff docs for explanation of how this works. - - constant_size: Boolean, default = True - What DeepHash does is to "prep" the contents of objects into strings. - If constant_size is set, then it actually goes ahead and hashes the string - using the hasher function. 
- - The only time you want the constant_size to be False is if you want to know what - the string representation of your object is BEFORE it gets hashed. - - include_string_type_changes: Boolean, default = False - string type conversions should not affect the hash output when this is set to False. - For example "Hello" and b"Hello" should produce the same hash. - - **Returns** - A dictionary of {item id: item hash}. - If your object is nested, it will build hashes of all the objects it contains! - - - **Examples** - - Let's say you have a dictionary object. - >>> from deepdiff import DeepHash - >>> - >>> obj = {1: 2, 'a': 'b'} - - If you try to hash it: - >>> hash(obj) - Traceback (most recent call last): - File "", line 1, in - TypeError: unhashable type: 'dict' - - But with DeepHash: - >>> from deepdiff import DeepHash - >>> obj = {1: 2, 'a': 'b'} - >>> DeepHash(obj) - {4355639248: (2468916477072481777, 512283587789292749), 4355639280: (-3578777349255665377, -6377555218122431491), 4358636128: (-8839064797231613815, -1822486391929534118), 4358009664: (8833996863197925870, -419376694314494743), 4357467952: (3415089864575009947, 7987229399128149852)} - - So what is exactly the hash of obj in this case? - DeepHash is calculating the hash of the obj and any other object that obj contains. - The output of DeepHash is a dictionary of object IDs to their hashes. - In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: - >>> hashes = DeepHash(obj) - >>> hashes[obj] - (3415089864575009947, 7987229399128149852) - - Which you can write as: - >>> hashes = DeepHash(obj)[obj] - - At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. - - The result hash is (3415089864575009947, 7987229399128149852). - In this case the hash of the obj is 128 bit that is divided into 2 64bit integers. 
- Using Murmur 3 64bit for hashing is preferred (and is the default behaviour) - since the chance of hash collision will be minimal and hashing will be deterministic - and will not depend on the version of the Python. - - If you do a deep copy of obj, it should still give you the same hash: - >>> from copy import deepcopy - 2481013017017307534 - >>> DeepHash(obj2)[obj2] - (3415089864575009947, 7987229399128149852) +class DeepHash(dict): + __doc__ = doc - Note that by default DeepHash will ignore string type differences. So if your strings were bytes, you would still get the same hash: - >>> obj3 = {1: 2, b'a': b'b'} - >>> DeepHash(obj3)[obj3] - (3415089864575009947, 7987229399128149852) - - But if you want a different hash if string types are different, set include_string_type_changes to True: - >>> DeepHash(obj3, include_string_type_changes=True)[obj3] - (6406752576584602448, -8103933101621212760) - """ + MURMUR_SEED = 1203 def __init__(self, obj, @@ -207,9 +95,7 @@ def sha1hex(obj): def murmur3_128bit(obj): """Use murmur3_128bit for 128 bit hash (default).""" obj = obj.encode('utf-8') - # hash64 is actually 128bit. Weird. - # 1203 is the seed - return mmh3.hash64(obj, 1203) + return mmh3.hash128(obj, DeepHash.MURMUR_SEED) def _get_item(self, key, changed_to_id=False): try: diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst new file mode 100644 index 00000000..c8d4120c --- /dev/null +++ b/deepdiff/deephash_doc.rst @@ -0,0 +1,118 @@ +**DeepHash** + +DeepHash calculates the hash of objects based on their contents in a deterministic way. +This way 2 objects with the same content should have the same hash. + +The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. +For example you can use DeepHash to calculate the hash of a set or a dictionary! + +The core of DeepHash is a deterministic serialization of your object into a string so it +can be passed to a hash function. By default it uses Murmur 3 128 bit hash function. 
+but you can pass another hash function to it if you want. + +**Parameters** + +obj : any object, The object to be hashed based on its content. + +hashes : dictionary, default = empty dictionary. + A dictionary of {object id: object hash} to start with. + Any object that is encountered and its id is already in the hashes dictionary, + will re-use the hash that is provided by this dictionary instead of re-calculating + its hash. + +exclude_types: list, default = None. + List of object types to exclude from hashing. + +hasher: function. default = DeepHash.murmur3_128bit + hasher is the hashing function. The default is DeepHash.murmur3_128bit. + But you can pass another hash function to it if you want. + For example a cryptographic hash function or Python's builtin hash function. + All it needs is a function that takes the input in string format and returns the hash. + + You can use it by passing: hasher=hash for Python's builtin hash. + + SHA1 is already provided as an alternative too: + You can use it by passing: hasher=DeepHash.sha1hex + +ignore_repetition: Boolean, default = True + If repetitions in an iterable should cause the hash of iterable to be different. + Note that the deepdiff diffing functionality lets this to be the default at all times. + But if you are using DeepHash directly, you can set this parameter. + +significant_digits : int >= 0, default=None. + If it is a non negative integer, it compares only that many digits AFTER + the decimal point. + + This only affects floats, decimal.Decimal and complex. + + Take a look at DeepDiff.diff docs for explanation of how this works. + +constant_size: Boolean, default = True + What DeepHash does is to "prep" the contents of objects into strings. + If constant_size is set, then it actually goes ahead and hashes the string + using the hasher function. + + The only time you want the constant_size to be False is if you want to know what + the string representation of your object is BEFORE it gets hashed.
+ +include_string_type_changes: Boolean, default = False + string type conversions should not affect the hash output when this is set to False. + For example "Hello" and b"Hello" should produce the same hash. + +**Returns** + A dictionary of {item id: item hash}. + If your object is nested, it will build hashes of all the objects it contains! + + +**Examples** + +Let's say you have a dictionary object. + >>> from deepdiff import DeepHash + >>> + >>> obj = {1: 2, 'a': 'b'} + +If you try to hash it: + >>> hash(obj) + Traceback (most recent call last): + File "", line 1, in + TypeError: unhashable type: 'dict' + +But with DeepHash: + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + >>> DeepHash(obj) + {4355639248: (2468916477072481777, 512283587789292749), 4355639280: (-3578777349255665377, -6377555218122431491), 4358636128: (-8839064797231613815, -1822486391929534118), 4358009664: (8833996863197925870, -419376694314494743), 4357467952: (3415089864575009947, 7987229399128149852)} + +So what is exactly the hash of obj in this case? +DeepHash is calculating the hash of the obj and any other object that obj contains. +The output of DeepHash is a dictionary of object IDs to their hashes. +In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: + >>> hashes = DeepHash(obj) + >>> hashes[obj] + (3415089864575009947, 7987229399128149852) + +Which you can write as: + >>> hashes = DeepHash(obj)[obj] + +At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. + +The result hash is (3415089864575009947, 7987229399128149852). +In this case the hash of the obj is 128 bit that is divided into 2 64bit integers. +Using Murmur 3 64bit for hashing is preferred (and is the default behaviour) +since the chance of hash collision will be minimal and hashing will be deterministic +and will not depend on the version of the Python. 
+ +If you do a deep copy of obj, it should still give you the same hash: + >>> from copy import deepcopy + 2481013017017307534 + >>> DeepHash(obj2)[obj2] + (3415089864575009947, 7987229399128149852) + +Note that by default DeepHash will ignore string type differences. So if your strings were bytes, you would still get the same hash: + >>> obj3 = {1: 2, b'a': b'b'} + >>> DeepHash(obj3)[obj3] + (3415089864575009947, 7987229399128149852) + +But if you want a different hash if string types are different, set include_string_type_changes to True: + >>> DeepHash(obj3, include_string_type_changes=True)[obj3] + (6406752576584602448, -8103933101621212760) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index abb30562..09b7eeb5 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -22,11 +22,13 @@ from ordered_set import OrderedSet from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, - IndexedHash, Verbose, unprocessed, json_convertor_default, add_to_frozen_set) + IndexedHash, Verbose, unprocessed, json_convertor_default, add_to_frozen_set, + current_dir, convert_item_or_items_into_set_else_none, + convert_item_or_items_into_compiled_regexes_else_none) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel from deepdiff.model import DictRelationship, AttributeRelationship from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship -from deepdiff.contenthash import DeepHash +from deepdiff.deephash import DeepHash logger = logging.getLogger(__name__) warnings.simplefilter('once', DeprecationWarning) @@ -35,8 +37,6 @@ TEXT_VIEW = 'text' -current_dir = os.path.dirname(os.path.abspath(__file__)) - with open(os.path.join(current_dir, 'diff_doc.rst'), 'r') as doc_file: doc = doc_file.read() @@ -67,8 +67,8 @@ def __init__(self, self.ignore_order = ignore_order self.report_repetition = report_repetition - self.exclude_paths = set(exclude_paths) if 
exclude_paths else None - self.exclude_regex_paths = [i if isinstance(i, re.Pattern) else re.compile(i) for i in exclude_regex_paths] if exclude_regex_paths else None + self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths) + self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) self.exclude_types = set(exclude_types) if exclude_types else None self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance self.include_string_type_changes = include_string_type_changes @@ -423,7 +423,7 @@ def __create_hashtable(self, t, level): self._add_hash(hashes=hashes, item_hash=item_hash, item=item, i=i) return hashes - def __diff_iterable_with_contenthash(self, level): + def __diff_iterable_with_deephash(self, level): """Diff of unhashable iterables. Only used when ignoring the order.""" t1_hashtable = self.__create_hashtable(level.t1, level) t2_hashtable = self.__create_hashtable(level.t2, level) @@ -554,7 +554,7 @@ def __diff(self, level, parents_ids=frozenset({})): elif isinstance(level.t1, Iterable): if self.ignore_order: - self.__diff_iterable_with_contenthash(level) + self.__diff_iterable_with_deephash(level) else: self.__diff_iterable(level, parents_ids) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 8a4dbcdb..0a83d7cc 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -2,6 +2,7 @@ import sys import datetime import re +import os import logging from decimal import Decimal from collections import namedtuple @@ -33,6 +34,8 @@ IndexedHash = namedtuple('IndexedHash', 'indexes item') +current_dir = os.path.dirname(os.path.abspath(__file__)) + def short_repr(item, max_length=15): """Short representation of item if it is too long""" diff --git a/docs/contenthash.rst b/docs/contenthash.rst index 179d8043..55c3041f 100644 --- a/docs/contenthash.rst +++ b/docs/contenthash.rst @@ -6,7 +6,7 @@ DeepHash Reference .. toctree:: :maxdepth: 3 -.. 
automodule:: deepdiff.contenthash +.. automodule:: deepdiff.deephash .. autoclass:: DeepHash :members: diff --git a/docs/index.rst b/docs/index.rst index 2adc25df..687a882b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -388,7 +388,7 @@ In order to get the hash of obj itself, you need to use the object (or the id of Read more in the Deep Hash reference: -:doc:`/contenthash` +:doc:`/deephash` .. _ignore\_order: #ignore-order .. _report\_repetition: #report-repetitions @@ -406,7 +406,7 @@ References diff dsearch - contenthash + deephash Indices and tables @@ -420,7 +420,7 @@ Indices and tables Changelog ========= -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. 
- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) diff --git a/tests/test_hash.py b/tests/test_hash.py index fd64e15a..fbaa6a35 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- import pytest from deepdiff import DeepHash -from deepdiff.contenthash import prepare_string_for_hashing, skipped, unprocessed +from deepdiff.deephash import prepare_string_for_hashing, skipped, unprocessed from deepdiff.helper import pypy3 from collections import namedtuple from functools import partial @@ -79,8 +79,8 @@ def test_prep_str(self): def test_prep_str_fail_if_mutable(self): """ - This test fails if ContentHash is getting a mutable copy of hashes - which means each init of the ContentHash will have hashes from + This test fails if DeepHash is getting a mutable copy of hashes + which means each init of the DeepHash will have hashes from the previous init. """ obj1 = "a" @@ -378,9 +378,9 @@ def test_skip_path(self): dic2 = {} t2 = [dic2, 2] t1_hash = DeepHashPrep(t1, exclude_paths=['root[0]']) - t2_hash = DeepHashPrep(t2, exclude_paths=['root[0]']) - # assert id(1) not in t1_hash - # assert id(2) in t1_hash + t2_hash = DeepHashPrep(t2, exclude_paths='root[0]') + assert id(1) not in t1_hash + assert id(2) in t1_hash assert t1_hash[2] == t2_hash[2] @@ -397,8 +397,8 @@ def test_prep_str_sha1(self): def test_prep_str_sha1_fail_if_mutable(self): """ - This test fails if ContentHash is getting a mutable copy of hashes - which means each init of the ContentHash will have hashes from + This test fails if DeepHash is getting a mutable copy of hashes + which means each init of the DeepHash will have hashes from the previous init. 
""" obj1 = "a" From f65883bc26a798522e336aca80604fc771198f27 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 17 Feb 2019 22:39:18 -0800 Subject: [PATCH 47/76] updating comments --- deepdiff/deephash_doc.rst | 12 +++++++++--- deepdiff/diff_doc.rst | 20 +++++++++++--------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index c8d4120c..d0183fc1 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -14,15 +14,21 @@ but you can pass another hash function to it if you want. obj : any object, The object to be hashed based on its content. -hashes : dictionary, default = empty dictionary. +hashes: dictionary, default = empty dictionary A dictionary of {object id: object hash} to start with. Any object that is encountered and its id is already in the hashes dictionary, will re-use the hash that is provided by this dictionary instead of re-calculating its hash. -exclude_types: list, default = None. +exclude_types: list, default = None List of object types to exclude from hashing. +exclude_paths: list, default = None + List of paths to exclude from the report. If only one item, you can path it as a string. + +exclude_regex_paths: list, default = None + List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string or regex compiled object. + hasher: function. default = DeepHash.murmur3_128bit hasher is the hashing function. The default is DeepHash.murmur3_128bit. But you can pass another hash function to it if you want. @@ -39,7 +45,7 @@ ignore_repetition: Boolean, default = True Note that the deepdiff diffing functionality lets this to be the default at all times. But if you are using DeepHash directly, you can set this parameter. -significant_digits : int >= 0, default=None. +significant_digits : int >= 0, default=None If it is a non negative integer, it compares only that many digits AFTER the decimal point. 
diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index e9a8777d..4ca54b87 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -40,13 +40,13 @@ verbose_level : int >= 0, default = 1. For example verbose level 1 shows what dictionary item are added or removed. And verbose level 2 shows the value of the items that are added or removed too. -exclude_paths: list, default = None. - List of paths to exclude from the report. +exclude_paths: list, default = None + List of paths to exclude from the report. If only one item, you can pass it as a string. -exclude_regex_paths: list, default = None. - List of string regex paths or compiled regex paths objects to exclude from the report. +exclude_regex_paths: list, default = None + List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string or regex compiled object. -exclude_types: list, default = None. +exclude_types: list, default = None List of object types to exclude from the report. hasher: default = DeepHash.murmur3_128bit @@ -566,16 +566,18 @@ Exclude certain types from comparison: **Exclude paths** Exclude part of your object tree from comparison -use `exclude_paths` and pass a set or list of paths to exclude: +use `exclude_paths` and pass a set or list of paths to exclude, if only one item is being passed, then just put it there as a string. No need to pass it as a list then. >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} - >>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) + >>> print (DeepDiff(t1, t2, exclude_paths="root['ingredients']")) # one item pass it as a string + {} + >>> print (DeepDiff(t1, t2, exclude_paths=["root['ingredients']", "root['ingredients2']"])) # multiple items pass as a list or a set.
{} You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude. The items in the list could be raw regex strings or compiled regex objects. >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] - >>> print(DeepDiff(t1, t2, exclude_regex_paths={r"root\[\d+\]\['b'\]"})) + >>> print(DeepDiff(t1, t2, exclude_regex_paths=r"root\[\d+\]\['b'\]")) {} >>> exclude_path = re.compile(r"root\[\d+\]\['b'\]") >>> print(DeepDiff(t1, t2, exclude_regex_paths=[exclude_path])) @@ -584,7 +586,7 @@ You can also exclude using regular expressions by using `exclude_regex_paths` an example 2: >>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} >>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} - >>> DeepDiff(t1, t2, exclude_regex_paths={"\['foo.'\]"}) + >>> DeepDiff(t1, t2, exclude_regex_paths="\['foo.'\]") # since it is one item in exclude_regex_paths, you don't have to put it in a list or a set. {} Tip: DeepDiff is using re.search on the path. So if you want to force it to match from the beginning of the path, add `^` to the beginning of regex. 
From 54d1b41539618f4ec32b814eca37b1a9ab058ab7 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 17 Feb 2019 22:49:56 -0800 Subject: [PATCH 48/76] re compile type in older python --- deepdiff/helper.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 0a83d7cc..60a64ca0 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -167,11 +167,14 @@ def convert_item_or_items_into_set_else_none(items): return items +RE_COMPILED_TYPE = type(re.compile('')) + + def convert_item_or_items_into_compiled_regexes_else_none(items): if items: - if isinstance(items, (strings, re.Pattern)): + if isinstance(items, (strings, RE_COMPILED_TYPE)): items = [items] - items = [i if isinstance(i, re.Pattern) else re.compile(i) for i in items] + items = [i if isinstance(i, RE_COMPILED_TYPE) else re.compile(i) for i in items] else: items = None return items From 04780438402b4f233b45ccdc8d101d8ac38fb7f3 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 17 Feb 2019 23:09:40 -0800 Subject: [PATCH 49/76] updating docs --- AUTHORS | 1 + README.md | 4 ++-- docs/index.rst | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/AUTHORS b/AUTHORS index 718d49d4..d9309fa3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -20,3 +20,4 @@ Also thanks to: - Brian Maissy (brianmaissy) for weakref fix, enum tests - Bartosz Borowik (boba-2) for Exclude types fix when ignoring order - Brian Maissy (brianmaissy) for fixing classes which inherit from classes with slots didn't have all of their slots compared +- Juan Soler (Soleronline) for adding ignore_type_number diff --git a/README.md b/README.md index 1f74d8be..c22e346e 100644 --- a/README.md +++ b/README.md @@ -995,9 +995,9 @@ And here is more info: -## Change log +## ChangeLog -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. 
Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_number. - v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) diff --git a/docs/index.rst b/docs/index.rst index 687a882b..0eced906 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -420,7 +420,7 @@ Indices and tables Changelog ========= -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_number. 
- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) From d8395e45f52d48263ff7d677d4102f445f7f9987 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 17 Feb 2019 23:22:26 -0800 Subject: [PATCH 50/76] updating authors --- AUTHORS | 3 +-- README.md | 7 +++++-- docs/index.rst | 6 ++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/AUTHORS b/AUTHORS index d9309fa3..e598a258 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,8 +1,6 @@ Authors: - Seperman - Victor Hahn Castell @ Flexoptix - -Also thanks to: - nfvs for Travis-CI setup script. - brbsix for initial Py3 porting. - WangFenjin for unicode support. @@ -21,3 +19,4 @@ Also thanks to: - Bartosz Borowik (boba-2) for Exclude types fix when ignoring order - Brian Maissy (brianmaissy) for fixing classes which inherit from classes with slots didn't have all of their slots compared - Juan Soler (Soleronline) for adding ignore_type_number +- mthaddon for adding timedelta diffing support diff --git a/README.md b/README.md index c22e346e..19ae740c 100644 --- a/README.md +++ b/README.md @@ -997,7 +997,7 @@ And here is more info: ## ChangeLog -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_number. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_number. Adding match_string to DeepSearch. Adding Timedelta object diffing. 
- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) @@ -1058,5 +1058,8 @@ Also thanks to: - maxrothman for search for types/objects - MartyHub for exclude regex paths - sreecodeslayer for DeepSearch match_string -- Brian Maissy (brianmaissy) for weakref fix and enum tests +- Brian Maissy (brianmaissy) for weakref fix, enum tests - Bartosz Borowik (boba-2) for Exclude types fix when ignoring order +- Brian Maissy (brianmaissy) for fixing classes which inherit from classes with slots didn't have all of their slots compared +- Juan Soler (Soleronline) for adding ignore_type_number +- mthaddon for adding timedelta diffing support diff --git a/docs/index.rst b/docs/index.rst index 0eced906..73864dcc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -420,7 +420,7 @@ Indices and tables Changelog ========= -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_number. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_number. Adding match_string to DeepSearch. Adding Timedelta object diffing. 
- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) @@ -485,7 +485,9 @@ ALso thanks to: - maxrothman for search for types/objects - MartyHub for exclude regex paths - sreecodeslayer for DeepSearch match_string -- Brian Maissy (brianmaissy) for weakref fix and enum tests +- Brian Maissy (brianmaissy) for weakref fix, enum tests - Bartosz Borowik (boba-2) for Exclude types fix when ignoring order - Brian Maissy (brianmaissy) for fixing classes which inherit from classes with slots didn't have all of their slots compared +- Juan Soler (Soleronline) for adding ignore_type_number +- mthaddon for adding timedelta diffing support From f3add4755e8e0b9af2f0194d6b2bbcecc79dbe12 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 17 Feb 2019 23:31:19 -0800 Subject: [PATCH 51/76] updating docs --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 19ae740c..39549556 100644 --- a/README.md +++ b/README.md @@ -1029,6 +1029,13 @@ And here is more info: - v0-5-6: Adding slots support - v0-5-5: Adding loop detection +## Contribute + +1. Please make your PR against the dev branch +2. Please make sure that your PR has tests. Since DeepDiff is used in many sensitive data driven projects, we maintain 100% test coverage on the code. There are occasionally exceptions to that rule, but they are rare. + +Thank you! 
+ ## Authors Seperman (Sep Dehpour) From b01f0e430af15b977e6d6db3d26466df0364d65d Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 18 Feb 2019 00:08:30 -0800 Subject: [PATCH 52/76] playing with ignore type groups --- README.md | 6 +- deepdiff/diff.py | 45 ++++++++++---- deepdiff/diff_doc.rst | 84 ++++++++++++++------------ docs/{contenthash.rst => deephash.rst} | 0 docs/index.rst | 2 +- tests/test_diff_text.py | 15 +++-- 6 files changed, 92 insertions(+), 60 deletions(-) rename docs/{contenthash.rst => deephash.rst} (100%) diff --git a/README.md b/README.md index 39549556..97f908e4 100644 --- a/README.md +++ b/README.md @@ -209,7 +209,7 @@ Ignore Type Number - Dictionary that contains float and integer: 'new_value': 1.0, 'old_type': , 'old_value': 1}}} ->>> ddiff = DeepDiff(t1, t2, ignore_type_number=True) +>>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=True) >>> pprint(ddiff, indent=2) {} ``` @@ -235,7 +235,7 @@ Ignore Type Number - List that contains float and integer: 'new_value': 3.0, 'old_type': , 'old_value': 3}}} ->>> ddiff = DeepDiff(t1, t2, ignore_type_number=True) +>>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=True) >>> pprint(ddiff, indent=2) {} ``` @@ -997,7 +997,7 @@ And here is more info: ## ChangeLog -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_number. Adding match_string to DeepSearch. Adding Timedelta object diffing. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. 
Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. - v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index c298a4d2..a8d59a20 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -7,8 +7,6 @@ # every time you run the docstrings. # However the docstring expects it in a specific order in order to pass! -import re -import os import difflib import logging import json @@ -23,7 +21,7 @@ from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose, unprocessed, json_convertor_default, add_to_frozen_set, - current_dir, convert_item_or_items_into_set_else_none, + convert_item_or_items_into_set_else_none, convert_item_or_items_into_compiled_regexes_else_none) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel from deepdiff.model import DictRelationship, AttributeRelationship @@ -36,13 +34,16 @@ TREE_VIEW = 'tree' TEXT_VIEW = 'text' + class DeepDiff(ResultDict): + numbers = numbers + strings = strings def __init__(self, t1, t2, ignore_order=False, - ignore_type_number=False, + ignore_type_in_groups=None, report_repetition=False, significant_digits=None, exclude_paths=None, @@ -57,11 +58,18 @@ def __init__(self, if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" - "The valid parameters are ignore_order, report_repetition, significant_digits, ignore_type_number" + "The valid parameters are ignore_order, report_repetition, significant_digits, ignore_type_in_groups" "exclude_paths, exclude_types, exclude_regex_paths, transformer, verbose_level and view.") % ', '.join(kwargs.keys())) self.ignore_order = ignore_order - self.ignore_type_number = ignore_type_number + if ignore_type_in_groups: + if isinstance(ignore_type_in_groups[0], type): + 
ignore_type_in_groups = [tuple(ignore_type_in_groups)] + else: + ignore_type_in_groups = list(map(tuple, ignore_type_in_groups)) + else: + ignore_type_in_groups = [] + self.ignore_type_in_groups = ignore_type_in_groups self.report_repetition = report_repetition self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths) self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) @@ -356,15 +364,21 @@ def __diff_str(self, level): # do we add a diff for convenience? do_diff = True + t1_str = level.t1 + t2_str = level.t2 + if isinstance(level.t1, bytes_type): try: t1_str = level.t1.decode('ascii') + except UnicodeDecodeError: + do_diff = False + + if isinstance(level.t2, bytes_type): + try: t2_str = level.t2.decode('ascii') except UnicodeDecodeError: do_diff = False - else: - t1_str = level.t1 - t2_str = level.t2 + if do_diff: if u'\n' in t1_str or u'\n' in t2_str: diff = difflib.unified_diff( @@ -530,10 +544,17 @@ def __diff(self, level, parents_ids=frozenset({})): if self.__skip_this(level): return - if type(level.t1) != type(level.t2) and not (self.ignore_type_number and isinstance(level.t1, numbers) and isinstance(level.t2, numbers)): - self.__diff_types(level) + if type(level.t1) != type(level.t2): # NOQA + report_type_change = True + for type_group in self.ignore_type_in_groups: + if isinstance(level.t1, type_group) and isinstance(level.t2, type_group): + report_type_change = False + break + if report_type_change: + self.__diff_types(level) + return - elif isinstance(level.t1, strings): + if isinstance(level.t1, strings): self.__diff_str(level) elif isinstance(level.t1, numbers): diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 23d6ded6..4c847742 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -35,7 +35,7 @@ significant_digits : int >= 0, default=None. 
For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) -ignore_type_number : Boolean, default=False ignores types when t1 and t2 are numbers. +ignore_type_in_groups : Tuple or List of Tuples, default=None ignores types when t1 and t2 are both within the same type group. verbose_level : int >= 0, default = 1. Higher verbose level shows you more details. @@ -193,6 +193,49 @@ And if you don't care about the value of items that have changed type, please se { 'type_changes': { 'root[2]': { 'new_type': , 'old_type': }}} +ignore_type_in_groups + +Ignore Type Number - Dictionary that contains float and integer: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = {1: 1, 2: 2.22} + >>> t2 = {1: 1.0, 2: 2.22} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[1]': { 'new_type': , + 'new_value': 1.0, + 'old_type': , + 'old_value': 1}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + >>> pprint(ddiff, indent=2) + {} + +Ignore Type Number - List that contains float and integer: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [1, 2, 3] + >>> t2 = [1.0, 2.0, 3.0] + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[0]': { 'new_type': , + 'new_value': 1.0, + 'old_type': , + 'old_value': 1}, + 'root[1]': { 'new_type': , + 'new_value': 2.0, + 'old_type': , + 'old_value': 2}, + 'root[2]': { 'new_type': , + 'new_value': 3.0, + 'old_type': , + 'old_value': 3}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + >>> pprint(ddiff, indent=2) + {} + +You can pass a list of tuples if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . 
If you want to pass both: + + List difference >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} @@ -296,45 +339,6 @@ Approximate float comparison (Significant digits after the point): {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} -Ignore Type Number - Dictionary that contains float and integer: - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = {1: 1, 2: 2.22} - >>> t2 = {1: 1.0, 2: 2.22} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff, indent=2) - { 'type_changes': { 'root[1]': { 'new_type': , - 'new_value': 1.0, - 'old_type': , - 'old_value': 1}}} - >>> ddiff = DeepDiff(t1, t2, ignore_type_number=True) - >>> pprint(ddiff, indent=2) - {} - -Ignore Type Number - List that contains float and integer: - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = [1, 2, 3] - >>> t2 = [1.0, 2.0, 3.0] - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff, indent=2) - { 'type_changes': { 'root[0]': { 'new_type': , - 'new_value': 1.0, - 'old_type': , - 'old_value': 1}, - 'root[1]': { 'new_type': , - 'new_value': 2.0, - 'old_type': , - 'old_value': 2}, - 'root[2]': { 'new_type': , - 'new_value': 3.0, - 'old_type': , - 'old_value': 3}}} - >>> ddiff = DeepDiff(t1, t2, ignore_type_number=True) - >>> pprint(ddiff, indent=2) - {} - - .. note:: All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. diff --git a/docs/contenthash.rst b/docs/deephash.rst similarity index 100% rename from docs/contenthash.rst rename to docs/deephash.rst diff --git a/docs/index.rst b/docs/index.rst index 73864dcc..9ce26ab1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -420,7 +420,7 @@ Indices and tables Changelog ========= -- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. 
Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_number. Adding match_string to DeepSearch. Adding Timedelta object diffing. +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. - v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 8c843ecc..8825bf36 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1303,16 +1303,23 @@ def test_negative_significant_digits(self): with pytest.raises(ValueError): DeepDiff(1, 1, significant_digits=-1) - def test_ignore_type_number(self): + def test_ignore_type_in_groups(self): t1 = [1, 2, 3] t2 = [1.0, 2.0, 3.0] - ddiff = DeepDiff(t1, t2, ignore_type_number=True) + ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) assert not ddiff - def test_ignore_type_number2(self): + def test_ignore_type_in_groups2(self): t1 = [1, 2, 3] t2 = [1.0, 2.0, 3.3] - ddiff = DeepDiff(t1, t2, ignore_type_number=True) + ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + result = {'values_changed': {'root[2]': {'new_value': 3.3, 'old_value': 3}}} + assert result == ddiff + + def test_ignore_type_in_groups3(self): + t1 = [1, 2, 3, 'a'] + t2 = [1.0, 2.0, 3.3, b'a'] + ddiff = DeepDiff(t1, t2, ignore_type_in_groups=[DeepDiff.numbers, DeepDiff.strings]) result = {'values_changed': {'root[2]': {'new_value': 3.3, 'old_value': 3}}} assert result == ddiff 
From a38f875cfaf0e92a13752c20955d96df0364188c Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 18 Feb 2019 00:19:47 -0800 Subject: [PATCH 53/76] dealing with unicode --- deepdiff/diff.py | 5 +- tests/test_diff_text.py | 148 +++++++++++++--------------------------- 2 files changed, 52 insertions(+), 101 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index a8d59a20..dab5487f 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -359,7 +359,7 @@ def __diff_iterable(self, level, parents_ids=frozenset({})): def __diff_str(self, level): """Compare strings""" - if level.t1 == level.t2: + if type(level.t1) == type(level.t2) and level.t1 == level.t2: return # do we add a diff for convenience? @@ -379,6 +379,9 @@ def __diff_str(self, level): except UnicodeDecodeError: do_diff = False + if t1_str == t2_str: + return + if do_diff: if u'\n' in t1_str or u'\n' in t2_str: diff = difflib.unified_diff( diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 8825bf36..b3e47a3b 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -5,7 +5,6 @@ import logging from decimal import Decimal from deepdiff import DeepDiff -from deepdiff.helper import py3 from tests import CustomClass from unittest import mock logging.disable(logging.CRITICAL) @@ -1106,136 +1105,73 @@ def test_unicode_string_type_changes(self): unicode_string = {"hello": u"你好"} ascii_string = {"hello": "你好"} ddiff = DeepDiff(unicode_string, ascii_string) - if py3: - # In python3, all string is unicode, so diff is empty - result = {} - else: - # In python2, these are 2 different type of strings - result = { - 'type_changes': { - "root['hello']": { - 'old_type': unicode, - 'new_value': '\xe4\xbd\xa0\xe5\xa5\xbd', - 'old_value': u'\u4f60\u597d', - 'new_type': str - } - } - } + result = {} assert result == ddiff def test_unicode_string_value_changes(self): unicode_string = {"hello": u"你好"} ascii_string = {"hello": u"你好hello"} ddiff = DeepDiff(unicode_string, ascii_string) - if 
py3: - result = { - 'values_changed': { - "root['hello']": { - 'old_value': '你好', - 'new_value': '你好hello' - } - } - } - else: - result = { - 'values_changed': { - "root['hello']": { - 'new_value': u'\u4f60\u597dhello', - 'old_value': u'\u4f60\u597d' - } + result = { + 'values_changed': { + "root['hello']": { + 'old_value': '你好', + 'new_value': '你好hello' } } + } assert result == ddiff def test_unicode_string_value_and_type_changes(self): unicode_string = {"hello": u"你好"} ascii_string = {"hello": "你好hello"} ddiff = DeepDiff(unicode_string, ascii_string) - if py3: - # In python3, all string is unicode, so these 2 strings only diff - # in values - result = { - 'values_changed': { - "root['hello']": { - 'new_value': '你好hello', - 'old_value': '你好' - } - } - } - else: - # In python2, these are 2 different type of strings - result = { - 'type_changes': { - "root['hello']": { - 'old_type': unicode, - 'new_value': '\xe4\xbd\xa0\xe5\xa5\xbdhello', - 'old_value': u'\u4f60\u597d', - 'new_type': str - } + # In python3, all string is unicode, so these 2 strings only diff + # in values + result = { + 'values_changed': { + "root['hello']": { + 'new_value': '你好hello', + 'old_value': '你好' } } + } assert result == ddiff def test_int_to_unicode_string(self): t1 = 1 ascii_string = "你好" ddiff = DeepDiff(t1, ascii_string) - if py3: - # In python3, all string is unicode, so these 2 strings only diff - # in values - result = { - 'type_changes': { - 'root': { - 'old_type': int, - 'new_type': str, - 'old_value': 1, - 'new_value': '你好' - } - } - } - else: - # In python2, these are 2 different type of strings - result = { - 'type_changes': { - 'root': { - 'old_type': int, - 'new_value': '\xe4\xbd\xa0\xe5\xa5\xbd', - 'old_value': 1, - 'new_type': str - } + # In python3, all string is unicode, so these 2 strings only diff + # in values + result = { + 'type_changes': { + 'root': { + 'old_type': int, + 'new_type': str, + 'old_value': 1, + 'new_value': '你好' } } + } assert result == ddiff def 
test_int_to_unicode(self): t1 = 1 unicode_string = u"你好" ddiff = DeepDiff(t1, unicode_string) - if py3: - # In python3, all string is unicode, so these 2 strings only diff - # in values - result = { - 'type_changes': { - 'root': { - 'old_type': int, - 'new_type': str, - 'old_value': 1, - 'new_value': '你好' - } - } - } - else: - # In python2, these are 2 different type of strings - result = { - 'type_changes': { - 'root': { - 'old_type': int, - 'new_value': u'\u4f60\u597d', - 'old_value': 1, - 'new_type': unicode - } + # In python3, all string is unicode, so these 2 strings only diff + # in values + result = { + 'type_changes': { + 'root': { + 'old_type': int, + 'new_type': str, + 'old_value': 1, + 'new_value': '你好' } } + } assert result == ddiff def test_significant_digits_for_decimals(self): @@ -1316,7 +1252,19 @@ def test_ignore_type_in_groups2(self): result = {'values_changed': {'root[2]': {'new_value': 3.3, 'old_value': 3}}} assert result == ddiff - def test_ignore_type_in_groups3(self): + def test_ignore_type_in_groups_just_numbers(self): + t1 = [1, 2, 3, 'a'] + t2 = [1.0, 2.0, 3.3, b'a'] + ddiff = DeepDiff(t1, t2, ignore_type_in_groups=[DeepDiff.numbers]) + result = {'values_changed': {'root[2]': {'new_value': 3.3, 'old_value': 3}}, + 'type_changes': {'root[3]': {'new_type': bytes, + 'new_value': b'a', + 'old_type': str, + 'old_value': 'a'}} + } + assert result == ddiff + + def test_ignore_type_in_groups_numbers_and_strings(self): t1 = [1, 2, 3, 'a'] t2 = [1.0, 2.0, 3.3, b'a'] ddiff = DeepDiff(t1, t2, ignore_type_in_groups=[DeepDiff.numbers, DeepDiff.strings]) From 72e48fac5220756fca0d34e23fe18dfb7691328b Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 18 Feb 2019 00:20:54 -0800 Subject: [PATCH 54/76] cleaning up --- tests/test_search.py | 13 ------------- tests/test_serialization.py | 19 ------------------- 2 files changed, 32 deletions(-) diff --git a/tests/test_search.py b/tests/test_search.py index 8762cae3..5e6f237b 100644 --- a/tests/test_search.py 
+++ b/tests/test_search.py @@ -1,18 +1,5 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.search_tests - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - nosetests tests/test_search.py:DeepSearchTestCase.test_case_insensitive_of_str_in_list -""" import pytest from deepdiff import DeepSearch, grep from datetime import datetime diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 08b61cba..91fc4084 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -1,24 +1,5 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_serialization - -Or to run all the tests: - python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - python -m unittest tests.test_serialization.DeepDiffTextTestCase.test_same_objects - -or using nosetests: - nosetests tests/test_serialization.py:DeepDiffTestCase.test_diff_when_hash_fails -""" from deepdiff import DeepDiff import logging From a53d874f49176839737682e5289484477262243d Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 18 Feb 2019 00:28:39 -0800 Subject: [PATCH 55/76] changing test runner --- run_tests.py | 8 -------- run_tests.sh | 1 + 2 files changed, 1 insertion(+), 8 deletions(-) delete mode 100755 run_tests.py create mode 100755 run_tests.sh diff --git a/run_tests.py b/run_tests.py deleted file mode 100755 index 3f4217dd..00000000 --- a/run_tests.py +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import unittest -loader = unittest.TestLoader() -tests = loader.discover('.') 
-testRunner = unittest.runner.TextTestRunner() -testRunner.run(tests) diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 00000000..660146f5 --- /dev/null +++ b/run_tests.sh @@ -0,0 +1 @@ +pytest --cov=deepdiff --cov-report term-missing From 20e24fdd4d4e0965f99e1e7ea5f7f466f732cba7 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 22 Feb 2019 19:22:55 -0800 Subject: [PATCH 56/76] covering deephash --- deepdiff/deephash.py | 2 +- tests/test_hash.py | 64 +++++++++++++++++++++++++++++++------------- 2 files changed, 46 insertions(+), 20 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index cf2134f9..8450416d 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -247,7 +247,7 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): result = not_hashed if self._skip_this(obj, parent): - result = skipped + return elif obj is None: result = 'NONE' diff --git a/tests/test_hash.py b/tests/test_hash.py index fbaa6a35..6dfb78aa 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -1,12 +1,13 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import re import pytest +import logging from deepdiff import DeepHash -from deepdiff.deephash import prepare_string_for_hashing, skipped, unprocessed +from deepdiff.deephash import prepare_string_for_hashing, unprocessed from deepdiff.helper import pypy3 from collections import namedtuple from functools import partial -import logging from enum import Enum logging.disable(logging.CRITICAL) @@ -40,6 +41,19 @@ def test_dictionary(self): result = DeepHash(obj) assert set(result.keys()) == {id(1), id(obj)} + def test_get_hash_by_obj_is_the_same_as_by_obj_id(self): + a = "a" + obj = {1: a} + result = DeepHash(obj) + assert result[id(a)] == result[a] + + def test_get_hash_by_obj_when_does_not_exist(self): + a = "a" + obj = {1: a} + result = DeepHash(obj) + with pytest.raises(KeyError): + result[2] + def test_list_of_sets(self): a = {1} b = {2} @@ -64,6 +78,20 @@ def __str__(self): 
expected_result = {id(t1): unprocessed, 'unprocessed': [t1]} assert expected_result == result + def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self): + a = 'hello' + b = b'hello' + a_hash = DeepHash(a)[a] + b_hash = DeepHash(b)[b] + assert a_hash == b_hash + + def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self): + a = 'hello' + b = b'hello' + a_hash = DeepHash(a, hasher=DeepHash.sha1hex)[a] + b_hash = DeepHash(b, hasher=DeepHash.sha1hex)[b] + assert a_hash == b_hash + class TestDeepHashPrep: """DeepHashPrep Tests covering object serialization.""" @@ -342,6 +370,11 @@ def test_skip_type(self): result = DeepHashPrep(obj, exclude_types={logging.Logger}) assert id(l1) not in result + def test_skip_type2(self): + l1 = logging.getLogger("test") + result = DeepHashPrep(l1, exclude_types={logging.Logger}) + assert not result + def test_prep_dic_with_loop(self): obj = {2: 1337} obj[1] = obj @@ -383,6 +416,16 @@ def test_skip_path(self): assert id(2) in t1_hash assert t1_hash[2] == t2_hash[2] + def test_skip_regex_path(self): + dic1 = {1: "a"} + t1 = [dic1, 2] + exclude_re = re.compile(r'\[0\]') + t1_hash = DeepHashPrep(t1, exclude_regex_paths=r'\[0\]') + t2_hash = DeepHashPrep(t1, exclude_regex_paths=[exclude_re]) + assert id(1) not in t1_hash + assert id(2) in t1_hash + assert t1_hash[2] == t2_hash[2] + class TestDeepHashSHA1: """DeepHash with SHA1 Tests.""" @@ -449,23 +492,6 @@ def test_dict1(self): assert expected_result == result -class TestHasher: - - def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self): - a = 'hello' - b = b'hello' - a_hash = DeepHash(a)[a] - b_hash = DeepHash(b)[b] - assert a_hash == b_hash - - def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self): - a = 'hello' - b = b'hello' - a_hash = DeepHash(a, hasher=DeepHash.sha1hex)[a] - b_hash = DeepHash(b, hasher=DeepHash.sha1hex)[b] - assert a_hash == b_hash - - class TestCleaningString: @pytest.mark.parametrize("text, include_string_type_changes, 
expected_result", [ From f5e0e00a5a46faf7d4d7eb920c9ebc5b7122f1a6 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Fri, 22 Feb 2019 23:38:23 -0800 Subject: [PATCH 57/76] wip --- README.md | 24 ++++++++++++++--------- deepdiff/deephash.py | 12 ++++++------ deepdiff/deephash_doc.rst | 8 ++++---- deepdiff/diff.py | 41 ++++++++++++++++++++++++++++----------- deepdiff/diff_doc.rst | 15 ++++++++++++++ tests/test_diff_text.py | 24 +++++++++++++++++++++++ tests/test_hash.py | 22 ++++++++++----------- 7 files changed, 105 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 97f908e4..4a12edc3 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ Approximate float comparison: {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} ``` -## Ignore Type Number +## Ignore Type In Groups Ignore Type Number - Dictionary that contains float and integer: @@ -209,7 +209,7 @@ Ignore Type Number - Dictionary that contains float and integer: 'new_value': 1.0, 'old_type': , 'old_value': 1}}} ->>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=True) +>>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) >>> pprint(ddiff, indent=2) {} ``` @@ -382,17 +382,23 @@ If you do a deep copy of obj, it should still give you the same hash: >>> from copy import deepcopy 2481013017017307534 >>> DeepHash(obj2)[obj2] -(3415089864575009947, 7987229399128149852) +34150898645750099477987229399128149852 ``` Note that by default DeepHash will ignore string type differences. 
So if your strings were bytes, you would still get the same hash: - >>> obj3 = {1: 2, b'a': b'b'} - >>> DeepHash(obj3)[obj3] - (3415089864575009947, 7987229399128149852) -But if you want a different hash if string types are different, set include_string_type_changes to True: - >>> DeepHash(obj3, include_string_type_changes=True)[obj3] - (6406752576584602448, -8103933101621212760) +```py +>>> obj3 = {1: 2, b'a': b'b'} +>>> DeepHash(obj3)[obj3] +34150898645750099477987229399128149852 +``` + +But if you want a different hash if string types are different, set ignore_string_type_changes to False: + +```py +>>> DeepHash(obj3, ignore_string_type_changes=False)[obj3] +64067525765846024488103933101621212760 +``` # Using DeepDiff in unit tests diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 8450416d..aca00e11 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -22,14 +22,14 @@ INDEX_VS_ATTRIBUTE = ('[%s]', '.%s') -def prepare_string_for_hashing(obj, include_string_type_changes=False): +def prepare_string_for_hashing(obj, ignore_string_type_changes=False): """ Clean type conversions """ original_type = obj.__class__.__name__ if isinstance(obj, bytes): obj = obj.decode('utf-8') - if include_string_type_changes: + if not ignore_string_type_changes: obj = "{}:{}".format(original_type, obj) return obj @@ -53,7 +53,7 @@ def __init__(self, ignore_repetition=True, significant_digits=None, constant_size=True, - include_string_type_changes=False, + ignore_string_type_changes=True, **kwargs): if kwargs: raise ValueError( @@ -72,7 +72,7 @@ def __init__(self, self.update(hashes) self[UNPROCESSED] = [] self.significant_digits = significant_digits - self.include_string_type_changes = include_string_type_changes + self.ignore_string_type_changes = ignore_string_type_changes # makes the hash return constant size result if true # the only time it should be set to False is when # testing the individual hash functions for different types of objects. 
@@ -253,7 +253,7 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): result = 'NONE' elif isinstance(obj, strings): - result = prepare_string_for_hashing(obj, include_string_type_changes=self.include_string_type_changes) + result = prepare_string_for_hashing(obj, ignore_string_type_changes=self.ignore_string_type_changes) elif isinstance(obj, numbers): result = self._prep_number(obj) @@ -283,7 +283,7 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): if isinstance(obj, strings): result_cleaned = result else: - result_cleaned = prepare_string_for_hashing(result, include_string_type_changes=self.include_string_type_changes) + result_cleaned = prepare_string_for_hashing(result, ignore_string_type_changes=self.ignore_string_type_changes) result = self.hasher(result_cleaned) # It is important to keep the hash of all objects. diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index d0183fc1..84263d65 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -61,8 +61,8 @@ constant_size: Boolean, default = True The only time you want the constant_size to be False is if you want to know what the string representation of your object is BEFORE it gets hashed. -include_string_type_changes: Boolean, default = False - string type conversions should not affect the hash output when this is set to False. +ignore_string_type_changes: Boolean, default = True + string type conversions should not affect the hash output when this is set to True. For example "Hello" and b"Hello" should produce the same hash. **Returns** @@ -119,6 +119,6 @@ Note that by default DeepHash will ignore string type differences. 
So if your st >>> DeepHash(obj3)[obj3] (3415089864575009947, 7987229399128149852) -But if you want a different hash if string types are different, set include_string_type_changes to True: - >>> DeepHash(obj3, include_string_type_changes=True)[obj3] +But if you want a different hash if string types are different, set ignore_string_type_changes to True: + >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] (6406752576584602448, -8103933101621212760) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index dab5487f..a317e34d 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -49,7 +49,8 @@ def __init__(self, exclude_paths=None, exclude_regex_paths=None, exclude_types=None, - include_string_type_changes=False, + ignore_string_type_changes=None, + include_numeric_type_changes=True, verbose_level=1, view=TEXT_VIEW, hasher=DeepHash.murmur3_128bit, @@ -62,20 +63,20 @@ def __init__(self, "exclude_paths, exclude_types, exclude_regex_paths, transformer, verbose_level and view.") % ', '.join(kwargs.keys())) self.ignore_order = ignore_order - if ignore_type_in_groups: - if isinstance(ignore_type_in_groups[0], type): - ignore_type_in_groups = [tuple(ignore_type_in_groups)] - else: - ignore_type_in_groups = list(map(tuple, ignore_type_in_groups)) - else: - ignore_type_in_groups = [] - self.ignore_type_in_groups = ignore_type_in_groups + if ignore_string_type_changes is not None and ignore_type_in_groups is not None: + raise ValueError('Please set either ignore_string_type_changes or ignore_type_in_groups but not both.') + if ignore_type_in_groups is None and ignore_string_type_changes is None: + ignore_string_type_changes = True + self.ignore_type_in_groups = self._get_ignore_types_in_groups( + ignore_type_in_groups, + ignore_string_type_changes, include_numeric_type_changes) self.report_repetition = report_repetition self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths) self.exclude_regex_paths = 
convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) self.exclude_types = set(exclude_types) if exclude_types else None self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance - self.include_string_type_changes = include_string_type_changes + self.ignore_string_type_changes = ignore_string_type_changes + self.include_numeric_type_changes = include_numeric_type_changes self.hashes = {} self.hasher = hasher @@ -101,6 +102,24 @@ def __init__(self, view_results = self._get_view_results(view) self.update(view_results) + def _get_ignore_types_in_groups(self, ignore_type_in_groups, + ignore_string_type_changes, include_numeric_type_changes): + if ignore_type_in_groups: + if isinstance(ignore_type_in_groups[0], type): + ignore_type_in_groups = [tuple(ignore_type_in_groups)] + else: + ignore_type_in_groups = list(map(tuple, ignore_type_in_groups)) + else: + ignore_type_in_groups = [] + + if ignore_string_type_changes: + ignore_type_in_groups.append(self.strings) + + if not include_numeric_type_changes: + ignore_type_in_groups.append(self.numbers) + + return ignore_type_in_groups + def _get_view_results(self, view): """ Get the results based on the view @@ -420,7 +439,7 @@ def __create_hashtable(self, t, level): hashes=self.hashes, exclude_types=self.exclude_types, significant_digits=self.significant_digits, - include_string_type_changes=self.include_string_type_changes, + ignore_string_type_changes=self.ignore_string_type_changes, hasher=self.hasher) item_hash = hashes_all.get(id(item), item) except Exception as e: # pragma: no cover diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 4c847742..53314da5 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -195,6 +195,19 @@ And if you don't care about the value of items that have changed type, please se ignore_type_in_groups +Ignore type changes between members of groups of types. 
For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the times the shortcuts provided to you are enough. +The shortcuts are ignore_string_type_changes which by default is True and include_numeric_type_changes which is by default true. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more power compared to the shortcuts. For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: + +1. Set ignore_string_type_changes=True which is the default. +2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . + +Now what if you want also typeA and typeB to be ignored when comparing against each other? + +1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] +2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] + +Note that you can either set the ignore_type_in_groups or the shortcuts but not both. + Ignore Type Number - Dictionary that contains float and integer: >>> from deepdiff import DeepDiff >>> from pprint import pprint @@ -235,6 +248,8 @@ Ignore Type Number - List that contains float and integer: You can pass a list of tuples if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers .
If you want to pass both: +ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] + List difference >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index b3e47a3b..1e376fb6 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -43,6 +43,30 @@ def test_item_type_change_less_verbose(self): } }} == DeepDiff(t1, t2, verbose_level=0) + def test_item_type_change_for_strings_ignored_by_default(self): + """ ignore_string_type_changes = True by default """ + + t1 = 'hello' + t2 = b'hello' + ddiff = DeepDiff(t1, t2) + assert not ddiff + + def test_item_type_change_for_strings_override(self): + + t1 = 'hello' + t2 = b'hello' + ddiff = DeepDiff(t1, t2, ignore_string_type_changes=False) + assert { + 'type_changes': { + 'root': { + 'old_type': str, + 'new_type': bytes, + 'old_value': 'hello', + 'new_value': b'hello' + } + } + } == ddiff + def test_value_change(self): t1 = {1: 1, 2: 2, 3: 3} t2 = {1: 1, 2: 4, 3: 3} diff --git a/tests/test_hash.py b/tests/test_hash.py index 6dfb78aa..77c732fe 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -29,8 +29,8 @@ def __repr__(self): DeepHashPrep = partial(DeepHash, constant_size=False) -def prep_str(obj, include_string_type_changes=False): - return 'str:{}'.format(obj) if include_string_type_changes else obj +def prep_str(obj, ignore_string_type_changes=True): + return obj if ignore_string_type_changes else 'str:{}'.format(obj) class TestDeepHash: @@ -101,8 +101,8 @@ def test_prep_str(self): expected_result = {id(obj): prep_str(obj)} result = DeepHashPrep(obj) assert expected_result == result - expected_result = {id(obj): prep_str(obj, include_string_type_changes=True)} - result = DeepHashPrep(obj, include_string_type_changes=True) + expected_result = {id(obj): prep_str(obj, ignore_string_type_changes=False)} + result = DeepHashPrep(obj, ignore_string_type_changes=False) assert expected_result == result def 
test_prep_str_fail_if_mutable(self): @@ -494,12 +494,12 @@ def test_dict1(self): class TestCleaningString: - @pytest.mark.parametrize("text, include_string_type_changes, expected_result", [ - (b'hello', False, 'hello'), - (b'hello', True, 'bytes:hello'), - ('hello', False, 'hello'), - ('hello', True, 'str:hello'), + @pytest.mark.parametrize("text, ignore_string_type_changes, expected_result", [ + (b'hello', True, 'hello'), + (b'hello', False, 'bytes:hello'), + ('hello', True, 'hello'), + ('hello', False, 'str:hello'), ]) - def test_clean_type(self, text, include_string_type_changes, expected_result): - result = prepare_string_for_hashing(text, include_string_type_changes=include_string_type_changes) + def test_clean_type(self, text, ignore_string_type_changes, expected_result): + result = prepare_string_for_hashing(text, ignore_string_type_changes=ignore_string_type_changes) assert expected_result == result From ff9261de03a5765ca196cedcf477834e04ef4f23 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Wed, 13 Mar 2019 18:42:19 -0700 Subject: [PATCH 58/76] fixing tests --- README.md | 8 +- deepdiff/deephash.py | 98 +++++++++++-------- deepdiff/deephash_doc.rst | 14 +-- deepdiff/diff.py | 19 ++-- deepdiff/diff_doc.rst | 2 +- deepdiff/helper.py | 9 ++ tests/test_diff_text.py | 19 +++- tests/test_hash.py | 194 ++++++++++++++++++++++---------------- 8 files changed, 215 insertions(+), 148 deletions(-) diff --git a/README.md b/README.md index 4a12edc3..04c08a4d 100644 --- a/README.md +++ b/README.md @@ -348,7 +348,7 @@ But with DeepHash: >>> from deepdiff import DeepHash >>> obj = {1: 2, 'a': 'b'} >>> DeepHash(obj) -{4355639248: (2468916477072481777, 512283587789292749), 4355639280: (-3578777349255665377, -6377555218122431491), 4358636128: (-8839064797231613815, -1822486391929534118), 4358009664: (8833996863197925870, -419376694314494743), 4357467952: (3415089864575009947, 7987229399128149852)} +{4355639248: 2468916477072481777512283587789292749, 4355639280: 
-35787773492556653776377555218122431491, 4358636128: -88390647972316138151822486391929534118, 4358009664: 8833996863197925870419376694314494743, 4357467952: 34150898645750099477987229399128149852} ``` So what is exactly the hash of obj in this case? @@ -359,7 +359,7 @@ In order to get the hash of obj itself, you need to use the object (or the id of ```py >>> hashes = DeepHash(obj) >>> hashes[obj] -(3415089864575009947, 7987229399128149852) +34150898645750099477987229399128149852 ``` Which you can write as: @@ -370,7 +370,7 @@ Which you can write as: At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. -The result hash is `(3415089864575009947, 7987229399128149852)`. +The result hash is `34150898645750099477987229399128149852`. In this case the hash of the obj is 128 bit that is divided into 2 64bit integers. Using Murmur3 128bit for hashing is preferred (and is the default behaviour) since the chance of hash collision will be minimal and hashing will be deterministic @@ -380,7 +380,7 @@ If you do a deep copy of obj, it should still give you the same hash: ```py >>> from copy import deepcopy -2481013017017307534 +>>> obj2 = deepcopy(obj) >>> DeepHash(obj2)[obj2] 34150898645750099477987229399128149852 ``` diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index aca00e11..1fbe0d33 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -9,9 +9,10 @@ from decimal import Decimal from hashlib import sha1 -from deepdiff.helper import (strings, numbers, unprocessed, skipped, not_hashed, add_to_frozen_set, +from deepdiff.helper import (strings, numbers, unprocessed, not_hashed, add_to_frozen_set, convert_item_or_items_into_set_else_none, current_dir, - convert_item_or_items_into_compiled_regexes_else_none) + convert_item_or_items_into_compiled_regexes_else_none, + get_id) logger = logging.getLogger(__name__) @@ -21,6 +22,11 @@ INDEX_VS_ATTRIBUTE = ('[%s]', 
'.%s') +KEY_TO_VAL_STR = "{}:{}" + +DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 55 +ZERO_DECIMAL_CHARACTERS = set("-0.") + def prepare_string_for_hashing(obj, ignore_string_type_changes=False): """ @@ -30,7 +36,7 @@ def prepare_string_for_hashing(obj, ignore_string_type_changes=False): if isinstance(obj, bytes): obj = obj.decode('utf-8') if not ignore_string_type_changes: - obj = "{}:{}".format(original_type, obj) + obj = KEY_TO_VAL_STR.format(original_type, obj) return obj @@ -54,12 +60,15 @@ def __init__(self, significant_digits=None, constant_size=True, ignore_string_type_changes=True, + ignore_numeric_type_changes=False, **kwargs): if kwargs: raise ValueError( ("The following parameter(s) are not valid: %s\n" "The valid parameters are obj, hashes, exclude_types," - "exclude_paths, exclude_regex_paths, hasher and ignore_repetition.") % ', '.join(kwargs.keys())) + "exclude_paths, exclude_regex_paths, hasher, ignore_repetition," + "significant_digits, constant_size, ignore_string_type_changes," + "ignore_numeric_type_changes") % ', '.join(kwargs.keys())) self.obj = obj exclude_types = set() if exclude_types is None else set(exclude_types) self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance @@ -71,14 +80,18 @@ def __init__(self, hashes = hashes if hashes else {} self.update(hashes) self[UNPROCESSED] = [] - self.significant_digits = significant_digits + if ignore_numeric_type_changes and not significant_digits: + self.significant_digits = DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES + else: + self.significant_digits = significant_digits self.ignore_string_type_changes = ignore_string_type_changes + self.ignore_numeric_type_changes = ignore_numeric_type_changes # makes the hash return constant size result if true # the only time it should be set to False is when # testing the individual hash functions for different types of objects. 
self.constant_size = constant_size - self._hash(obj, parent="root", parents_ids=frozenset({id(obj)})) + self._hash(obj, parent="root", parents_ids=frozenset({get_id(obj)})) if self[UNPROCESSED]: logger.warning("Can not hash the following items: {}.".format(self[UNPROCESSED])) @@ -97,30 +110,29 @@ def murmur3_128bit(obj): obj = obj.encode('utf-8') return mmh3.hash128(obj, DeepHash.MURMUR_SEED) - def _get_item(self, key, changed_to_id=False): - try: - value = super().__getitem__(key) - except KeyError: - if changed_to_id: - raise KeyError('{} is not one of the hashed items.'.format(key)) from None - else: - key = id(key) - value = self._get_item(key, changed_to_id=True) - else: - return value + def __getitem__(self, obj): + # changed_to_id = False + key = obj + result = None - def __getitem__(self, key): - changed_to_id = False - if not isinstance(key, int): + try: + result = super().__getitem__(key) + except (TypeError, KeyError): + key = get_id(obj) try: - if key in RESERVED_DICT_KEYS: - return super().__getitem__(key) - except Exception: - pass - key = id(key) - changed_to_id = True + result = super().__getitem__(key) + except KeyError: + raise KeyError('{} is not one of the hashed items.'.format(obj)) from None + return result - return self._get_item(key, changed_to_id=changed_to_id) + def __contains__(self, obj): + try: + hash(obj) + except TypeError: + key = get_id(obj) + else: + key = obj + return super().__contains__(key) def _prep_obj(self, obj, parent, parents_ids=EMPTY_FROZENSET, is_namedtuple=False): """Difference of 2 objects""" @@ -163,12 +175,12 @@ def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribut key_in_report = key_text % (parent, key_formatted) key_hash = self._hash(key, parent=key_in_report, parents_ids=parents_ids) - item_id = id(item) + item_id = get_id(item) if (parents_ids and item_id in parents_ids) or self._skip_this(item, parent=key_in_report): continue parents_ids_added = add_to_frozen_set(parents_ids, 
item_id) hashed = self._hash(item, parent=key_in_report, parents_ids=parents_ids_added) - hashed = "{}:{}".format(key_hash, hashed) + hashed = KEY_TO_VAL_STR.format(key_hash, hashed) result.append(hashed) result.sort() @@ -188,7 +200,7 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): if self._skip_this(item, parent="{}[{}]".format(parent, i)): continue - item_id = id(item) + item_id = get_id(item) if parents_ids and item_id in parents_ids: continue @@ -206,22 +218,21 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): result = sorted(map(str, result)) # making sure the result items are string and sorted so join command works. result = ','.join(result) - result = "{}:{}".format(type(obj).__name__, result) + result = KEY_TO_VAL_STR.format(type(obj).__name__, result) return result def _prep_number(self, obj): - # Based on diff.DeepDiff.__diff_numbers - if self.significant_digits is not None and isinstance(obj, ( - float, complex, Decimal)): + if self.significant_digits is not None and ( + self.ignore_numeric_type_changes or isinstance(obj, (float, complex, Decimal))): obj_s = ("{:.%sf}" % self.significant_digits).format(obj) # Special case for 0: "-0.00" should compare equal to "0.00" - if set(obj_s) <= set("-0."): + if set(obj_s) <= ZERO_DECIMAL_CHARACTERS: obj_s = "0.00" result = "number:{}".format(obj_s) else: - result = "{}:{}".format(type(obj).__name__, obj) + result = KEY_TO_VAL_STR.format(type(obj).__name__, obj) return result def _prep_tuple(self, obj, parent, parents_ids): @@ -240,9 +251,12 @@ def _prep_tuple(self, obj, parent, parents_ids): def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): """The main diff method""" - obj_id = id(obj) - if obj_id in self: - return self[obj_id] + try: + result = self[obj] + except (TypeError, KeyError): + pass + else: + return result result = not_hashed @@ -288,7 +302,11 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): # It is important to keep the hash of all 
objects. # The hashes will be later used for comparing the objects. - self[obj_id] = result + try: + self[obj] = result + except TypeError: + obj_id = get_id(obj) + self[obj_id] = result return result diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index 84263d65..430bc85c 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -87,7 +87,7 @@ But with DeepHash: >>> from deepdiff import DeepHash >>> obj = {1: 2, 'a': 'b'} >>> DeepHash(obj) - {4355639248: (2468916477072481777, 512283587789292749), 4355639280: (-3578777349255665377, -6377555218122431491), 4358636128: (-8839064797231613815, -1822486391929534118), 4358009664: (8833996863197925870, -419376694314494743), 4357467952: (3415089864575009947, 7987229399128149852)} + {4355639248: 2468916477072481777512283587789292749, 4355639280: -35787773492556653776377555218122431491, 4358636128: -88390647972316138151822486391929534118, 4358009664: 8833996863197925870419376694314494743, 4357467952: 34150898645750099477987229399128149852} So what is exactly the hash of obj in this case? DeepHash is calculating the hash of the obj and any other object that obj contains. @@ -95,14 +95,14 @@ The output of DeepHash is a dictionary of object IDs to their hashes. In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: >>> hashes = DeepHash(obj) >>> hashes[obj] - (3415089864575009947, 7987229399128149852) + 34150898645750099477987229399128149852 Which you can write as: >>> hashes = DeepHash(obj)[obj] At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. -The result hash is (3415089864575009947, 7987229399128149852). +The result hash is 34150898645750099477987229399128149852. In this case the hash of the obj is 128 bit that is divided into 2 64bit integers. 
Using Murmur 3 64bit for hashing is preferred (and is the default behaviour) since the chance of hash collision will be minimal and hashing will be deterministic @@ -110,15 +110,15 @@ and will not depend on the version of the Python. If you do a deep copy of obj, it should still give you the same hash: >>> from copy import deepcopy - 2481013017017307534 + >>> obj2 = deepcopy(obj) >>> DeepHash(obj2)[obj2] - (3415089864575009947, 7987229399128149852) + 34150898645750099477987229399128149852 Note that by default DeepHash will ignore string type differences. So if your strings were bytes, you would still get the same hash: >>> obj3 = {1: 2, b'a': b'b'} >>> DeepHash(obj3)[obj3] - (3415089864575009947, 7987229399128149852) + 34150898645750099477987229399128149852 But if you want a different hash if string types are different, set ignore_string_type_changes to True: >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] - (6406752576584602448, -8103933101621212760) + 64067525765846024488103933101621212760 diff --git a/deepdiff/diff.py b/deepdiff/diff.py index a317e34d..552a9d69 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -50,17 +50,16 @@ def __init__(self, exclude_regex_paths=None, exclude_types=None, ignore_string_type_changes=None, - include_numeric_type_changes=True, + ignore_numeric_type_changes=False, verbose_level=1, view=TEXT_VIEW, hasher=DeepHash.murmur3_128bit, - transformer=None, **kwargs): if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" "The valid parameters are ignore_order, report_repetition, significant_digits, ignore_type_in_groups" - "exclude_paths, exclude_types, exclude_regex_paths, transformer, verbose_level and view.") % ', '.join(kwargs.keys())) + "exclude_paths, exclude_types, exclude_regex_paths, verbose_level and view.") % ', '.join(kwargs.keys())) self.ignore_order = ignore_order if ignore_string_type_changes is not None and ignore_type_in_groups is not None: @@ -69,14 +68,14 @@ def __init__(self, 
ignore_string_type_changes = True self.ignore_type_in_groups = self._get_ignore_types_in_groups( ignore_type_in_groups, - ignore_string_type_changes, include_numeric_type_changes) + ignore_string_type_changes, ignore_numeric_type_changes) self.report_repetition = report_repetition self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths) self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) self.exclude_types = set(exclude_types) if exclude_types else None self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance self.ignore_string_type_changes = ignore_string_type_changes - self.include_numeric_type_changes = include_numeric_type_changes + self.ignore_numeric_type_changes = ignore_numeric_type_changes self.hashes = {} self.hasher = hasher @@ -89,10 +88,6 @@ def __init__(self, Verbose.level = verbose_level - if transformer: - t1 = transformer(t1) - t2 = transformer(t2) - root = DiffLevel(t1, t2) self.__diff(root, parents_ids=frozenset({id(t1)})) @@ -103,7 +98,7 @@ def __init__(self, self.update(view_results) def _get_ignore_types_in_groups(self, ignore_type_in_groups, - ignore_string_type_changes, include_numeric_type_changes): + ignore_string_type_changes, ignore_numeric_type_changes): if ignore_type_in_groups: if isinstance(ignore_type_in_groups[0], type): ignore_type_in_groups = [tuple(ignore_type_in_groups)] @@ -112,10 +107,10 @@ def _get_ignore_types_in_groups(self, ignore_type_in_groups, else: ignore_type_in_groups = [] - if ignore_string_type_changes: + if ignore_string_type_changes and self.strings not in ignore_type_in_groups: ignore_type_in_groups.append(self.strings) - if not include_numeric_type_changes: + if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups: ignore_type_in_groups.append(self.numbers) return ignore_type_in_groups diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 
53314da5..7798ab47 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -196,7 +196,7 @@ And if you don't care about the value of items that have changed type, please se ignore_type_in_groups Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the times the shortcuts provided to you are enough. -The shortcuts are ignore_string_type_changes which by default is True and include_numeric_type_changes which is by default true. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more power compared to the shortcuts. For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: +The shortcuts are ignore_string_type_changes which by default is True and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more power compared to the shortcuts. For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: 1. Set ignore_string_type_changes=True which is the default. 2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . 
diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 60a64ca0..6dca007e 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -36,6 +36,8 @@ current_dir = os.path.dirname(os.path.abspath(__file__)) +ID_PREFIX = '!>*id' + def short_repr(item, max_length=15): """Short representation of item if it is too long""" @@ -178,3 +180,10 @@ def convert_item_or_items_into_compiled_regexes_else_none(items): else: items = None return items + + +def get_id(obj): + """ + Adding some characters to id so they are not just integers to reduce the risk of collision. + """ + return "{}{}".format(ID_PREFIX, id(obj)) diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 1e376fb6..3e0e0254 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -52,7 +52,6 @@ def test_item_type_change_for_strings_ignored_by_default(self): assert not ddiff def test_item_type_change_for_strings_override(self): - t1 = 'hello' t2 = b'hello' ddiff = DeepDiff(t1, t2, ignore_string_type_changes=False) @@ -67,6 +66,24 @@ def test_item_type_change_for_strings_override(self): } } == ddiff + def test_type_change_numeric(self): + t1 = 10 + t2 = 10.0 + ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True) + assert {} == ddiff + + @pytest.mark.parametrize("t1, t2, expected_result", + [ + (10, 10.0, {}), + (10, 10.2, {'values_changed': {'root': {'new_value': 10.2, 'old_value': 10}}}), + (Decimal(10), 10.0, {}), + ({"a": Decimal(10), "b": 12}, {b"b": 12, "a": 10.0}, {}), + ] + ) + def test_type_change_numeric_when_ignore_order(self, t1, t2, expected_result): + ddiff = DeepDiff(t1, t2, ignore_order=True, ignore_numeric_type_changes=True, ignore_string_type_changes=True) + assert expected_result == ddiff + def test_value_change(self): t1 = {1: 1, 2: 2, 3: 3} t2 = {1: 1, 2: 4, 3: 3} diff --git a/tests/test_hash.py b/tests/test_hash.py index 77c732fe..7deef2cd 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -5,7 +5,7 @@ import logging from deepdiff import DeepHash from 
deepdiff.deephash import prepare_string_for_hashing, unprocessed -from deepdiff.helper import pypy3 +from deepdiff.helper import pypy3, get_id from collections import namedtuple from functools import partial from enum import Enum @@ -39,13 +39,13 @@ def test_dictionary(self): obj = {1: 1} result = DeepHash(obj) - assert set(result.keys()) == {id(1), id(obj)} + assert set(result.keys()) == {1, get_id(obj)} - def test_get_hash_by_obj_is_the_same_as_by_obj_id(self): + def test_get_hash_by_obj_is_the_same_as_by_obj_get_id(self): a = "a" obj = {1: a} result = DeepHash(obj) - assert result[id(a)] == result[a] + assert result[a] def test_get_hash_by_obj_when_does_not_exist(self): a = "a" @@ -59,7 +59,7 @@ def test_list_of_sets(self): b = {2} obj = [a, b] result = DeepHash(obj) - expected_result = {id(1), id(2), id(a), id(b), id(obj)} + expected_result = {1, 2, get_id(a), get_id(b), get_id(obj)} assert set(result.keys()) == expected_result def test_bad_attribute(self): @@ -75,7 +75,7 @@ def __str__(self): t1 = Bad() result = DeepHash(t1) - expected_result = {id(t1): unprocessed, 'unprocessed': [t1]} + expected_result = {t1: unprocessed, 'unprocessed': [t1]} assert expected_result == result def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self): @@ -98,37 +98,67 @@ class TestDeepHashPrep: def test_prep_str(self): obj = "a" - expected_result = {id(obj): prep_str(obj)} + expected_result = {obj: prep_str(obj)} result = DeepHashPrep(obj) assert expected_result == result - expected_result = {id(obj): prep_str(obj, ignore_string_type_changes=False)} + expected_result = {obj: prep_str(obj, ignore_string_type_changes=False)} result = DeepHashPrep(obj, ignore_string_type_changes=False) assert expected_result == result - def test_prep_str_fail_if_mutable(self): + def test_dictionary_key_type_change(self): + obj1 = {"b": 10} + obj2 = {b"b": 10} + + result1 = DeepHashPrep(obj1, ignore_string_type_changes=True) + result2 = DeepHashPrep(obj2, ignore_string_type_changes=True) + 
assert result1[obj1] == result2[obj2] + + def test_number_type_change(self): + obj1 = 10 + obj2 = 10.0 + + result1 = DeepHashPrep(obj1) + result2 = DeepHashPrep(obj2) + assert result1[obj1] != result2[obj2] + + result1 = DeepHashPrep(obj1, ignore_numeric_type_changes=True) + result2 = DeepHashPrep(obj2, ignore_numeric_type_changes=True) + assert result1[obj1] == result2[obj2] + + def test_prep_str_fail_if_deephash_leaks_results(self): """ This test fails if DeepHash is getting a mutable copy of hashes which means each init of the DeepHash will have hashes from the previous init. """ obj1 = "a" - id_obj1 = id(obj1) - expected_result = {id_obj1: prep_str(obj1)} + expected_result = {obj1: prep_str(obj1)} result = DeepHashPrep(obj1) assert expected_result == result obj2 = "b" result = DeepHashPrep(obj2) - assert id_obj1 not in result + assert obj1 not in result + + def test_dict_in_dict(self): + obj2 = {2: 3} + obj = {'a': obj2} + result = DeepHashPrep(obj) + assert 'a' in result + assert obj2 in result def do_list_or_tuple(self, func, func_str): string1 = "a" obj = func([string1, 10, 20]) + if func is list: + obj_id = get_id(obj) + else: + obj_id = obj string1_prepped = prep_str(string1) expected_result = { - id(10): 'int:10', - id(20): 'int:20', - id(string1): string1_prepped, - id(obj): '{}:{},int:10,int:20'.format(func_str, string1_prepped), + 10: 'int:10', + 20: 'int:20', + string1: string1_prepped, + obj_id: '{}:{},int:10,int:20'.format(func_str, string1_prepped), } result = DeepHashPrep(obj) assert expected_result == result @@ -142,18 +172,17 @@ def test_named_tuples(self): # in that case due to a difference of string interning implementation # the id of x inside the named tuple changes. 
x = "x" - x_id = id(x) x_prep = prep_str(x) Point = namedtuple('Point', [x]) obj = Point(x=11) result = DeepHashPrep(obj) if pypy3: - assert result[id(obj)] == 'ntdict:{%s:int:11}' % x + assert result[get_id(obj)] == 'ntdict:{%s:int:11}' % x else: expected_result = { - x_id: x_prep, - id(obj): 'ntdict:{%s:int:11}' % x, - id(11): 'int:11', + x: x_prep, + obj: 'ntdict:{%s:int:11}' % x, + 11: 'int:11', } assert expected_result == result @@ -167,17 +196,17 @@ class MyEnum(Enum): # the ids of strings change if pypy3: # only compare the hashes for the enum instances themselves - assert DeepHashPrep(MyEnum.A)[id(MyEnum.A)] == ( + assert DeepHashPrep(MyEnum.A)[get_id(MyEnum.A)] == ( 'objdict:{' '__objclass__:EnumMeta:objdict:{_name_:B;_value_:int:2};' '_name_:A;_value_:int:1}' ) - assert DeepHashPrep(MyEnum.B)[id(MyEnum.B)] == ( + assert DeepHashPrep(MyEnum.B)[get_id(MyEnum.B)] == ( 'objdict:{' '__objclass__:EnumMeta:objdict:{_name_:A;_value_:int:1};' '_name_:B;_value_:int:2}' ) - assert DeepHashPrep(MyEnum(1))[id(MyEnum.A)] == ( + assert DeepHashPrep(MyEnum(1))[get_id(MyEnum.A)] == ( 'objdict:{' '__objclass__:EnumMeta:objdict:{_name_:B;_value_:int:2};' '_name_:A;_value_:int:1}' @@ -196,13 +225,13 @@ def test_dict_hash(self): key1_prepped = prep_str(key1) obj = {key1: string1, 1: 10, 2: 20} expected_result = { - id(1): 'int:1', - id(10): 'int:10', - id(2): 'int:2', - id(20): 'int:20', - id(key1): key1_prepped, - id(string1): string1_prepped, - id(obj): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % (key1, string1) + 1: 'int:1', + 10: 'int:10', + 2: 'int:2', + 20: 'int:20', + key1: key1_prepped, + string1: string1_prepped, + get_id(obj): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % (key1, string1) } result = DeepHashPrep(obj) assert expected_result == result @@ -213,16 +242,16 @@ def test_dict_in_list(self): dict1 = {key1: string1, 1: 10, 2: 20} obj = [0, dict1] expected_result = { - id(0): 'int:0', - id(1): 'int:1', - id(10): 'int:10', - id(2): 'int:2', - id(20): 'int:20', - 
id(key1): key1, - id(string1): string1, - id(dict1): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % + 0: 'int:0', + 1: 'int:1', + 10: 'int:10', + 2: 'int:2', + 20: 'int:20', + key1: key1, + string1: string1, + get_id(dict1): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % (key1, string1), - id(obj): + get_id(obj): 'list:dict:{int:1:int:10;int:2:int:20;%s:%s},int:0' % (key1, string1) } @@ -235,7 +264,7 @@ def test_nested_lists_same_hash(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - assert t1_hash[id(t1)] == t2_hash[id(t2)] + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] def test_nested_lists_same_hash2(self): t1 = [1, 2, [3, [4, 5]]] @@ -243,7 +272,7 @@ def test_nested_lists_same_hash2(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - assert t1_hash[id(t1)] == t2_hash[id(t2)] + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] def test_nested_lists_same_hash3(self): t1 = [{1: [2, 3], 4: [5, [6, 7]]}] @@ -251,7 +280,7 @@ def test_nested_lists_same_hash3(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - assert t1_hash[id(t1)] == t2_hash[id(t2)] + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] def test_nested_lists_in_dictionary_same_hash(self): t1 = [{"c": 4}, {"c": 3}] @@ -259,7 +288,7 @@ def test_nested_lists_in_dictionary_same_hash(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - assert t1_hash[id(t1)] == t2_hash[id(t2)] + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] def test_same_sets_same_hash(self): t1 = {1, 3, 2} @@ -267,7 +296,7 @@ def test_same_sets_same_hash(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - assert t1_hash[id(t1)] == t2_hash[id(t2)] + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] def test_similar_sets_with_significant_digits_same_hash(self): t1 = {0.012, 0.98} @@ -275,7 +304,7 @@ def test_similar_sets_with_significant_digits_same_hash(self): t1_hash = DeepHashPrep(t1, significant_digits=1) t2_hash = DeepHashPrep(t2, significant_digits=1) - assert t1_hash[id(t1)] == 
t2_hash[id(t2)] + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] def test_same_sets_in_lists_same_hash(self): t1 = ["a", {1, 3, 2}] @@ -283,7 +312,7 @@ def test_same_sets_in_lists_same_hash(self): t1_hash = DeepHashPrep(t1) t2_hash = DeepHashPrep(t2) - assert t1_hash[id(t1)] == t2_hash[id(t2)] + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] def test_unknown_parameters(self): with pytest.raises(ValueError): @@ -302,19 +331,19 @@ def __str__(self): t1 = Bad() result = DeepHashPrep(t1) - expected_result = {id(t1): unprocessed, 'unprocessed': [t1]} + expected_result = {t1: unprocessed, 'unprocessed': [t1]} assert expected_result == result def test_repetition_by_default_does_not_effect(self): list1 = [3, 4] - list1_id = id(list1) + list1_id = get_id(list1) a = [1, 2, list1] - a_id = id(a) + a_id = get_id(a) list2 = [4, 3, 3] - list2_id = id(list2) + list2_id = get_id(list2) b = [list2, 2, 1] - b_id = id(b) + b_id = get_id(b) hash_a = DeepHashPrep(a) hash_b = DeepHashPrep(b) @@ -324,14 +353,14 @@ def test_repetition_by_default_does_not_effect(self): def test_setting_repetition_off_unequal_hash(self): list1 = [3, 4] - list1_id = id(list1) + list1_id = get_id(list1) a = [1, 2, list1] - a_id = id(a) + a_id = get_id(a) list2 = [4, 3, 3] - list2_id = id(list2) + list2_id = get_id(list2) b = [list2, 2, 1] - b_id = id(b) + b_id = get_id(b) hash_a = DeepHashPrep(a, ignore_repetition=False) hash_b = DeepHashPrep(b, ignore_repetition=False) @@ -348,7 +377,7 @@ def hasher(obj): return str(next(hashes)) obj = "a" - expected_result = {id(obj): '0'} + expected_result = {obj: '0'} result = DeepHash(obj, hasher=hasher) assert expected_result == result @@ -361,14 +390,14 @@ def hasher(obj): assert expected_result == result2 result3 = DeepHash(obj, hasher=hasher) - expected_result = {id(obj): '1'} + expected_result = {obj: '1'} assert expected_result == result3 def test_skip_type(self): l1 = logging.getLogger("test") obj = {"log": l1, 2: 1337} result = DeepHashPrep(obj, 
exclude_types={logging.Logger}) - assert id(l1) not in result + assert get_id(l1) not in result def test_skip_type2(self): l1 = logging.getLogger("test") @@ -379,21 +408,21 @@ def test_prep_dic_with_loop(self): obj = {2: 1337} obj[1] = obj result = DeepHashPrep(obj) - expected_result = {id(obj): 'dict:{int:2:int:1337}', id(1): 'int:1', id(2): 'int:2', id(1337): 'int:1337'} + expected_result = {get_id(obj): 'dict:{int:2:int:1337}', 1: 'int:1', 2: 'int:2', 1337: 'int:1337'} assert expected_result == result def test_prep_iterable_with_loop(self): obj = [1] obj.append(obj) result = DeepHashPrep(obj) - expected_result = {id(obj): 'list:int:1', id(1): 'int:1'} + expected_result = {get_id(obj): 'list:int:1', 1: 'int:1'} assert expected_result == result def test_prep_iterable_with_excluded_type(self): l1 = logging.getLogger("test") obj = [1, l1] result = DeepHashPrep(obj, exclude_types={logging.Logger}) - assert id(l1) not in result + assert get_id(l1) not in result def test_skip_str_type_in_dict_on_list(self): dic1 = {1: "a"} @@ -402,7 +431,7 @@ def test_skip_str_type_in_dict_on_list(self): t2 = [dic2] t1_hash = DeepHashPrep(t1, exclude_types=[str]) t2_hash = DeepHashPrep(t2, exclude_types=[str]) - assert id(1) in t1_hash + assert 1 in t1_hash assert t1_hash[dic1] == t2_hash[dic2] def test_skip_path(self): @@ -412,8 +441,8 @@ def test_skip_path(self): t2 = [dic2, 2] t1_hash = DeepHashPrep(t1, exclude_paths=['root[0]']) t2_hash = DeepHashPrep(t2, exclude_paths='root[0]') - assert id(1) not in t1_hash - assert id(2) in t1_hash + assert 1 not in t1_hash + assert 2 in t1_hash assert t1_hash[2] == t2_hash[2] def test_skip_regex_path(self): @@ -422,8 +451,8 @@ def test_skip_regex_path(self): exclude_re = re.compile(r'\[0\]') t1_hash = DeepHashPrep(t1, exclude_regex_paths=r'\[0\]') t2_hash = DeepHashPrep(t1, exclude_regex_paths=[exclude_re]) - assert id(1) not in t1_hash - assert id(2) in t1_hash + assert 1 not in t1_hash + assert 2 in t1_hash assert t1_hash[2] == t2_hash[2] @@ 
-433,7 +462,7 @@ class TestDeepHashSHA1: def test_prep_str_sha1(self): obj = "a" expected_result = { - id(obj): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + obj: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' } result = DeepHash(obj, hasher=DeepHash.sha1hex) assert expected_result == result @@ -445,20 +474,19 @@ def test_prep_str_sha1_fail_if_mutable(self): the previous init. """ obj1 = "a" - id_obj1 = id(obj1) expected_result = { - id_obj1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + obj1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' } result = DeepHash(obj1, hasher=DeepHash.sha1hex) assert expected_result == result obj2 = "b" result = DeepHash(obj2, hasher=DeepHash.sha1hex) - assert id_obj1 not in result + assert obj1 not in result def test_bytecode(self): obj = b"a" expected_result = { - id(obj): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + obj: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' } result = DeepHash(obj, hasher=DeepHash.sha1hex) assert expected_result == result @@ -467,10 +495,10 @@ def test_list1(self): string1 = "a" obj = [string1, 10, 20] expected_result = { - id(string1): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', - id(obj): 'eac61cbd194e5e03c210a3dce67b9bfd6a7b7acb', - id(10): DeepHash.sha1hex('int:10'), - id(20): DeepHash.sha1hex('int:20'), + string1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', + get_id(obj): 'eac61cbd194e5e03c210a3dce67b9bfd6a7b7acb', + 10: DeepHash.sha1hex('int:10'), + 20: DeepHash.sha1hex('int:20'), } result = DeepHash(obj, hasher=DeepHash.sha1hex) assert expected_result == result @@ -480,13 +508,13 @@ def test_dict1(self): key1 = "key1" obj = {key1: string1, 1: 10, 2: 20} expected_result = { - id(1): DeepHash.sha1hex('int:1'), - id(10): DeepHash.sha1hex('int:10'), - id(2): DeepHash.sha1hex('int:2'), - id(20): DeepHash.sha1hex('int:20'), - id(key1): '1073ab6cda4b991cd29f9e83a307f34004ae9327', - id(string1): '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', - id(obj): '11e23f096df81b1ccab0c309cdf8b4ba5a0a6895' + 1: 
DeepHash.sha1hex('int:1'), + 10: DeepHash.sha1hex('int:10'), + 2: DeepHash.sha1hex('int:2'), + 20: DeepHash.sha1hex('int:20'), + key1: '1073ab6cda4b991cd29f9e83a307f34004ae9327', + string1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', + get_id(obj): '11e23f096df81b1ccab0c309cdf8b4ba5a0a6895' } result = DeepHash(obj, hasher=DeepHash.sha1hex) assert expected_result == result From 9b89099e2a96ffe67db7c1c4bdc7c31283a4bb8b Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Wed, 13 Mar 2019 18:49:17 -0700 Subject: [PATCH 59/76] fixing more tests --- deepdiff/diff.py | 9 +++++++-- tests/test_diff_text.py | 3 +-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 552a9d69..ff1d7c86 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -433,10 +433,15 @@ def __create_hashtable(self, t, level): hashes_all = DeepHash(item, hashes=self.hashes, exclude_types=self.exclude_types, + exclude_paths=self.exclude_paths, + exclude_regex_paths=self.exclude_regex_paths, + hasher=self.hasher, + ignore_repetition=not self.report_repetition, significant_digits=self.significant_digits, ignore_string_type_changes=self.ignore_string_type_changes, - hasher=self.hasher) - item_hash = hashes_all.get(id(item), item) + ignore_numeric_type_changes=self.ignore_numeric_type_changes + ) + item_hash = hashes_all[item] except Exception as e: # pragma: no cover logger.warning("Can not produce a hash for %s." 
"Not counting this object.\n %s" % diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 3e0e0254..69f5bf7e 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -78,8 +78,7 @@ def test_type_change_numeric(self): (10, 10.2, {'values_changed': {'root': {'new_value': 10.2, 'old_value': 10}}}), (Decimal(10), 10.0, {}), ({"a": Decimal(10), "b": 12}, {b"b": 12, "a": 10.0}, {}), - ] - ) + ]) def test_type_change_numeric_when_ignore_order(self, t1, t2, expected_result): ddiff = DeepDiff(t1, t2, ignore_order=True, ignore_numeric_type_changes=True, ignore_string_type_changes=True) assert expected_result == ddiff From b01d0b45fdc5aef531713058916669e691091512 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Wed, 13 Mar 2019 23:40:04 -0700 Subject: [PATCH 60/76] fixing tests --- deepdiff/deephash.py | 9 +++------ deepdiff/diff.py | 41 +++++++++++++++++++++++++++++++++-------- deepdiff/helper.py | 12 ++++++++++++ tests/test_diff_text.py | 2 +- tests/test_hash.py | 1 + 5 files changed, 50 insertions(+), 15 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 1fbe0d33..d8522f3e 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -12,7 +12,7 @@ from deepdiff.helper import (strings, numbers, unprocessed, not_hashed, add_to_frozen_set, convert_item_or_items_into_set_else_none, current_dir, convert_item_or_items_into_compiled_regexes_else_none, - get_id) + get_id, get_significant_digits) logger = logging.getLogger(__name__) @@ -24,7 +24,6 @@ KEY_TO_VAL_STR = "{}:{}" -DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 55 ZERO_DECIMAL_CHARACTERS = set("-0.") @@ -80,10 +79,8 @@ def __init__(self, hashes = hashes if hashes else {} self.update(hashes) self[UNPROCESSED] = [] - if ignore_numeric_type_changes and not significant_digits: - self.significant_digits = DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES - else: - self.significant_digits = significant_digits + + self.significant_digits = 
get_significant_digits(significant_digits, ignore_numeric_type_changes) self.ignore_string_type_changes = ignore_string_type_changes self.ignore_numeric_type_changes = ignore_numeric_type_changes # makes the hash return constant size result if true diff --git a/deepdiff/diff.py b/deepdiff/diff.py index ff1d7c86..578aa0b7 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -21,7 +21,7 @@ from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose, unprocessed, json_convertor_default, add_to_frozen_set, - convert_item_or_items_into_set_else_none, + convert_item_or_items_into_set_else_none, get_significant_digits, convert_item_or_items_into_compiled_regexes_else_none) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel from deepdiff.model import DictRelationship, AttributeRelationship @@ -79,10 +79,7 @@ def __init__(self, self.hashes = {} self.hasher = hasher - if significant_digits is not None and significant_digits < 0: - raise ValueError( - "significant_digits must be None or a non-negative integer") - self.significant_digits = significant_digits + self.significant_digits = get_significant_digits(significant_digits, ignore_numeric_type_changes) self.tree = TreeResult() @@ -231,6 +228,23 @@ def __skip_this(self, level): return skip + def __get_clean_to_keys_mapping(self, keys, level): + result = {} + for key in keys: + if self.ignore_string_type_changes and isinstance(key, bytes): + clean_key = key.decode('utf-8') + elif self.ignore_numeric_type_changes and isinstance(key, numbers): + clean_key = ("{:.%sf}" % self.significant_digits).format(key) + else: + clean_key = key + if clean_key in result: + logger.warning(('{} and {} in {} become the same key when ignore_numeric_type_changes' + 'or ignore_numeric_type_changes are set to be true.').format( + key, result[clean_key], level.path())) + else: + result[clean_key] = key + return result + def __diff_dict(self, level, 
parents_ids=frozenset({}), @@ -259,6 +273,13 @@ def __diff_dict(self, t1_keys = set(t1.keys()) t2_keys = set(t2.keys()) + if self.ignore_string_type_changes or self.ignore_numeric_type_changes: + t1_clean_to_keys = self.__get_clean_to_keys_mapping(keys=t1_keys, level=level) + t2_clean_to_keys = self.__get_clean_to_keys_mapping(keys=t2_keys, level=level) + t1_keys = set(t1_clean_to_keys.keys()) + t2_keys = set(t2_clean_to_keys.keys()) + else: + t1_clean_to_keys = t2_clean_to_keys = None t_keys_intersect = t2_keys.intersection(t1_keys) @@ -266,6 +287,7 @@ def __diff_dict(self, t_keys_removed = t1_keys - t_keys_intersect for key in t_keys_added: + key = t2_clean_to_keys[key] if t2_clean_to_keys else key change_level = level.branch_deeper( notpresent, t2[key], @@ -274,6 +296,7 @@ def __diff_dict(self, self.__report_result(item_added_key, change_level) for key in t_keys_removed: + key = t1_clean_to_keys[key] if t1_clean_to_keys else key change_level = level.branch_deeper( t1[key], notpresent, @@ -282,15 +305,17 @@ def __diff_dict(self, self.__report_result(item_removed_key, change_level) for key in t_keys_intersect: # key present in both dicts - need to compare values - item_id = id(t1[key]) + key1 = t1_clean_to_keys[key] if t1_clean_to_keys else key + key2 = t2_clean_to_keys[key] if t2_clean_to_keys else key + item_id = id(t1[key1]) if parents_ids and item_id in parents_ids: continue parents_ids_added = add_to_frozen_set(parents_ids, item_id) # Go one level deeper next_level = level.branch_deeper( - t1[key], - t2[key], + t1[key1], + t2[key2], child_relationship_class=rel_class, child_relationship_param=key) self.__diff(next_level, parents_ids_added) diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 6dca007e..ed351c89 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -187,3 +187,15 @@ def get_id(obj): Adding some characters to id so they are not just integers to reduce the risk of collision. 
""" return "{}{}".format(ID_PREFIX, id(obj)) + + +DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 55 + + +def get_significant_digits(significant_digits, ignore_numeric_type_changes): + if ignore_numeric_type_changes and not significant_digits: + significant_digits = DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES + if significant_digits is not None and significant_digits < 0: + raise ValueError( + "significant_digits must be None or a non-negative integer") + return significant_digits diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 69f5bf7e..b458d60c 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -77,7 +77,7 @@ def test_type_change_numeric(self): (10, 10.0, {}), (10, 10.2, {'values_changed': {'root': {'new_value': 10.2, 'old_value': 10}}}), (Decimal(10), 10.0, {}), - ({"a": Decimal(10), "b": 12}, {b"b": 12, "a": 10.0}, {}), + ({"a": Decimal(10), "b": 12, 11.0: None}, {b"b": 12, "a": 10.0, Decimal(11): None}, {}), ]) def test_type_change_numeric_when_ignore_order(self, t1, t2, expected_result): ddiff = DeepDiff(t1, t2, ignore_order=True, ignore_numeric_type_changes=True, ignore_string_type_changes=True) diff --git a/tests/test_hash.py b/tests/test_hash.py index 7deef2cd..64b6951b 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -112,6 +112,7 @@ def test_dictionary_key_type_change(self): result1 = DeepHashPrep(obj1, ignore_string_type_changes=True) result2 = DeepHashPrep(obj2, ignore_string_type_changes=True) assert result1[obj1] == result2[obj2] + assert result1["b"] == result2[b"b"] def test_number_type_change(self): obj1 = 10 From 55076e84e518c541b0882f3739e575d1e8dd097b Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sun, 17 Mar 2019 22:16:11 -0700 Subject: [PATCH 61/76] moving some code to base --- deepdiff/base.py | 36 +++++++++++++++++++ deepdiff/deephash.py | 37 +++++++++++++++----- deepdiff/deephash_doc.rst | 74 ++++++++++++++++++++++++--------------- deepdiff/diff.py | 50 
+++++++++----------------- deepdiff/diff_doc.rst | 25 +++++++++++-- deepdiff/helper.py | 12 ------- tests/test_diff_text.py | 2 +- tests/test_hash.py | 58 ++++++++++++++++++++---------- 8 files changed, 188 insertions(+), 106 deletions(-) create mode 100644 deepdiff/base.py diff --git a/deepdiff/base.py b/deepdiff/base.py new file mode 100644 index 00000000..5e6562c0 --- /dev/null +++ b/deepdiff/base.py @@ -0,0 +1,36 @@ +from deepdiff.helper import strings, numbers + + +DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 55 + + +class Base: + numbers = numbers + strings = strings + + def get_significant_digits(self, significant_digits, ignore_numeric_type_changes): + if ignore_numeric_type_changes and not significant_digits: + significant_digits = DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES + if significant_digits is not None and significant_digits < 0: + raise ValueError( + "significant_digits must be None or a non-negative integer") + return significant_digits + + def get_ignore_types_in_groups(self, ignore_type_in_groups, + ignore_string_type_changes, + ignore_numeric_type_changes): + if ignore_type_in_groups: + if isinstance(ignore_type_in_groups[0], type): + ignore_type_in_groups = [tuple(ignore_type_in_groups)] + else: + ignore_type_in_groups = list(map(tuple, ignore_type_in_groups)) + else: + ignore_type_in_groups = [] + + if ignore_string_type_changes and self.strings not in ignore_type_in_groups: + ignore_type_in_groups.append(self.strings) + + if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups: + ignore_type_in_groups.append(self.numbers) + + return ignore_type_in_groups diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index d8522f3e..cfae19a1 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -12,7 +12,8 @@ from deepdiff.helper import (strings, numbers, unprocessed, not_hashed, add_to_frozen_set, convert_item_or_items_into_set_else_none, current_dir, 
convert_item_or_items_into_compiled_regexes_else_none, - get_id, get_significant_digits) + get_id) +from deepdiff.base import Base logger = logging.getLogger(__name__) @@ -43,7 +44,7 @@ def prepare_string_for_hashing(obj, ignore_string_type_changes=False): doc = doc_file.read() -class DeepHash(dict): +class DeepHash(dict, Base): __doc__ = doc MURMUR_SEED = 1203 @@ -57,8 +58,9 @@ def __init__(self, hasher=None, ignore_repetition=True, significant_digits=None, - constant_size=True, - ignore_string_type_changes=True, + apply_hash=True, + ignore_type_in_groups=None, + ignore_string_type_changes=False, ignore_numeric_type_changes=False, **kwargs): if kwargs: @@ -66,7 +68,7 @@ def __init__(self, ("The following parameter(s) are not valid: %s\n" "The valid parameters are obj, hashes, exclude_types," "exclude_paths, exclude_regex_paths, hasher, ignore_repetition," - "significant_digits, constant_size, ignore_string_type_changes," + "significant_digits, apply_hash, ignore_type_in_groups, ignore_string_type_changes," "ignore_numeric_type_changes") % ', '.join(kwargs.keys())) self.obj = obj exclude_types = set() if exclude_types is None else set(exclude_types) @@ -80,13 +82,16 @@ def __init__(self, self.update(hashes) self[UNPROCESSED] = [] - self.significant_digits = get_significant_digits(significant_digits, ignore_numeric_type_changes) + self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) + self.ignore_type_in_groups = self.get_ignore_types_in_groups( + ignore_type_in_groups, + ignore_string_type_changes, ignore_numeric_type_changes) self.ignore_string_type_changes = ignore_string_type_changes self.ignore_numeric_type_changes = ignore_numeric_type_changes # makes the hash return constant size result if true # the only time it should be set to False is when # testing the individual hash functions for different types of objects. 
- self.constant_size = constant_size + self.apply_hash = apply_hash self._hash(obj, parent="root", parents_ids=frozenset({get_id(obj)})) @@ -101,9 +106,23 @@ def sha1hex(obj): obj = obj.encode('utf-8') return sha1(obj).hexdigest() + @staticmethod + def murmur3_64bit(obj): + """ + Use murmur3_64bit for 64 bit hash by passing this method: + hasher=DeepHash.murmur3_64bit + """ + obj = obj.encode('utf-8') + # This version of murmur3 returns two 64bit integers. + return mmh3.hash64(obj, DeepHash.MURMUR_SEED)[0] + @staticmethod def murmur3_128bit(obj): - """Use murmur3_128bit for 128 bit hash (default).""" + """ + Use murmur3_128bit for 128 bit hash by passing this method: + hasher=DeepHash.murmur3_128bit + This hasher is the default hasher. + """ obj = obj.encode('utf-8') return mmh3.hash128(obj, DeepHash.MURMUR_SEED) @@ -290,7 +309,7 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): elif result is unprocessed: pass - elif self.constant_size: + elif self.apply_hash: if isinstance(obj, strings): result_cleaned = result else: diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index 430bc85c..48b12557 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -6,28 +6,28 @@ This way 2 objects with the same content should have the same hash. The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. For example you can use DeepHash to calculate the hash of a set or a dictionary! -The core of DeepHash is a deterministic serialization of your object into a string so it -can be passed to a hash function. By default it uses Murmur 3 128 bit hash function. -but you can pass another hash function to it if you want. +At the core of it, DeepHash is a deterministic serialization of your object into a string so it +can be passed to a hash function. By default it uses Murmur 3 128 bit hash function which is a +fast, non-cryptographic hashing function. 
You have the option to pass any other hashing function to be used instead. **Parameters** obj : any object, The object to be hashed based on its content. hashes: dictionary, default = empty dictionary - A dictionary of {object id: object hash} to start with. - Any object that is encountered and its id is already in the hashes dictionary, + A dictionary of {object or object id: object hash} to start with. + Any object that is encountered and is already in the hashes dictionary, or whose id is in the hashes dictionary, will re-use the hash that is provided by this dictionary instead of re-calculating - its hash. + its hash. This is typically used when you have a series of objects to be hashed and there might be repeats of the same object. exclude_types: list, default = None List of object types to exclude from hashing. exclude_paths: list, default = None - List of paths to exclude from the report. If only one item, you can path it as a string. + List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path. exclude_regex_paths: list, default = None - List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string or regex compiled object. + List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one regex path. hasher: function. default = DeepHash.murmur3_128bit hasher is the hashing function. The default is DeepHash.murmur3_128bit. @@ -37,8 +37,11 @@ hasher: function. default = DeepHash.murmur3_128bit You can use it by passing: hasher=hash for Python's builtin hash. 
- SHA1 is already provided as an alternative too: - You can use it by passing: hasher=DeepHash.sha1hex + The following alternatives are already provided: + + - hasher=DeepHash.murmur3_128bit + - hasher=DeepHash.murmur3_64bit + - hasher=DeepHash.sha1hex ignore_repetition: Boolean, default = True If repetitions in an iterable should cause the hash of iterable to be different. @@ -49,32 +52,40 @@ significant_digits : int >= 0, default=None If it is a non negative integer, it compares only that many digits AFTER the decimal point. - This only affects floats, decimal.Decimal and complex. - - Takse a look at DeepDiff.diff docs for explanation of how this works. + This only affects floats, decimal.Decimal and complex numbers. -constant_size: Boolean, default = True - What DeepHash does is to "prep" the contents of objects into strings. - If constant_size is set, then it actually goes ahead and hashes the string - using the hasher function. + Take a look at DeepDiff.diff docs for explanation of how this works. - The only time you want the constant_size to be False is if you want to know what +apply_hash: Boolean, default = True + DeepHash at its core is doing deterministic serialization of objects into strings. + Then it hashes the string. + The only time you want the apply_hash to be False is if you want to know what the string representation of your object is BEFORE it gets hashed. ignore_string_type_changes: Boolean, default = True string type conversions should not affect the hash output when this is set to True. For example "Hello" and b"Hello" should produce the same hash. +ignore_numeric_type_changes: Boolean, default = False + numeric type conversions should not affect the hash output when this is set to True. + For example 10, 10.0 and Decimal(10) should produce the same hash. + However when ignore_numeric_type_changes is set to True, all numbers are converted + to decimals with the precision of significant_digits parameter. 
+ If no significant_digits is passed by the user, a default value of 55 is used. + + For example if significant_digits=5, 1.1, Decimal(1.1) are both converted to 1.10000 + + That way they both produce the same hash. + **Returns** - A dictionary of {item id: item hash}. - If your object is nested, it will build hashes of all the objects it contains! + A dictionary of {item: item hash}. + If your object is nested, it will build hashes of all the objects it contains too. **Examples** Let's say you have a dictionary object. >>> from deepdiff import DeepHash - >>> >>> obj = {1: 2, 'a': 'b'} If you try to hash it: @@ -87,7 +98,7 @@ But with DeepHash: >>> from deepdiff import DeepHash >>> obj = {1: 2, 'a': 'b'} >>> DeepHash(obj) - {4355639248: 2468916477072481777512283587789292749, 4355639280: -35787773492556653776377555218122431491, 4358636128: -88390647972316138151822486391929534118, 4358009664: 8833996863197925870419376694314494743, 4357467952: 34150898645750099477987229399128149852} + {1: 234041559348429806012597903916437026784, 2: 148655924348182454950690728321917595655, 'a': 119173504597196970070553896747624927922, 'b': 4994827227437929991738076607196210252, '!>*id4488569408': 32452838416412500686422093274247968754} So what is exactly the hash of obj in this case? DeepHash is calculating the hash of the obj and any other object that obj contains. @@ -102,11 +113,8 @@ Which you can write as: At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. -The result hash is 34150898645750099477987229399128149852. -In this case the hash of the obj is 128 bit that is divided into 2 64bit integers. -Using Murmur 3 64bit for hashing is preferred (and is the default behaviour) -since the chance of hash collision will be minimal and hashing will be deterministic -and will not depend on the version of the Python. 
+The result hash is 34150898645750099477987229399128149852 which is generated by +Murmur 3 128bit hashing algorithm. If you prefer to use another hashing algorithm, you can pass it using the hasher parameter. Read more about Murmur3 here: https://en.wikipedia.org/wiki/MurmurHash If you do a deep copy of obj, it should still give you the same hash: >>> from copy import deepcopy @@ -119,6 +127,14 @@ Note that by default DeepHash will ignore string type differences. So if your st >>> DeepHash(obj3)[obj3] 34150898645750099477987229399128149852 -But if you want a different hash if string types are different, set ignore_string_type_changes to True: - >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] +But if you want a different hash if string types are different, set ignore_string_type_changes to False: + >>> DeepHash(obj3, ignore_string_type_changes=False)[obj3] 64067525765846024488103933101621212760 + +On the other hand, ignore_numeric_type_changes is by default False. + >>> obj1 = {4:10} + >>> obj2 = {4.0: Decimal(10.0)} + >>> DeepHash(obj1)[4] == DeepHash(obj2)[4.0] + False + >>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0] + True diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 578aa0b7..31249800 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -12,6 +12,7 @@ import json import jsonpickle import warnings +import os from decimal import Decimal from itertools import zip_longest @@ -21,12 +22,13 @@ from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose, unprocessed, json_convertor_default, add_to_frozen_set, - convert_item_or_items_into_set_else_none, get_significant_digits, - convert_item_or_items_into_compiled_regexes_else_none) + convert_item_or_items_into_set_else_none, + convert_item_or_items_into_compiled_regexes_else_none, current_dir) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel 
from deepdiff.model import DictRelationship, AttributeRelationship from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship from deepdiff.deephash import DeepHash +from deepdiff.base import Base logger = logging.getLogger(__name__) warnings.simplefilter('once', DeprecationWarning) @@ -34,22 +36,24 @@ TREE_VIEW = 'tree' TEXT_VIEW = 'text' +with open(os.path.join(current_dir, 'diff_doc.rst'), 'r') as doc_file: + doc = doc_file.read() -class DeepDiff(ResultDict): - numbers = numbers - strings = strings + +class DeepDiff(ResultDict, Base): + __doc__ = doc def __init__(self, t1, t2, ignore_order=False, - ignore_type_in_groups=None, report_repetition=False, significant_digits=None, exclude_paths=None, exclude_regex_paths=None, exclude_types=None, - ignore_string_type_changes=None, + ignore_type_in_groups=None, + ignore_string_type_changes=False, ignore_numeric_type_changes=False, verbose_level=1, view=TEXT_VIEW, @@ -58,15 +62,13 @@ def __init__(self, if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" - "The valid parameters are ignore_order, report_repetition, significant_digits, ignore_type_in_groups" - "exclude_paths, exclude_types, exclude_regex_paths, verbose_level and view.") % ', '.join(kwargs.keys())) + "The valid parameters are ignore_order, report_repetition, significant_digits, " + "exclude_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, " + "ignore_string_type_changes, ignore_numeric_type_changes, verbose_level, view, " + "and hasher.") % ', '.join(kwargs.keys())) self.ignore_order = ignore_order - if ignore_string_type_changes is not None and ignore_type_in_groups is not None: - raise ValueError('Please set either ignore_string_type_changes or ignore_type_in_groups but not both.') - if ignore_type_in_groups is None and ignore_string_type_changes is None: - ignore_string_type_changes = True - self.ignore_type_in_groups = self._get_ignore_types_in_groups( + 
self.ignore_type_in_groups = self.get_ignore_types_in_groups( ignore_type_in_groups, ignore_string_type_changes, ignore_numeric_type_changes) self.report_repetition = report_repetition @@ -79,7 +81,7 @@ def __init__(self, self.hashes = {} self.hasher = hasher - self.significant_digits = get_significant_digits(significant_digits, ignore_numeric_type_changes) + self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) self.tree = TreeResult() @@ -94,24 +96,6 @@ def __init__(self, view_results = self._get_view_results(view) self.update(view_results) - def _get_ignore_types_in_groups(self, ignore_type_in_groups, - ignore_string_type_changes, ignore_numeric_type_changes): - if ignore_type_in_groups: - if isinstance(ignore_type_in_groups[0], type): - ignore_type_in_groups = [tuple(ignore_type_in_groups)] - else: - ignore_type_in_groups = list(map(tuple, ignore_type_in_groups)) - else: - ignore_type_in_groups = [] - - if ignore_string_type_changes and self.strings not in ignore_type_in_groups: - ignore_type_in_groups.append(self.strings) - - if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups: - ignore_type_in_groups.append(self.numbers) - - return ignore_type_in_groups - def _get_view_results(self, view): """ Get the results based on the view diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 7798ab47..22b56305 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -61,6 +61,15 @@ view: string, default = text The new view is called the tree view which allows you to traverse through the tree of changed items. +ignore_string_type_changes: Boolean, default = False + Whether to ignore string type changes or not. For example b"Hello" vs. "Hello" are considered the same if ignore_string_type_changes is set to True. + +ignore_numeric_type_changes: Boolean, default = False + Whether to ignore numeric type changes or not. For example 10 vs. 
10.0 are considered the same if ignore_numeric_type_changes is set to True. + +ignore_type_in_groups: List, default = [] + + **Returns** A DeepDiff object that has already calculated the difference of the 2 items. @@ -196,7 +205,9 @@ And if you don't care about the value of items that have changed type, please se ignore_type_in_groups Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the times the shortcuts provided to you are enough. -The shortcuts are ignore_string_type_changes which by default is True and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more power compared to the shortcuts. For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: +The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more control compared to the shortcuts. + +For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: 1. Set ignore_string_type_changes=True which is the default. 2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . @@ -206,7 +217,15 @@ Now what if you want also typeA and typeB to be ignored when comparing agains ea 1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] -Note that you can either set the ignore_type_in_groups or the shortcuts but not both. 
+ignore_string_type_changes +Default: False + >>> DeepDiff(b'hello', 'hello', ignore_string_type_changes=True) + {} + >>> DeepDiff(b'hello', 'hello') + {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': b'hello', 'new_value': 'hello'}}} + +ignore_numeric_type_changes +Default: False Ignore Type Number - Dictionary that contains float and integer: >>> from deepdiff import DeepDiff @@ -687,7 +706,7 @@ Serialize and then deserialize back to deepdiff >>> ddiff = DeepDiff(t1, t2) >>> jsoned = ddiff.to_json_pickle >>> jsoned - '{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' + '{"type_changes": {"root[2]": {"py/object": "dict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' >>> ddiff_new = DeepDiff.from_json_pickle(jsoned) >>> ddiff == ddiff_new True diff --git a/deepdiff/helper.py b/deepdiff/helper.py index ed351c89..6dca007e 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -187,15 +187,3 @@ def get_id(obj): Adding some characters to id so they are not just integers to reduce the risk of collision. 
""" return "{}{}".format(ID_PREFIX, id(obj)) - - -DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 55 - - -def get_significant_digits(significant_digits, ignore_numeric_type_changes): - if ignore_numeric_type_changes and not significant_digits: - significant_digits = DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES - if significant_digits is not None and significant_digits < 0: - raise ValueError( - "significant_digits must be None or a non-negative integer") - return significant_digits diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index b458d60c..185cbab2 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -48,7 +48,7 @@ def test_item_type_change_for_strings_ignored_by_default(self): t1 = 'hello' t2 = b'hello' - ddiff = DeepDiff(t1, t2) + ddiff = DeepDiff(t1, t2, ignore_string_type_changes=True) assert not ddiff def test_item_type_change_for_strings_override(self): diff --git a/tests/test_hash.py b/tests/test_hash.py index 64b6951b..0cf3838f 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -26,7 +26,7 @@ def __repr__(self): # Only the prep part of DeepHash. We don't need to test the actual hash function. 
-DeepHashPrep = partial(DeepHash, constant_size=False) +DeepHashPrep = partial(DeepHash, apply_hash=False) def prep_str(obj, ignore_string_type_changes=True): @@ -81,15 +81,15 @@ def __str__(self): def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self): a = 'hello' b = b'hello' - a_hash = DeepHash(a)[a] - b_hash = DeepHash(b)[b] + a_hash = DeepHash(a, ignore_string_type_changes=True)[a] + b_hash = DeepHash(b, ignore_string_type_changes=True)[b] assert a_hash == b_hash def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self): a = 'hello' b = b'hello' - a_hash = DeepHash(a, hasher=DeepHash.sha1hex)[a] - b_hash = DeepHash(b, hasher=DeepHash.sha1hex)[b] + a_hash = DeepHash(a, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)[a] + b_hash = DeepHash(b, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)[b] assert a_hash == b_hash @@ -99,7 +99,7 @@ class TestDeepHashPrep: def test_prep_str(self): obj = "a" expected_result = {obj: prep_str(obj)} - result = DeepHashPrep(obj) + result = DeepHashPrep(obj, ignore_string_type_changes=True) assert expected_result == result expected_result = {obj: prep_str(obj, ignore_string_type_changes=False)} result = DeepHashPrep(obj, ignore_string_type_changes=False) @@ -134,16 +134,16 @@ def test_prep_str_fail_if_deephash_leaks_results(self): """ obj1 = "a" expected_result = {obj1: prep_str(obj1)} - result = DeepHashPrep(obj1) + result = DeepHashPrep(obj1, ignore_string_type_changes=True) assert expected_result == result obj2 = "b" - result = DeepHashPrep(obj2) + result = DeepHashPrep(obj2, ignore_string_type_changes=True) assert obj1 not in result def test_dict_in_dict(self): obj2 = {2: 3} obj = {'a': obj2} - result = DeepHashPrep(obj) + result = DeepHashPrep(obj, ignore_string_type_changes=True) assert 'a' in result assert obj2 in result @@ -161,7 +161,7 @@ def do_list_or_tuple(self, func, func_str): string1: string1_prepped, obj_id: '{}:{},int:10,int:20'.format(func_str, string1_prepped), } - result = 
DeepHashPrep(obj) + result = DeepHashPrep(obj, ignore_string_type_changes=True) assert expected_result == result def test_list_and_tuple(self): @@ -176,7 +176,7 @@ def test_named_tuples(self): x_prep = prep_str(x) Point = namedtuple('Point', [x]) obj = Point(x=11) - result = DeepHashPrep(obj) + result = DeepHashPrep(obj, ignore_string_type_changes=True) if pypy3: assert result[get_id(obj)] == 'ntdict:{%s:int:11}' % x else: @@ -234,7 +234,7 @@ def test_dict_hash(self): string1: string1_prepped, get_id(obj): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % (key1, string1) } - result = DeepHashPrep(obj) + result = DeepHashPrep(obj, ignore_string_type_changes=True) assert expected_result == result def test_dict_in_list(self): @@ -256,7 +256,7 @@ def test_dict_in_list(self): 'list:dict:{int:1:int:10;int:2:int:20;%s:%s},int:0' % (key1, string1) } - result = DeepHashPrep(obj) + result = DeepHashPrep(obj, ignore_string_type_changes=True) assert expected_result == result def test_nested_lists_same_hash(self): @@ -465,7 +465,7 @@ def test_prep_str_sha1(self): expected_result = { obj: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' } - result = DeepHash(obj, hasher=DeepHash.sha1hex) + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) assert expected_result == result def test_prep_str_sha1_fail_if_mutable(self): @@ -478,10 +478,10 @@ def test_prep_str_sha1_fail_if_mutable(self): expected_result = { obj1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' } - result = DeepHash(obj1, hasher=DeepHash.sha1hex) + result = DeepHash(obj1, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) assert expected_result == result obj2 = "b" - result = DeepHash(obj2, hasher=DeepHash.sha1hex) + result = DeepHash(obj2, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) assert obj1 not in result def test_bytecode(self): @@ -489,7 +489,7 @@ def test_bytecode(self): expected_result = { obj: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' } - result = DeepHash(obj, 
hasher=DeepHash.sha1hex) + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) assert expected_result == result def test_list1(self): @@ -501,7 +501,7 @@ def test_list1(self): 10: DeepHash.sha1hex('int:10'), 20: DeepHash.sha1hex('int:20'), } - result = DeepHash(obj, hasher=DeepHash.sha1hex) + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) assert expected_result == result def test_dict1(self): @@ -517,7 +517,7 @@ def test_dict1(self): string1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', get_id(obj): '11e23f096df81b1ccab0c309cdf8b4ba5a0a6895' } - result = DeepHash(obj, hasher=DeepHash.sha1hex) + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) assert expected_result == result @@ -532,3 +532,23 @@ class TestCleaningString: def test_clean_type(self, text, ignore_string_type_changes, expected_result): result = prepare_string_for_hashing(text, ignore_string_type_changes=ignore_string_type_changes) assert expected_result == result + + +class TestDeepHashMurmur3: + """DeepHash with Murmur3 Hash Tests.""" + + def test_prep_str_murmur3_64bit(self): + obj = "a" + expected_result = { + obj: 424475663186367154 + } + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.murmur3_64bit) + assert expected_result == result + + def test_prep_str_murmur3_128bit(self): + obj = "a" + expected_result = { + obj: 119173504597196970070553896747624927922 + } + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.murmur3_128bit) + assert expected_result == result From 760467943839681874c09384ef259b7a983171fa Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 18 Mar 2019 22:53:59 -0700 Subject: [PATCH 62/76] fixing type ignoring --- deepdiff/base.py | 9 +++++---- deepdiff/deephash.py | 41 +++++++++++++++++++++++++---------------- deepdiff/diff.py | 7 ++++--- deepdiff/diff_doc.rst | 5 +---- tests/test_diff_text.py | 22 ++++++++++++++++++++++ tests/test_hash.py | 
37 ++++++++++++++++++++++++++++++++----- 6 files changed, 89 insertions(+), 32 deletions(-) diff --git a/deepdiff/base.py b/deepdiff/base.py index 5e6562c0..11f1b97f 100644 --- a/deepdiff/base.py +++ b/deepdiff/base.py @@ -1,3 +1,4 @@ +from ordered_set import OrderedSet from deepdiff.helper import strings, numbers @@ -21,16 +22,16 @@ def get_ignore_types_in_groups(self, ignore_type_in_groups, ignore_numeric_type_changes): if ignore_type_in_groups: if isinstance(ignore_type_in_groups[0], type): - ignore_type_in_groups = [tuple(ignore_type_in_groups)] + ignore_type_in_groups = [OrderedSet(ignore_type_in_groups)] else: - ignore_type_in_groups = list(map(tuple, ignore_type_in_groups)) + ignore_type_in_groups = list(map(OrderedSet, ignore_type_in_groups)) else: ignore_type_in_groups = [] if ignore_string_type_changes and self.strings not in ignore_type_in_groups: - ignore_type_in_groups.append(self.strings) + ignore_type_in_groups.append(OrderedSet(self.strings)) if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups: - ignore_type_in_groups.append(self.numbers) + ignore_type_in_groups.append(OrderedSet(self.numbers)) return ignore_type_in_groups diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index cfae19a1..b35698b6 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -18,6 +18,8 @@ logger = logging.getLogger(__name__) UNPROCESSED = 'unprocessed' +MURMUR_SEED = 1203 + RESERVED_DICT_KEYS = {UNPROCESSED} EMPTY_FROZENSET = frozenset({}) @@ -47,8 +49,6 @@ def prepare_string_for_hashing(obj, ignore_string_type_changes=False): class DeepHash(dict, Base): __doc__ = doc - MURMUR_SEED = 1203 - def __init__(self, obj, hashes=None, @@ -114,7 +114,7 @@ def murmur3_64bit(obj): """ obj = obj.encode('utf-8') # This version of murmur3 returns two 64bit integers. 
- return mmh3.hash64(obj, DeepHash.MURMUR_SEED)[0] + return mmh3.hash64(obj, MURMUR_SEED)[0] @staticmethod def murmur3_128bit(obj): @@ -124,7 +124,7 @@ def murmur3_128bit(obj): This hasher is the default hasher. """ obj = obj.encode('utf-8') - return mmh3.hash128(obj, DeepHash.MURMUR_SEED) + return mmh3.hash128(obj, MURMUR_SEED) def __getitem__(self, obj): # changed_to_id = False @@ -152,6 +152,7 @@ def __contains__(self, obj): def _prep_obj(self, obj, parent, parents_ids=EMPTY_FROZENSET, is_namedtuple=False): """Difference of 2 objects""" + original_type = type(obj) try: if is_namedtuple: obj = obj._asdict() @@ -164,7 +165,8 @@ def _prep_obj(self, obj, parent, parents_ids=EMPTY_FROZENSET, is_namedtuple=Fals self[UNPROCESSED].append(obj) return unprocessed - result = self._prep_dict(obj, parent, parents_ids, print_as_attribute=True) + result = self._prep_dict(obj, parent=parent, parents_ids=parents_ids, + print_as_attribute=True, original_type=original_type) result = "nt{}".format(result) if is_namedtuple else "obj{}".format(result) return result @@ -181,7 +183,7 @@ def _skip_this(self, obj, parent): return skip - def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribute=False): + def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribute=False, original_type=None): result = [] @@ -201,12 +203,19 @@ def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribut result.sort() result = ';'.join(result) - result = "dict:{%s}" % result - - return result + if print_as_attribute: + type_ = original_type or type(obj) + type_str = type_.__name__ + for type_group in self.ignore_type_in_groups: + if type_ in type_group: + type_str = ','.join(map(lambda x: x.__name__, type_group)) + break + else: + type_str = 'dict' + return "%s:{%s}" % (type_str, result) def _prep_set(self, obj, parent, parents_ids=EMPTY_FROZENSET): - return "set:{}".format(self._prep_iterable(obj, parent, parents_ids)) + return 
"set:{}".format(self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids)) def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): @@ -258,7 +267,7 @@ def _prep_tuple(self, obj, parent, parents_ids): obj._asdict # It must be a normal tuple except AttributeError: - result = self._prep_iterable(obj, parent, parents_ids) + result = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids) # We assume it is a namedtuple then else: result = self._prep_obj(obj, parent, parents_ids=parents_ids, is_namedtuple=True) @@ -289,19 +298,19 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): result = self._prep_number(obj) elif isinstance(obj, MutableMapping): - result = self._prep_dict(obj, parent, parents_ids) + result = self._prep_dict(obj=obj, parent=parent, parents_ids=parents_ids) elif isinstance(obj, tuple): - result = self._prep_tuple(obj, parent, parents_ids) + result = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids) elif isinstance(obj, (set, frozenset)): - result = self._prep_set(obj, parent, parents_ids) + result = self._prep_set(obj=obj, parent=parent, parents_ids=parents_ids) elif isinstance(obj, Iterable): - result = self._prep_iterable(obj, parent, parents_ids) + result = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids) else: - result = self._prep_obj(obj, parent, parents_ids) + result = self._prep_obj(obj=obj, parent=parent, parents_ids=parents_ids) if result is not_hashed: # pragma: no cover self[UNPROCESSED].append(obj) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 31249800..9278f3ba 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -217,7 +217,7 @@ def __get_clean_to_keys_mapping(self, keys, level): for key in keys: if self.ignore_string_type_changes and isinstance(key, bytes): clean_key = key.decode('utf-8') - elif self.ignore_numeric_type_changes and isinstance(key, numbers): + elif self.ignore_numeric_type_changes and type(key) in numbers: clean_key = ("{:.%sf}" 
% self.significant_digits).format(key) else: clean_key = key @@ -448,7 +448,8 @@ def __create_hashtable(self, t, level): ignore_repetition=not self.report_repetition, significant_digits=self.significant_digits, ignore_string_type_changes=self.ignore_string_type_changes, - ignore_numeric_type_changes=self.ignore_numeric_type_changes + ignore_numeric_type_changes=self.ignore_numeric_type_changes, + ignore_type_in_groups=self.ignore_type_in_groups, ) item_hash = hashes_all[item] except Exception as e: # pragma: no cover @@ -578,7 +579,7 @@ def __diff(self, level, parents_ids=frozenset({})): if type(level.t1) != type(level.t2): # NOQA report_type_change = True for type_group in self.ignore_type_in_groups: - if isinstance(level.t1, type_group) and isinstance(level.t2, type_group): + if type(level.t1) in type_group and type(level.t2) in type_group: report_type_change = False break if report_type_change: diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 22b56305..50b3162f 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -35,8 +35,6 @@ significant_digits : int >= 0, default=None. For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) -ignore_type_in_groups : Tuple or List of Tuples, default=None ignores types when t1 and t2 are both within the same type group. - verbose_level : int >= 0, default = 1. Higher verbose level shows you more details. For example verbose level 1 shows what dictionary item are added or removed. @@ -67,8 +65,7 @@ ignore_string_type_changes: Boolean, default = False ignore_numeric_type_changes: Boolean, default = False Whether to ignore numeric type changes or not. For example 10 vs. 10.0 are considered the same if ignore_numeric_type_changes is set to True. -ignore_type_in_groups: List, default = [] - +ignore_type_in_groups: Tuple or List of Tuples, default=None ignores types when t1 and t2 are both within the same type group. 
**Returns** diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 185cbab2..50e4f741 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1020,6 +1020,28 @@ def test_dictionary_of_custom_objects(self): result = {} assert result == ddiff + def test_custom_object_type_change_when_ignore_order(self): + + class Burrito: + bread = 'flour' + + def __init__(self): + self.spicy = True + + class Taco: + bread = 'flour' + + def __init__(self): + self.spicy = True + + burrito = Burrito() + taco = Taco() + + burritos = [burrito] + tacos = [taco] + + assert not DeepDiff(burritos, tacos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) + def test_loop(self): class LoopTest(object): def __init__(self, a): diff --git a/tests/test_hash.py b/tests/test_hash.py index 0cf3838f..ad1249db 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -63,7 +63,7 @@ def test_list_of_sets(self): assert set(result.keys()) == expected_result def test_bad_attribute(self): - class Bad(object): + class Bad: __slots__ = ['x', 'y'] def __getattr__(self, key): @@ -178,11 +178,11 @@ def test_named_tuples(self): obj = Point(x=11) result = DeepHashPrep(obj, ignore_string_type_changes=True) if pypy3: - assert result[get_id(obj)] == 'ntdict:{%s:int:11}' % x + assert result[get_id(obj)] == "ntPoint:{%s:int:11}" % x else: expected_result = { x: x_prep, - obj: 'ntdict:{%s:int:11}' % x, + obj: "ntPoint:{%s:int:11}" % x, 11: 'int:11', } assert expected_result == result @@ -319,8 +319,8 @@ def test_unknown_parameters(self): with pytest.raises(ValueError): DeepHashPrep(1, wrong_param=2) - def test_bad_attribute(self): - class Bad(object): + def test_bad_attribute_prep(self): + class Bad: __slots__ = ['x', 'y'] def __getattr__(self, key): @@ -335,6 +335,33 @@ def __str__(self): expected_result = {t1: unprocessed, 'unprocessed': [t1]} assert expected_result == result + class Burrito: + bread = 'flour' + + def __init__(self): + self.spicy = True + + class Taco: + bread = 
'flour' + + def __init__(self): + self.spicy = True + + burrito = Burrito() + taco = Taco() + + @pytest.mark.parametrize("t1, t2, ignore_type_in_groups, is_qual", [ + (taco, burrito, [], False), + (taco, burrito, [(Taco, Burrito)], True), + ([taco], [burrito], [(Taco, Burrito)], True), + + ]) + def test_objects_with_same_content(self, t1, t2, ignore_type_in_groups, is_qual): + + t1_result = DeepHashPrep(t1, ignore_type_in_groups=ignore_type_in_groups) + t2_result = DeepHashPrep(t2, ignore_type_in_groups=ignore_type_in_groups) + assert is_qual == (t1_result[t1] == t2_result[t2]) + def test_repetition_by_default_does_not_effect(self): list1 = [3, 4] list1_id = get_id(list1) From 367bfbef808a8b52ec38f63ad96e022d532a7b85 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 18 Mar 2019 22:56:48 -0700 Subject: [PATCH 63/76] adding more tests --- tests/test_diff_text.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 50e4f741..ecb1e3d2 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1333,6 +1333,13 @@ def test_ignore_type_in_groups_numbers_and_strings(self): result = {'values_changed': {'root[2]': {'new_value': 3.3, 'old_value': 3}}} assert result == ddiff + def test_ignore_type_in_groups_numbers_and_strings_when_ignore_order(self): + t1 = [1, 2, 3, 'a'] + t2 = [1.0, 2.0, 3.3, b'a'] + ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True, ignore_string_type_changes=True, ignore_order=True) + result = {'iterable_item_added': {'root[2]': 3.3}, 'iterable_item_removed': {'root[2]': 3}} + assert result == ddiff + @pytest.mark.skip(reason="REMAPPING DISABLED UNTIL KEY NAMES CHANGE AGAIN IN FUTURE") def test_base_level_dictionary_remapping(self): """ From 2062a609d51c756ddb5f24403bbc0a0849f232ec Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Mon, 18 Mar 2019 23:54:46 -0700 Subject: [PATCH 64/76] increasing coverage --- deepdiff/diff.py | 19 +++++++++--------- deepdiff/helper.py | 7 
+++++++ deepdiff/model.py | 14 ++++++++++--- tests/test_diff_text.py | 44 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 69 insertions(+), 15 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 9278f3ba..e0936266 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -22,7 +22,7 @@ from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose, unprocessed, json_convertor_default, add_to_frozen_set, - convert_item_or_items_into_set_else_none, + convert_item_or_items_into_set_else_none, get_type, convert_item_or_items_into_compiled_regexes_else_none, current_dir) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel from deepdiff.model import DictRelationship, AttributeRelationship @@ -155,16 +155,17 @@ def unmangle(attribute): all_slots = [] if isinstance(object, type): - mro = object.__mro__ + mro = object.__mro__ # pragma: no cover. I have not been able to write a test for this case. But we still check for it. 
else: mro = object.__class__.__mro__ for type_in_mro in mro: - slots = getattr(type_in_mro, '__slots__', ()) - if isinstance(slots, strings): - all_slots.append(slots) - else: - all_slots.extend(slots) + slots = getattr(type_in_mro, '__slots__', None) + if slots: + if isinstance(slots, strings): + all_slots.append(slots) + else: + all_slots.extend(slots) return {i: getattr(object, unmangle(i)) for i in all_slots} @@ -576,10 +577,10 @@ def __diff(self, level, parents_ids=frozenset({})): if self.__skip_this(level): return - if type(level.t1) != type(level.t2): # NOQA + if get_type(level.t1) != get_type(level.t2): report_type_change = True for type_group in self.ignore_type_in_groups: - if type(level.t1) in type_group and type(level.t2) in type_group: + if get_type(level.t1) in type_group and get_type(level.t2) in type_group: report_type_change = False break if report_type_change: diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 6dca007e..53e25220 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -187,3 +187,10 @@ def get_id(obj): Adding some characters to id so they are not just integers to reduce the risk of collision. """ return "{}{}".format(ID_PREFIX, id(obj)) + + +def get_type(obj): + """ + Get the type of object or if it is a class, return the class itself. 
+ """ + return obj if type(obj) is type else type(obj) diff --git a/deepdiff/model.py b/deepdiff/model.py index 2b7a00da..7ad06dec 100644 --- a/deepdiff/model.py +++ b/deepdiff/model.py @@ -127,13 +127,21 @@ def _from_tree_default(self, tree, report_type): def _from_tree_type_changes(self, tree): if 'type_changes' in tree: for change in tree['type_changes']: + if type(change.t1) is type: + include_values = False + old_type = change.t1 + new_type = change.t2 + else: + include_values = True + old_type = type(change.t1) + new_type = type(change.t2) remap_dict = RemapDict({ - 'old_type': type(change.t1), - 'new_type': type(change.t2) + 'old_type': old_type, + 'new_type': new_type }) self['type_changes'][change.path( force=FORCE_DEFAULT)] = remap_dict - if Verbose.level: + if Verbose.level and include_values: remap_dict.update(old_value=change.t1, new_value=change.t2) def _from_tree_value_changed(self, tree): diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index ecb1e3d2..f9189b0b 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -841,8 +841,8 @@ def test_custom_objects_change(self): assert result == ddiff def test_custom_objects_slot_change(self): - class ClassA(object): - __slots__ = ['x', 'y'] + class ClassA: + __slots__ = ('x', 'y') def __init__(self, x, y): self.x = x @@ -861,8 +861,38 @@ def __init__(self, x, y): } assert result == ddiff + def test_custom_class_changes_with_slot_changes(self): + class ClassA: + __slots__ = ['x', 'y'] + + def __init__(self, x, y): + self.x = x + self.y = y + + class ClassB: + __slots__ = ['x'] + + ddiff = DeepDiff(ClassA, ClassB) + result = {'type_changes': {'root': {'old_type': ClassA, 'new_type': ClassB}}} + assert result == ddiff + + def test_custom_class_changes_with_slot_change_when_ignore_type(self): + class ClassA: + __slots__ = ['x', 'y'] + + def __init__(self, x, y): + self.x = x + self.y = y + + class ClassB: + __slots__ = ['x'] + + ddiff = DeepDiff(ClassA, ClassB, 
ignore_type_in_groups=[(ClassA, ClassB)]) + result = {'iterable_item_removed': {'root.__slots__[1]': 'y'}, 'attribute_removed': ['root.__init__', 'root.y']} + assert result == ddiff + def test_custom_objects_slot_in_parent_class_change(self): - class ClassA(object): + class ClassA: __slots__ = ['x'] class ClassB(ClassA): @@ -1340,6 +1370,14 @@ def test_ignore_type_in_groups_numbers_and_strings_when_ignore_order(self): result = {'iterable_item_added': {'root[2]': 3.3}, 'iterable_item_removed': {'root[2]': 3}} assert result == ddiff + def test_ignore_string_type_changes_when_dict_keys_merge_is_not_deterministic(self): + t1 = {'a': 10, b'a': 20} + t2 = {'a': 11, b'a': 22} + ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True, ignore_string_type_changes=True, ignore_order=True) + result = {'values_changed': {"root['a']": {'new_value': 22, 'old_value': 20}}} + alternative_result = {'values_changed': {"root['a']": {'new_value': 11, 'old_value': 10}}} + assert result == ddiff or alternative_result == ddiff + @pytest.mark.skip(reason="REMAPPING DISABLED UNTIL KEY NAMES CHANGE AGAIN IN FUTURE") def test_base_level_dictionary_remapping(self): """ From c5ce0ba7435a6994e2ceb7be9e84463eb9fa391f Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 00:51:52 -0700 Subject: [PATCH 65/76] fixing docs --- deepdiff/diff.py | 34 +++++++++++++++++++++++++----- deepdiff/diff_doc.rst | 4 +++- deepdiff/helper.py | 5 +++-- tests/test_diff_text.py | 7 ++++++- tests/test_serialization.py | 42 ++++++++++++++++++++++++++++++------- 5 files changed, 75 insertions(+), 17 deletions(-) diff --git a/deepdiff/diff.py b/deepdiff/diff.py index e0936266..933ad8a2 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -611,8 +611,6 @@ def __diff(self, level, parents_ids=frozenset({})): else: self.__diff_obj(level, parents_ids) - return - @property def json(self): warnings.warn( @@ -628,7 +626,7 @@ def json(self): def to_json_pickle(self): """ - Get the json pickle of the diff object. 
Unless you need all the attributes and functionality of DeepDiff, doing to_json is the safer option that json pickle. + Get the json pickle of the diff object. Unless you need all the attributes and functionality of DeepDiff, running to_json() is the safer option than json pickle. """ copied = self.copy() return jsonpickle.encode(copied) @@ -655,13 +653,39 @@ def from_json_pickle(cls, value): def to_json(self, default_mapping=None): """ - Dump json of the text view + Dump json of the text view. + **Parameters** + + default_mapping : default_mapping, dictionary(optional), a dictionary of mapping of different types to json types. + + by default DeepDiff converts certain data types. For example Decimals into floats so they can be exported into json. + If you have a certain object type that the json serializer cannot serialize, please pass the appropriate type + conversion through this dictionary. + + **Example** + + Serialize custom objects + >>> class A: + ... pass + ... + >>> class B: + ... pass + ... + >>> t1 = A() + >>> t2 = B() + >>> ddiff = DeepDiff(t1, t2) + >>> ddiff.to_json() + TypeError: We do not know how to convert <__main__.A object at 0x10648> of type <class '__main__.A'> for json serialization. Please pass the default_mapping parameter with proper mapping of the object to a basic python type. + + >>> default_mapping = {A: lambda x: 'obj A', B: lambda x: 'obj B'} + >>> ddiff.to_json(default_mapping=default_mapping) + >>> '{"type_changes": {"root": {"old_type": "A", "new_type": "B", "old_value": "obj A", "new_value": "obj B"}}}' """ return json.dumps(self.to_dict(), default=json_convertor_default(default_mapping=default_mapping)) def to_dict(self): """ - Dump dictionary of the text view + Dump dictionary of the text view. It does not matter which view you are currently in. It will give you the dictionary of the text view.
""" if self.view == TREE_VIEW: result = dict(self._get_view_results(view=TEXT_VIEW)) diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 50b3162f..f648cd77 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -694,8 +694,10 @@ Example: >>> ddiff.to_json() '{"type_changes": {"root[4][\'b\']": {"old_type": "list", "new_type": "str", "old_value": [1, 2, 3], "new_value": "world\\n\\n\\nEnd"}}}' +.. seealso:: + Take a look at to_json() documentation in this page for more details. -If you want the original DeepDiff object to be serialized with all the bells and whistles, you can use the to_json_pickle() and to_json_pickle() in order to serialize and deserialize its results into json. +If you want the original DeepDiff object to be serialized with all the bells and whistles, you can use the to_json_pickle() and to_json_pickle() in order to serialize and deserialize its results into json. Note that json_pickle is unsafe and json pickle dumps from untrusted sources should never be loaded. Serialize and then deserialize back to deepdiff >>> t1 = {1: 1, 2: 2, 3: 3} diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 53e25220..471604a8 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -18,8 +18,8 @@ py4 = py_major_version == '4' if py4: - logger.warning('Python 4 is not supported yet. Switching logic to Python 3.') - py3 = True + logger.warning('Python 4 is not supported yet. Switching logic to Python 3.') # pragma: no cover + py3 = True # pragma: no cover if py2: # pragma: no cover sys.exit('Python 2 is not supported anymore. 
The last version of DeepDiff that supported Py2 was 3.3.0') @@ -137,6 +137,7 @@ class indexed_set(set): Decimal: float, OrderedSet: list, type: lambda x: x.__name__, + bytes: lambda x: x.decode('utf-8') } diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index f9189b0b..3e60c4a6 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -888,7 +888,7 @@ class ClassB: __slots__ = ['x'] ddiff = DeepDiff(ClassA, ClassB, ignore_type_in_groups=[(ClassA, ClassB)]) - result = {'iterable_item_removed': {'root.__slots__[1]': 'y'}, 'attribute_removed': ['root.__init__', 'root.y']} + result = {'iterable_item_removed': {'root.__slots__[1]': 'y'}, 'attribute_removed': {'root.__init__', 'root.y'}} assert result == ddiff def test_custom_objects_slot_in_parent_class_change(self): @@ -1194,6 +1194,11 @@ def test_decimal_ignore_order(self): assert result == ddiff def test_unicode_string_type_changes(self): + """ + These tests were written when DeepDiff was in Python 2. + Writing b"你好" throws an exception in Python 3 so can't be used for testing. + These tests are currently useless till they are rewritten properly. 
+ """ unicode_string = {"hello": u"你好"} ascii_string = {"hello": "你好"} ddiff = DeepDiff(unicode_string, ascii_string) diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 91fc4084..b88f7993 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -1,31 +1,31 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import pytest from deepdiff import DeepDiff import logging logging.disable(logging.CRITICAL) +t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} +t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} + class TestDeepAdditions: """Tests for Additions and Subtractions.""" def test_serialization_text(self): - t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2) assert "builtins.list" in ddiff.to_json_pickle() + jsoned = ddiff.to_json() + assert "world" in jsoned def test_deserialization(self): - t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2) jsoned = ddiff.to_json_pickle() ddiff2 = DeepDiff.from_json(jsoned) assert ddiff == ddiff2 def test_serialization_tree(self): - t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2, view='tree') pickle_jsoned = ddiff.to_json_pickle() assert "world" in pickle_jsoned @@ -33,8 +33,6 @@ def test_serialization_tree(self): assert "world" in jsoned def test_deserialization_tree(self): - t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2, view='tree') jsoned = ddiff.to_json_pickle() ddiff2 = DeepDiff.from_json(jsoned) @@ -49,3 +47,31 @@ def test_deleting_serialization_cache_when_using_the_property(self): assert hasattr(ddiff, '_json') del ddiff.json assert hasattr(ddiff, 
'_json') is False + + def test_serialize_custom_objects_throws_error(self): + class A: + pass + + class B: + pass + + t1 = A() + t2 = B() + ddiff = DeepDiff(t1, t2) + with pytest.raises(TypeError): + ddiff.to_json() + + def test_serialize_custom_objects_with_default_mapping(self): + class A: + pass + + class B: + pass + + t1 = A() + t2 = B() + ddiff = DeepDiff(t1, t2) + default_mapping = {A: lambda x: 'obj A', B: lambda x: 'obj B'} + result = ddiff.to_json(default_mapping=default_mapping) + expected_result = r'{"type_changes": {"root": {"old_type": "A", "new_type": "B", "old_value": "obj A", "new_value": "obj B"}}}' + assert expected_result == result From 8a29be58f2997a5cf84fef4c2b75e8bc3e1837f2 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 01:12:23 -0700 Subject: [PATCH 66/76] adding more docs --- deepdiff/deephash.py | 1 + deepdiff/deephash_doc.rst | 75 +++++++++++++-- deepdiff/diff_doc.rst | 195 +++++++++++++++++++++----------------- 3 files changed, 179 insertions(+), 92 deletions(-) diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index b35698b6..684d5ab6 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -51,6 +51,7 @@ class DeepHash(dict, Base): def __init__(self, obj, + *, hashes=None, exclude_types=None, exclude_paths=None, diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index 48b12557..23db72de 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -10,6 +10,9 @@ At the core of it, DeepHash is a deterministic serialization of your object into can be passed to a hash function. By default it uses Murmur 3 128 bit hash function which is a fast, non-cryptographic hashing function. You have the option to pass any another hashing function to be used instead. +**Import** + >>> from deepdiff import DeepHash + **Parameters** obj : any object, The object to be hashed based on its content. 
@@ -62,10 +65,33 @@ apply_hash: Boolean, default = True The only time you want the apply_hash to be False is if you want to know what the string representation of your object is BEFORE it gets hashed. +ignore_type_in_groups + Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the time the shortcuts provided to you are enough. + The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more control compared to the shortcuts. + + For example let's say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: + + 1. Set ignore_string_type_changes=True (it is False by default). + 2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . + + Now what if you want also typeA and typeB to be ignored when comparing against each other? + + 1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] + 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] + ignore_string_type_changes: Boolean, default = True string type conversions should not affect the hash output when this is set to True. For example "Hello" and b"Hello" should produce the same hash. +By setting it to True, both the string and bytes of hello return the same hash.
+ >>> DeepHash(b'hello', ignore_string_type_changes=True) + {b'hello': 221860156526691709602818861774599422448} + >>> DeepHash('hello', ignore_string_type_changes=True) + {'hello': 221860156526691709602818861774599422448} + +ignore_numeric_type_changes +Default: False + ignore_numeric_type_changes: Boolean, default = True numeric type conversions should not affect the hash output when this is set to True. For example 10, 10.0 and Decimal(10) should produce the same hash. @@ -77,6 +103,41 @@ ignore_numeric_type_changes: Boolean, default = True That way they both produce the same hash. + >>> t1 = {1: 1, 2: 2.22} + >>> t2 = {1: 1.0, 2: 2.22} + >>> DeepHash(t1)[1] + 231678797214551245419120414857003063149 + >>> DeepHash(t1)[1.0] + 231678797214551245419120414857003063149 + +You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: + >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] + + +ignore_type_in_groups example with custom objects: + >>> class Burrito: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> class Taco: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> burrito = Burrito() + >>> taco = Taco() + >>> + >>> burritos = [burrito] + >>> tacos = [taco] + >>> + >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) + >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) + >>> d1[burrito] == d2[taco] + True + **Returns** A dictionary of {item: item hash}. If your object is nested, it will build hashes of all the objects it contains too. 
@@ -122,19 +183,21 @@ If you do a deep copy of obj, it should still give you the same hash: >>> DeepHash(obj2)[obj2] 34150898645750099477987229399128149852 -Note that by default DeepHash will ignore string type differences. So if your strings were bytes, you would still get the same hash: +Note that by default DeepHash will include string type differences. So if your strings were bytes: >>> obj3 = {1: 2, b'a': b'b'} >>> DeepHash(obj3)[obj3] - 34150898645750099477987229399128149852 - -But if you want a different hash if string types are different, set ignore_string_type_changes to False: - >>> DeepHash(obj3, ignore_string_type_changes=False)[obj3] 64067525765846024488103933101621212760 -On the other hand, ignore_numeric_type_changes is by default False. +But if you want the same hash if string types are different, set ignore_string_type_changes to True: + >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] + 34150898645750099477987229399128149852 + +ignore_numeric_type_changes is by default False too. >>> obj1 = {4:10} >>> obj2 = {4.0: Decimal(10.0)} >>> DeepHash(obj1)[4] == DeepHash(obj2)[4.0] False + +But by setting it to True, we can get the same hash. 
>>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0] True diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index f648cd77..189352e4 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -181,92 +181,6 @@ String difference 2 2 End - -Type change - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'type_changes': { "root[4]['b']": { 'new_type': , - 'new_value': 'world\n\n\nEnd', - 'old_type': , - 'old_value': [1, 2, 3]}}} - -And if you don't care about the value of items that have changed type, please set verbose level to 0 - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:"2", 3:3} - >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) - { 'type_changes': { 'root[2]': { 'new_type': , - 'old_type': }}} - -ignore_type_in_groups - -Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the times the shortcuts provided to you are enough. -The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more control compared to the shortcuts. - -For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: - -1. Set ignore_string_type_changes=True which is the default. -2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . 
- -Now what if you want also typeA and typeB to be ignored when comparing agains each other? - -1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] -2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] - -ignore_string_type_changes -Default: False - >>> DeepDiff(b'hello', 'hello', ignore_string_type_changes=True) - {} - >>> DeepDiff(b'hello', 'hello') - {'type_changes': {'root': {'old_type': , 'new_type': , 'old_value': b'hello', 'new_value': 'hello'}}} - -ignore_numeric_type_changes -Default: False - -Ignore Type Number - Dictionary that contains float and integer: - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = {1: 1, 2: 2.22} - >>> t2 = {1: 1.0, 2: 2.22} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff, indent=2) - { 'type_changes': { 'root[1]': { 'new_type': , - 'new_value': 1.0, - 'old_type': , - 'old_value': 1}}} - >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) - >>> pprint(ddiff, indent=2) - {} - -Ignore Type Number - List that contains float and integer: - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = [1, 2, 3] - >>> t2 = [1.0, 2.0, 3.0] - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff, indent=2) - { 'type_changes': { 'root[0]': { 'new_type': , - 'new_value': 1.0, - 'old_type': , - 'old_value': 1}, - 'root[1]': { 'new_type': , - 'new_value': 2.0, - 'old_type': , - 'old_value': 2}, - 'root[2]': { 'new_type': , - 'new_value': 3.0, - 'old_type': , - 'old_value': 3}}} - >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) - >>> pprint(ddiff, indent=2) - {} - -You can pass a list of tuples if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . 
If you want to pass both: - -ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] - - List difference >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} @@ -375,6 +289,115 @@ Approximate float comparison (Significant digits after the point): You just need to set view='tree' to get it in tree form. +**Ignore Type Changes** + +Type change + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'type_changes': { "root[4]['b']": { 'new_type': , + 'new_value': 'world\n\n\nEnd', + 'old_type': , + 'old_value': [1, 2, 3]}}} + +And if you don't care about the value of items that have changed type, please set verbose level to 0 + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:"2", 3:3} + >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) + { 'type_changes': { 'root[2]': { 'new_type': , + 'old_type': }}} + +ignore_type_in_groups + +Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the times the shortcuts provided to you are enough. +The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more control compared to the shortcuts. + +For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: + +1. Set ignore_string_type_changes=True which is the default. +2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. 
It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . + +Now what if you want also typeA and typeB to be ignored when comparing against each other? + +1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] +2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] + +ignore_string_type_changes +Default: False + >>> DeepDiff(b'hello', 'hello', ignore_string_type_changes=True) + {} + >>> DeepDiff(b'hello', 'hello') + {'type_changes': {'root': {'old_type': <class 'bytes'>, 'new_type': <class 'str'>, 'old_value': b'hello', 'new_value': 'hello'}}} + +ignore_numeric_type_changes +Default: False + +Ignore Type Number - Dictionary that contains float and integer: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = {1: 1, 2: 2.22} + >>> t2 = {1: 1.0, 2: 2.22} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[1]': { 'new_type': <class 'float'>, + 'new_value': 1.0, + 'old_type': <class 'int'>, + 'old_value': 1}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + >>> pprint(ddiff, indent=2) + {} + +Ignore Type Number - List that contains float and integer: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [1, 2, 3] + >>> t2 = [1.0, 2.0, 3.0] + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[0]': { 'new_type': <class 'float'>, + 'new_value': 1.0, + 'old_type': <class 'int'>, + 'old_value': 1}, + 'root[1]': { 'new_type': <class 'float'>, + 'new_value': 2.0, + 'old_type': <class 'int'>, + 'old_value': 2}, + 'root[2]': { 'new_type': <class 'float'>, + 'new_value': 3.0, + 'old_type': <class 'int'>, + 'old_value': 3}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + >>> pprint(ddiff, indent=2) + {} + +You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers .
If you want to pass both: + >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] + + +ignore_type_in_groups example with custom objects: + >>> class Burrito: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> class Taco: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> burrito = Burrito() + >>> taco = Taco() + >>> + >>> burritos = [burrito] + >>> tacos = [taco] + >>> + >>> DeepDiff(burritos, tacos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) + {} + + **Tree View** Starting the version 3 You can chooe the view into the deepdiff results. From 49e053165ae26b158e2f65100a67cb8d438c6395 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 01:37:24 -0700 Subject: [PATCH 67/76] updating docs --- README.md | 10 +-- deepdiff/diff_doc.rst | 32 +++---- docs/index.rst | 196 ++---------------------------------------- 3 files changed, 29 insertions(+), 209 deletions(-) diff --git a/README.md b/README.md index 04c08a4d..42e4c9ba 100644 --- a/README.md +++ b/README.md @@ -385,19 +385,19 @@ If you do a deep copy of obj, it should still give you the same hash: 34150898645750099477987229399128149852 ``` -Note that by default DeepHash will ignore string type differences. So if your strings were bytes, you would still get the same hash: +Note that by default DeepHash will include string type differences. 
So if your strings were bytes: ```py >>> obj3 = {1: 2, b'a': b'b'} >>> DeepHash(obj3)[obj3] -34150898645750099477987229399128149852 +64067525765846024488103933101621212760 ``` -But if you want a different hash if string types are different, set ignore_string_type_changes to False: +But if you want the same hash if string types are different, set ignore_string_type_changes to True: ```py ->>> DeepHash(obj3, ignore_string_type_changes=False)[obj3] -64067525765846024488103933101621212760 +>>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] +34150898645750099477987229399128149852 ``` # Using DeepDiff in unit tests diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 189352e4..87466330 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -46,8 +46,6 @@ exclude_paths: list, default = None exclude_regex_paths: list, default = None List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string or regex compiled object. -exclude_types: list, default = None - List of object types to exclude from the report. hasher: default = DeepHash.murmur3_128bit Hash function to be used. If you don't want Murmur3, you can use Python's built-in hash function @@ -59,13 +57,17 @@ view: string, default = text The new view is called the tree view which allows you to traverse through the tree of changed items. +exclude_types: list, default = None + List of object types to exclude from the report. + ignore_string_type_changes: Boolean, default = False Whether to ignore string type changes or not. For example b"Hello" vs. "Hello" are considered the same if ignore_string_type_changes is set to True. ignore_numeric_type_changes: Boolean, default = False Whether to ignore numeric type changes or not. For example 10 vs. 10.0 are considered the same if ignore_numeric_type_changes is set to True. 
-ignore_type_in_groups: Tuple or List of Tuples, default=None ignores types when t1 and t2 are both within the same type group. +ignore_type_in_groups: Tuple or List of Tuples, default = None + ignores types when t1 and t2 are both within the same type group. **Returns** @@ -308,6 +310,17 @@ And if you don't care about the value of items that have changed type, please se { 'type_changes': { 'root[2]': { 'new_type': , 'old_type': }}} + +Exclude types + +Exclude certain types from comparison: + >>> l1 = logging.getLogger("test") + >>> l2 = logging.getLogger("test2") + >>> t1 = {"log": l1, 2: 1337} + >>> t2 = {"log": l2, 2: 1337} + >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) + {} + ignore_type_in_groups Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the times the shortcuts provided to you are enough. @@ -316,7 +329,7 @@ The shortcuts are ignore_string_type_changes which by default is False and ignor For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: 1. Set ignore_string_type_changes=True which is the default. -2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . +2. Or set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . Now what if you want also typeA and typeB to be ignored when comparing agains each other? 
@@ -651,17 +664,6 @@ Approximate float comparison (Significant digits after the point) (Tree View): >>> ddiff {'values_changed': {}} - -**Exclude types** - -Exclude certain types from comparison: - >>> l1 = logging.getLogger("test") - >>> l2 = logging.getLogger("test2") - >>> t1 = {"log": l1, 2: 1337} - >>> t2 = {"log": l2, 2: 1337} - >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) - {} - **Exclude paths** Exclude part of your object tree from comparison diff --git a/docs/index.rst b/docs/index.rst index 9ce26ab1..54554d79 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -35,20 +35,9 @@ Importing >>> from deepdiff import DeepHash # For hashing objects based on their contents ******** -Features +DeepDiff ******** -Parameters -~~~~~~~~~~ - -- t1 (the first object) -- t2 (the second object) -- `ignore\_order`_ -- `report\_repetition`_ -- `exclude\_types\_or\_paths`_ -- `significant\_digits`_ -- `views`_ - Supported data types ~~~~~~~~~~~~~~~~~~~~ @@ -74,34 +63,6 @@ List difference ignoring order or duplicates >>> print (ddiff) {} -Report repetitions -~~~~~~~~~~~~~~~~~~ - -This flag ONLY works when ignoring order is enabled. - -.. code:: python - - t1 = [1, 3, 1, 4] - t2 = [4, 4, 1] - ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) - print(ddiff) - -which will print you: - -.. code:: python - - {'iterable_item_removed': {'root[1]': 3}, - 'repetition_change': {'root[0]': {'old_repeat': 2, - 'old_indexes': [0, 2], - 'new_indexes': [2], - 'value': 1, - 'new_repeat': 1}, - 'root[3]': {'old_repeat': 1, - 'old_indexes': [3], - 'new_indexes': [0, 1], - 'value': 4, - 'new_repeat': 2}}} - Exclude types or paths ~~~~~~~~~~~~~~~~~~~~~~ @@ -117,16 +78,6 @@ Exclude certain types from comparison >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) {} -Exclude part of your object tree from comparison ------------------------------------------------- - -.. 
code:: python - - >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} - >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} - >>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) - {} - Significant Digits ~~~~~~~~~~~~~~~~~~ @@ -143,149 +94,23 @@ X=significant\_digits >>> DeepDiff(t1, t2, significant_digits=1) {'values_changed': {'root': {'old_value': Decimal('1.52'), 'new_value': Decimal('1.57')}}} -Approximate float comparison: ------------------------------ - -.. code:: python - - >>> t1 = [ 1.1129, 1.3359 ] - >>> t2 = [ 1.113, 1.3362 ] - >>> pprint(DeepDiff(t1, t2, significant_digits=3)) - {} - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, - 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} - >>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) - {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} - - -Views -~~~~~ - -Text View (default) -------------------- - -Text view is the original and currently the default view of DeepDiff. - -It is called text view because the results contain texts that represent the path to the data: - -Example of using the text view. - >>> from deepdiff import DeepDiff - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> print(ddiff) - {'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}} - -So for example ddiff['dictionary_item_removed'] is a set if strings thus this is called the text view. - -.. seealso:: - The following examples are using the *default text view.* - The Tree View is introduced in DeepDiff v3 and provides traversing capabilities through your diffed data and more! - Read more about the Tree View at :doc:`/diff` - -Tree View (new) ---------------- - -Starting the version v3 You can choose the view into the deepdiff results. 
-The tree view provides you with tree objects that you can traverse through to find -the parents of the objects that are diffed and the actual objects that are being diffed. -This view is very useful when dealing with nested objects. -Note that tree view always returns results in the form of Python sets. - -You can traverse through the tree elements! - -.. note:: - The Tree view is just a different representation of the diffed data. - Behind the scene, DeepDiff creates the tree view first and then converts it to textual representation for the text view. - -.. code:: text - - +---------------------------------------------------------------+ - | | - | parent(t1) parent node parent(t2) | - | + ^ + | - +------|--------------------------|---------------------|-------+ - | | | up | - | Child | | | ChildRelationship - | Relationship | | | - | down | | | - +------|----------------------|-------------------------|-------+ - | v v v | - | child(t1) child node child(t2) | - | | - +---------------------------------------------------------------+ - - -The tree view allows you to have more than mere textual representaion of the diffed objects. -It gives you the actual objects (t1, t2) throughout the tree of parents and children. - -:Example: - -.. code:: python - - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') - >>> ddiff_verbose0 - {'values_changed': {}} - >>> - >>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') - >>> ddiff_verbose1 - {'values_changed': {}} - >>> set_of_values_changed = ddiff_verbose1['values_changed'] - >>> # since set_of_values_changed includes only one item in a set - >>> # in order to get that one item we can: - >>> (changed,) = set_of_values_changed - >>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] - - >>> changed.t1 - 2 - >>> changed.t2 - 4 - >>> # You can traverse through the tree, get to the parents! 
- >>> changed.up - - -.. seealso:: - Read more about the Tree View at :doc:`/diff` - - -Verbose Level -~~~~~~~~~~~~~ - -Verbose level by default is 1. The possible values are 0, 1 and 2. - -- verbose_level 0: won’t report values when type changed. -- verbose_level 1: default -- verbose_level 2: will report values when custom objects or - dictionaries have items added or removed. - -.. seealso:: - Read more about the verbosity at :doc:`/diff` - Serialization ~~~~~~~~~~~~~ -DeepDiff uses jsonpickle in order to serialize and deserialize its results into json. This works for both tree view and text view. - -:Serialize and then deserialize back to deepdiff: +:Serialize to json: .. code:: python >>> t1 = {1: 1, 2: 2, 3: 3} >>> t2 = {1: 1, 2: "2", 3: 3} >>> ddiff = DeepDiff(t1, t2) - >>> jsoned = ddiff.json + >>> jsoned = ddiff.to_json() >>> jsoned - '{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' - >>> ddiff_new = DeepDiff.from_json(jsoned) - >>> ddiff == ddiff_new - True + '{"type_changes": {"root[2]": {"new_type": "str", "new_value": "2", "old_type": "int", "old_value": 2}}}' -Read more in +And many more features! Read more in :doc:`/diff` @@ -373,7 +198,7 @@ But with DeepHash: >>> from deepdiff import DeepHash >>> obj = {1: 2, 'a': 'b'} >>> DeepHash(obj) - {4355639248: (2468916477072481777, 512283587789292749), 4355639280: (-3578777349255665377, -6377555218122431491), 4358636128: (-8839064797231613815, -1822486391929534118), 4358009664: (8833996863197925870, -419376694314494743), 4357467952: (3415089864575009947, 7987229399128149852)} + {1: 2468916477072481777512283587789292749, 2: -35787773492556653776377555218122431491, ...} So what is exactly the hash of obj in this case? DeepHash is calculating the hash of the obj and any other object that obj contains. 
@@ -384,19 +209,12 @@ In order to get the hash of obj itself, you need to use the object (or the id of >>> hashes = DeepHash(obj) >>> hashes[obj] - (3415089864575009947, 7987229399128149852) + 34150898645750099477987229399128149852 Read more in the Deep Hash reference: :doc:`/deephash` -.. _ignore\_order: #ignore-order -.. _report\_repetition: #report-repetitions -.. _verbose\_level: #verbose-level -.. _exclude\_types\_or\_paths: #exclude-types-or-paths -.. _significant\_digits: #significant-digits -.. _views: #views - References ========== From 3e1c002d1b4426933ee6fb6e60ad27fceb052f4f Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 01:42:09 -0700 Subject: [PATCH 68/76] updating docs --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 42e4c9ba..d2e20e10 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 DeepDiff gets the difference of 2 objects. +Please take a look at the DeepDiff docs at + ## Parameters In addition to the 2 objects being compared: From d09a0109d27cfb70ebac662047545ed4bba08766 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 01:42:47 -0700 Subject: [PATCH 69/76] docs --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d2e20e10..e61089ee 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 DeepDiff gets the difference of 2 objects. 
-Please take a look at the DeepDiff docs at +Please take a look at the DeepDiff docs at <./deepdiff/diff_doc.rst> ## Parameters From fea0bf3b333bdd9ee20f25a174202dad3bec6924 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 01:43:41 -0700 Subject: [PATCH 70/76] docs --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e61089ee..cf4d2367 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 DeepDiff gets the difference of 2 objects. -Please take a look at the DeepDiff docs at <./deepdiff/diff_doc.rst> +Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst) ## Parameters From ed8ec00c1c53fbd58fb4a9c4ccf86419bda0544c Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 01:58:40 -0700 Subject: [PATCH 71/76] more docs --- README.md | 823 +++++----------------------------------- deepdiff/search.py | 36 +- deepdiff/search_doc.rst | 26 ++ 3 files changed, 137 insertions(+), 748 deletions(-) create mode 100644 deepdiff/search_doc.rst diff --git a/README.md b/README.md index cf4d2367..30db927d 100644 --- a/README.md +++ b/README.md @@ -15,24 +15,7 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 **NOTE: Python 2 is not supported any more. 
DeepDiff v3.3.0 was the last version to supprt Python 2** -## Table of Contents - -- [Installation](#Installation) -- [Parameters](#parameters) -- [Ignore Order](#ignore-order) -- [Report repetitions](#report-repetitions) -- [Exclude types or paths](#exclude-type-or-paths) -- [Significant Digits](#significant-digits) -- [Ignore Type Number](#ignore-type-number) -- [Verbose Level](#verbose-level) -- [Deep Search](#deep-search) -- [Deep Hash](#deep-hash) -- [Using DeepDiff in unit tests](#using-deepdiff-in-unit-tests) -- [Difference with Json Patch](#difference-with-json-patch) -- [Views](#views) -- [Text View](#text-view) -- [Tree View](#tree-view) -- [Serialization](#serialization) + - [Documentation](http://deepdiff.readthedocs.io/en/latest/) @@ -55,27 +38,9 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 DeepDiff gets the difference of 2 objects. Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst) +The full documentation can be found on -## Parameters - -In addition to the 2 objects being compared: - -- [ignore_order](#ignore-order) -- [report_repetition](#report-repetitions) -- [exclude_types](#exclude-types) -- [exclude_paths](#exclude-paths) -- [exclude_regex_paths](#exclude-regex-paths) -- [verbose_level](#verbose-level) -- [significant_digits](#significant-digits) -- [view](#views) - -## Supported data types - -int, string, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects! - -## Ignore Order - -Sometimes you don't care about the order of objects when comparing them. In those cases, you can set `ignore_order=True`. However this flag won't report the repetitions to you. You need to additionally enable `report_repetition=True` for getting a report of repetitions. +## Examples ### List difference ignoring order or duplicates @@ -87,7 +52,7 @@ Sometimes you don't care about the order of objects when comparing them. 
In thos {} ``` -## Report repetitions +### Report repetitions This flag ONLY works when ignoring order is enabled. Note that this feature is experimental. @@ -115,11 +80,7 @@ which will print you: 'new_repeat': 2}}} ``` -## Exclude types or paths - -### Exclude types - -#### Exclude certain types from comparison: +### Exclude certain types from comparison: ```python >>> l1 = logging.getLogger("test") @@ -130,11 +91,7 @@ which will print you: {} ``` -### Exclude paths - -#### Exclude part of your object tree from comparison - -use `exclude_paths` and pass a set or list of paths to exclude: +### Exclude part of your object tree from comparison ```python >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} @@ -158,18 +115,7 @@ You can also exclude using regular expressions by using `exclude_regex_paths` an {} ``` -example 2: - -```python ->>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} ->>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} ->>> DeepDiff(t1, t2, exclude_regex_paths={r"\['foo.'\]"}) -{} -``` - -Tip: DeepDiff is using re.search on the path. So if you want to force it to match from the beginning of the path, add `^` to the beginning of regex. - -## Significant Digits +### Significant Digits Digits **after** the decimal point. Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits @@ -182,41 +128,7 @@ Digits **after** the decimal point. 
Internally it uses "{:.Xf}".format(Your Numb {'values_changed': {'root': {'old_value': Decimal('1.52'), 'new_value': Decimal('1.57')}}} ``` -Approximate float comparison: - -```python ->>> t1 = [ 1.1129, 1.3359 ] ->>> t2 = [ 1.113, 1.3362 ] ->>> pprint(DeepDiff(t1, t2, significant_digits=3)) -{} ->>> pprint(DeepDiff(t1, t2)) -{'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, - 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} ->>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) -{'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} -``` - -## Ignore Type In Groups - -Ignore Type Number - Dictionary that contains float and integer: - -```py ->>> from deepdiff import DeepDiff ->>> from pprint import pprint ->>> t1 = {1: 1, 2: 2.22} ->>> t2 = {1: 1.0, 2: 2.22} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint(ddiff, indent=2) -{ 'type_changes': { 'root[1]': { 'new_type': , - 'new_value': 1.0, - 'old_type': , - 'old_value': 1}}} ->>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) ->>> pprint(ddiff, indent=2) -{} -``` - -Ignore Type Number - List that contains float and integer: +### Ignore Type Number - List that contains float and integer: ```py >>> from deepdiff import DeepDiff @@ -242,62 +154,108 @@ Ignore Type Number - List that contains float and integer: {} ``` -## Verbose Level +## Views -Verbose level by default is 1. The possible values are 0, 1 and 2. - -- Verbose level 0: won't report values when type changed. [Example](#type-of-an-item-has-changed) -- Verbose level 1: default -- Verbose level 2: will report values when custom objects or dictionaries have items added or removed. [Example](#items-added-or-removed-verbose) +Starting with DeepDiff v 3, there are two different views into your diffed data: text view (original) and tree view (new). -# Deep Search -(New in v2-1-0) +### Text View -Tip: Take a look at [grep](#grep) which gives you a new interface for DeepSearch! 
+Text view is the original and currently the default view of DeepDiff. -DeepDiff comes with a utility to find the path to the item you are looking for. -It is called DeepSearch and it has a similar interface to DeepDiff. +It is called text view because the results contain texts that represent the path to the data: -Let's say you have a huge nested object and want to see if any item with the word `somewhere` exists in it. +Example of using the text view. -```py -from deepdiff import DeepSearch -obj = {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"} -ds = DeepSearch(obj, "somewhere", verbose_level=2) -print(ds) +```python +>>> from deepdiff import DeepDiff +>>> t1 = {1:1, 3:3, 4:4} +>>> t2 = {1:1, 3:3, 5:5, 6:6} +>>> ddiff = DeepDiff(t1, t2) +>>> print(ddiff) +{'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}} ``` -Which will print: +So for example `ddiff['dictionary_item_removed']` is a set of strings, thus this is called the text view. -```py -{'matched_paths': {"root['somewhere']": "around"}, - 'matched_values': {"root['long']": "somewhere"}} + The following examples are using the *default text view.* + The Tree View is introduced in DeepDiff v3 + and provides traversing capabilities through your diffed data and more! + Read more about the Tree View at the [tree view section](#tree-view) of this page. + + +### Tree View + +Starting with version v3, you can choose the view into the deepdiff results. +The tree view provides you with tree objects that you can traverse through to find the parents of the objects that are diffed and the actual objects that are being diffed. 
+ + +#### Value of an item has changed (Tree View) + +```python +>>> from deepdiff import DeepDiff +>>> from pprint import pprint +>>> t1 = {1:1, 2:2, 3:3} +>>> t2 = {1:1, 2:4, 3:3} +>>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') +>>> ddiff_verbose0 +{'values_changed': {}} +>>> +>>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') +>>> ddiff_verbose1 +{'values_changed': {}} +>>> set_of_values_changed = ddiff_verbose1['values_changed'] +>>> # since set_of_values_changed includes only one item in a set +>>> # in order to get that one item we can: +>>> (changed,) = set_of_values_changed +>>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] + +>>> changed.t1 +2 +>>> changed.t2 +4 +>>> # You can traverse through the tree, get to the parents! +>>> changed.up + ``` -Now, think of a case where you want to match a value as a word. +### Serialization -```py -from deepdiff import DeepSearch -obj = {"long": "somewhere around", "string": 2, 0: 0, "somewhere": "around"} -ds = DeepSearch(obj, "around", match_string=True, verbose_level=2) -print(ds) -ds = DeepSearch(obj, "around", verbose_level=2) -print(ds) +In order to convert the DeepDiff object into a normal Python dictionary, use the to_dict() method. +Note that to_dict will use the text view even if you did the diff in tree view. + +Example: + +```python +>>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} +>>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} +>>> ddiff = DeepDiff(t1, t2, view='tree') +>>> ddiff.to_dict() +{'type_changes': {"root[4]['b']": {'old_type': , 'new_type': , 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} ``` -Which will print: +In order to do safe json serialization, use the to_json() method. 
-```py -{'matched_values': {"root['somewhere']": 'around'}} -{'matched_values': {"root['long']": 'somewhere around',"root['somewhere']": 'around'}} +Example: + +```python +>>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} +>>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} +>>> ddiff = DeepDiff(t1, t2, view='tree') +>>> ddiff.to_json() +'{"type_changes": {"root[4][\'b\']": {"old_type": "list", "new_type": "str", "old_value": [1, 2, 3], "new_value": "world\\n\\n\\nEnd"}}}' ``` -Tip: An interesting use case is to search inside `locals()` when doing pdb. -## Grep -(New in v3-2-0) +Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst) +The full documentation can be found on + + +# Deep Search + +DeepDiff comes with a utility to find the path to the item you are looking for. +It is called DeepSearch and it has a similar interface to DeepDiff. -Grep is another interface for DeepSearch. +Let's say you have a huge nested object and want to see if any item with the word `somewhere` exists in it. Just grep through your objects as you would in shell! ```py @@ -321,12 +279,18 @@ And you can pass all the same kwargs as DeepSearch to grep too: {'matched_paths': {"root['somewhere']": 'around'}, 'matched_values': {"root['long']": 'somewhere'}} ``` +Please take a look at the [DeepSearch docs](deepdiff/search_doc.rst) +The full documentation can be found on + # Deep Hash (New in v4-0-0) DeepHash is designed to give you hash of ANY python object based on its contents even if the object is not considered hashable! DeepHash is supposed to be deterministic in order to make sure 2 objects that contain the same data, produce the same hash. +Please take a look at the [DeepHash docs](deepdiff/deephash_doc.rst) +The full documentation can be found on + Let's say you have a dictionary object. 
```py @@ -372,35 +336,10 @@ Which you can write as: At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. -The result hash is `34150898645750099477987229399128149852`. -In this case the hash of the obj is 128 bit that is divided into 2 64bit integers. -Using Murmur3 128bit for hashing is preferred (and is the default behaviour) -since the chance of hash collision will be minimal and hashing will be deterministic -and will not depend on the version of the Python. -If you do a deep copy of obj, it should still give you the same hash: +Please take a look at the [DeepHash docs](deepdiff/deephash_doc.rst) +The full documentation can be found on -```py ->>> from copy import deepcopy ->>> obj2 = deepcopy(obj) ->>> DeepHash(obj2)[obj2] -34150898645750099477987229399128149852 -``` - -Note that by default DeepHash will include string type differences. So if your strings were bytes: - -```py ->>> obj3 = {1: 2, b'a': b'b'} ->>> DeepHash(obj3)[obj3] -64067525765846024488103933101621212760 -``` - -But if you want the same hash if string types are different, set ignore_string_type_changes to True: - -```py ->>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] -34150898645750099477987229399128149852 -``` # Using DeepDiff in unit tests @@ -437,561 +376,7 @@ Example in DeepDiff for the same operation: {'type_changes': {"root['a']['b']['c']": {'old_type': , 'new_value': 42, 'old_value': 'foo', 'new_type': }}} ``` -# Views - -Starting with DeepDiff v 3, there are two different views into your diffed data: text view (original) and tree view (new). - -## Text View - -Text view is the original and currently the default view of DeepDiff. - -It is called text view because the results contain texts that represent the path to the data: - -Example of using the text view. 
- -```python ->>> from deepdiff import DeepDiff ->>> t1 = {1:1, 3:3, 4:4} ->>> t2 = {1:1, 3:3, 5:5, 6:6} ->>> ddiff = DeepDiff(t1, t2) ->>> print(ddiff) -{'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}} -``` - -So for example `ddiff['dictionary_item_removed']` is a set if strings thus this is called the text view. - - The following examples are using the *default text view.* - The Tree View is introduced in DeepDiff v3 - and provides traversing capabilities through your diffed data and more! - Read more about the Tree View at the [tree view section](#tree-view) of this page. - - -### Importing - -```python ->>> from deepdiff import DeepDiff -``` - -### Same object returns empty - -```python ->>> t1 = {1:1, 2:2, 3:3} ->>> t2 = t1 ->>> print(DeepDiff(t1, t2)) -{} -``` - -### Type of an item has changed - -```python ->>> t1 = {1:1, 2:2, 3:3} ->>> t2 = {1:1, 2:"2", 3:3} ->>> pprint(DeepDiff(t1, t2), indent=2) -{ 'type_changes': { 'root[2]': { 'new_type': , - 'new_value': '2', - 'old_type': , - 'old_value': 2}}} -``` - -And if you don't care about the value of items that have changed type, please set verbose level to 0: - -```python ->>> t1 = {1:1, 2:2, 3:3} ->>> t2 = {1:1, 2:"2", 3:3} ->>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) -{ 'type_changes': { 'root[2]': { 'new_type': , - 'old_type': ,}}} -``` - - -### Value of an item has changed - -```python ->>> t1 = {1:1, 2:2, 3:3} ->>> t2 = {1:1, 2:4, 3:3} ->>> pprint(DeepDiff(t1, t2), indent=2) -{'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} -``` - -### Item added or removed - -```python ->>> t1 = {1:1, 3:3, 4:4} ->>> t2 = {1:1, 3:3, 5:5, 6:6} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint(ddiff) -{'dictionary_item_added': {'root[5]', 'root[6]'}, - 'dictionary_item_removed': {'root[4]'}} -``` - -#### Items added or removed verbose - -And if you would like to know the values of items added or removed, please set the verbose_level to 2: - -```python ->>> t1 = 
{1:1, 3:3, 4:4} ->>> t2 = {1:1, 3:3, 5:5, 6:6} ->>> ddiff = DeepDiff(t1, t2, verbose_level=2) ->>> pprint(ddiff, indent=2) -{ 'dictionary_item_added': {'root[5]': 5, 'root[6]': 6}, - 'dictionary_item_removed': {'root[4]': 4}} -``` - -### String difference - -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} ->>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{ 'values_changed': { 'root[2]': {'new_value': 4, 'old_value': 2}, - "root[4]['b']": { 'new_value': 'world!', - 'old_value': 'world'}}} -``` - -### String difference 2 - -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{ 'values_changed': { "root[4]['b']": { 'diff': '--- \n' - '+++ \n' - '@@ -1,5 +1,4 @@\n' - '-world!\n' - '-Goodbye!\n' - '+world\n' - ' 1\n' - ' 2\n' - ' End', - 'new_value': 'world\n1\n2\nEnd', - 'old_value': 'world!\n' - 'Goodbye!\n' - '1\n' - '2\n' - 'End'}}} - ->>> ->>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) ---- -+++ -@@ -1,5 +1,4 @@ --world! --Goodbye! 
-+world - 1 - 2 - End -``` - -### List difference - -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} -``` - -### List difference Example 2 - -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{ 'iterable_item_added': {"root[4]['b'][3]": 3}, - 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 'old_value': 2}, - "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} -``` - -### List that contains dictionary: - -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{ 'dictionary_item_removed': ["root[4]['b'][2][2]"], - 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} -``` - -### Sets: - -```python ->>> t1 = {1, 2, 8} ->>> t2 = {1, 2, 3, 5} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (DeepDiff(t1, t2)) -{'set_item_added': ['root[3]', 'root[5]'], 'set_item_removed': ['root[8]']} -``` - -### Named Tuples: - -```python ->>> from collections import namedtuple ->>> Point = namedtuple('Point', ['x', 'y']) ->>> t1 = Point(x=11, y=22) ->>> t2 = Point(x=11, y=23) ->>> pprint (DeepDiff(t1, t2)) -{'values_changed': {'root.y': {'new_value': 23, 'old_value': 22}}} -``` - -### Custom objects: - -```python ->>> class ClassA(object): -... a = 1 -... def __init__(self, b): -... self.b = b -... 
->>> t1 = ClassA(1) ->>> t2 = ClassA(2) ->>> ->>> pprint(DeepDiff(t1, t2)) -{'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} -``` - -### Object attribute added: - -```python ->>> t2.c = "new attribute" ->>> pprint(DeepDiff(t1, t2)) -{'attribute_added': ['root.c'], - 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} -``` - - - All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. - - -## Tree View - -Starting the version v3 You can choose the view into the deepdiff results. -The tree view provides you with tree objects that you can traverse through to find the parents of the objects that are diffed and the actual objects that are being diffed. - -This view is very useful when dealing with nested objects. -Note that tree view always returns results in the form of Python sets. - -You can traverse through the tree elements! - - The Tree view is just a different representation of the diffed data. - Behind the scene, DeepDiff creates the tree view first and then converts it to textual representation for the text view. - -``` -+---------------------------------------------------------------+ -| | -| parent(t1) parent node parent(t2) | -| + ^ + | -+------|--------------------------|---------------------|-------+ - | | | up | - | Child | | | ChildRelationship - | Relationship | | | - | down | | | -+------|----------------------|-------------------------|-------+ -| v v v | -| child(t1) child node child(t2) | -| | -+---------------------------------------------------------------+ -``` - - - up - Move up to the parent node - - down - Move down to the child node - - path() - Get the path to the current node - - t1 - The first item in the current node that is being diffed - - t2 - The second item in the current node that is being diffed - - additional - Additional information about the node i.e. 
repetition - - repetition - Shortcut to get the repetition report - - -The tree view allows you to have more than mere textual representaion of the diffed objects. -It gives you the actual objects (t1, t2) throughout the tree of parents and children. - -## Examples - Tree View - - The Tree View is introduced in DeepDiff v3 - Set view='tree' in order to use this view. - -### Value of an item has changed (Tree View) - -```python ->>> from deepdiff import DeepDiff ->>> from pprint import pprint ->>> t1 = {1:1, 2:2, 3:3} ->>> t2 = {1:1, 2:4, 3:3} ->>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') ->>> ddiff_verbose0 -{'values_changed': {}} ->>> ->>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') ->>> ddiff_verbose1 -{'values_changed': {}} ->>> set_of_values_changed = ddiff_verbose1['values_changed'] ->>> # since set_of_values_changed includes only one item in a set ->>> # in order to get that one item we can: ->>> (changed,) = set_of_values_changed ->>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] - ->>> changed.t1 -2 ->>> changed.t2 -4 ->>> # You can traverse through the tree, get to the parents! ->>> changed.up - -``` - -### List difference (Tree View) - -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> ddiff -{'iterable_item_removed': {, }} ->>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. 
->>> # One way to get one item from the set is to convert it to a list ->>> # And then get the first item of the list: ->>> removed = list(ddiff['iterable_item_removed'])[0] ->>> removed - ->>> ->>> parent = removed.up ->>> parent - ->>> parent.path() -"root[4]['b']" ->>> parent.t1 -[1, 2, 3, 4] ->>> parent.t2 -[1, 2] ->>> parent.up - ->>> parent.up.up - ->>> parent.up.up.t1 -{1: 1, 2: 2, 3: 3, 4: {'a': 'hello', 'b': [1, 2, 3, 4]}} ->>> parent.up.up.t1 == t1 # It is holding the original t1 that we passed to DeepDiff -True -``` - -### List difference 2 (Tree View) - -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> pprint(ddiff, indent = 2) -{ 'iterable_item_added': {}, - 'values_changed': { , - }} ->>> ->>> # Note that iterable_item_added is a set with one item. ->>> # So in order to get that one item from it, we can do: ->>> ->>> (added,) = ddiff['iterable_item_added'] ->>> added - ->>> added.up.up - ->>> added.up.up.path() -'root[4]' ->>> added.up.up.down - ->>> ->>> # going up twice and then down twice gives you the same node in the tree: ->>> added.up.up.down.down == added -True -``` - -### List difference ignoring order but reporting repetitions (Tree View) - -```python ->>> t1 = [1, 3, 1, 4] ->>> t2 = [4, 4, 1] ->>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree') ->>> pprint(ddiff, indent=2) -{ 'iterable_item_removed': {}, - 'repetition_change': { , - }} ->>> ->>> # repetition_change is a set with 2 items. ->>> # in order to get those 2 items, we can do the following. ->>> # or we can convert the set to list and get the list items. ->>> # or we can iterate through the set items ->>> ->>> (repeat1, repeat2) = ddiff['repetition_change'] ->>> repeat1 # the default verbosity is set to 1. 
- ->>> # The actual data regarding the repetitions can be found in the repetition attribute: ->>> repeat1.repetition -{'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} ->>> ->>> # If you change the verbosity, you will see less: ->>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree', verbose_level=0) ->>> ddiff -{'repetition_change': {, }, 'iterable_item_removed': {}} ->>> (repeat1, repeat2) = ddiff['repetition_change'] ->>> repeat1 - ->>> ->>> # But the verbosity level does not change the actual report object. ->>> # It only changes the textual representaion of the object. We get the actual object here: ->>> repeat1.repetition -{'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} ->>> repeat1.t1 -4 ->>> repeat1.t2 -4 ->>> repeat1.up - -``` - -### List that contains dictionary (Tree View) - -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> pprint (ddiff, indent = 2) -{ 'dictionary_item_removed': {}, - 'values_changed': {}} - -Sets (Tree View): ->>> t1 = {1, 2, 8} ->>> t2 = {1, 2, 3, 5} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> print(ddiff) -{'set_item_removed': {}, 'set_item_added': {, }} ->>> # grabbing one item from set_item_removed set which has one item only ->>> (item,) = ddiff['set_item_removed'] ->>> item.up - ->>> item.up.t1 == t1 -True -``` - -### Named Tuples (Tree View): - -```python ->>> from collections import namedtuple ->>> Point = namedtuple('Point', ['x', 'y']) ->>> t1 = Point(x=11, y=22) ->>> t2 = Point(x=11, y=23) ->>> print(DeepDiff(t1, t2, view='tree')) -{'values_changed': {}} -``` - -### Custom objects (Tree View): - -```python ->>> class ClassA(object): -... a = 1 -... def __init__(self, b): -... self.b = b -... 
->>> t1 = ClassA(1) ->>> t2 = ClassA(2) ->>> ->>> print(DeepDiff(t1, t2, view='tree')) -{'values_changed': {}} -``` - -### Object attribute added (Tree View): - -```python ->>> t2.c = "new attribute" ->>> pprint(DeepDiff(t1, t2, view='tree')) -{'attribute_added': {}, - 'values_changed': {}} -``` - -### Approximate decimals comparison (Significant digits after the point) (Tree View): - -```python ->>> t1 = Decimal('1.52') ->>> t2 = Decimal('1.57') ->>> DeepDiff(t1, t2, significant_digits=0, view='tree') -{} ->>> ddiff = DeepDiff(t1, t2, significant_digits=1, view='tree') ->>> ddiff -{'values_changed': {}} ->>> (change1,) = ddiff['values_changed'] ->>> change1 - ->>> change1.t1 -Decimal('1.52') ->>> change1.t2 -Decimal('1.57') ->>> change1.path() -'root' -``` - -### Approximate float comparison (Significant digits after the point) (Tree View): - -```python ->>> t1 = [ 1.1129, 1.3359 ] ->>> t2 = [ 1.113, 1.3362 ] ->>> ddiff = DeepDiff(t1, t2, significant_digits=3, view='tree') ->>> ddiff -{} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> pprint(ddiff, indent=2) -{ 'values_changed': { , - }} ->>> ddiff = DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1, view='tree') ->>> ddiff -{'values_changed': {}} -``` - - All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. - -## Serialization - -In order to convert the DeepDiff object into a normal Python dictionary, use the to_dict() method. -Note that to_dict will use the text view even if you did the diff in tree view. - -Example: - -```python ->>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} ->>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> ddiff.to_dict() -{'type_changes': {"root[4]['b']": {'old_type': , 'new_type': , 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} -``` - -In order to do safe json serialization, use the to_json() method. 
- -Example: - -```python ->>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} ->>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> ddiff.to_json() -'{"type_changes": {"root[4][\'b\']": {"old_type": "list", "new_type": "str", "old_value": [1, 2, 3], "new_value": "world\\n\\n\\nEnd"}}}' -``` - -If you want the original DeepDiff object to be serialized with all the bells and whistles, you can use the to_json_pickle() and to_json_pickle() in order to serialize and deserialize its results into json. - -Serialize and then deserialize back to deepdiff - -```python ->>> t1 = {1: 1, 2: 2, 3: 3} ->>> t2 = {1: 1, 2: "2", 3: 3} ->>> ddiff = DeepDiff(t1, t2) ->>> jsoned = ddiff.to_json_pickle ->>> jsoned -'{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' ->>> ddiff_new = DeepDiff.from_json_pickle(jsoned) ->>> ddiff == ddiff_new -True -``` - -## Pycon 2016 +# Pycon 2016 I was honored to give a talk about how DeepDiff does what it does at Pycon 2016. Please check out the video and let me know what you think: @@ -999,11 +384,11 @@ I was honored to give a talk about how DeepDiff does what it does at Pycon 2016. And here is more info: -## Documentation +# Documentation -## ChangeLog +# ChangeLog - v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. 
- v3-5-0: Exclude regex path @@ -1037,14 +422,14 @@ And here is more info: - v0-5-6: Adding slots support - v0-5-5: Adding loop detection -## Contribute +# Contribute 1. Please make your PR against the dev branch 2. Please make sure that your PR has tests. Since DeepDiff is used in many sensitive data driven projects, we maintain 100% test coverage on the code. There are occasiannly exceptions to that rule but that is rare. Thank you! -## Authors +# Authors Seperman (Sep Dehpour) diff --git a/deepdiff/search.py b/deepdiff/search.py index 4f07de34..9f32e614 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -2,15 +2,20 @@ # -*- coding: utf-8 -*- # In order to run the docstrings: # python3 -m deepdiff.search +import os import re from collections.abc import MutableMapping, Iterable import logging -from deepdiff.helper import strings, numbers, add_to_frozen_set +from deepdiff.helper import strings, numbers, add_to_frozen_set, current_dir logger = logging.getLogger(__name__) +with open(os.path.join(current_dir, 'search_doc.rst'), 'r') as doc_file: + doc = doc_file.read() + + class DeepSearch(dict): r""" **DeepSearch** @@ -306,34 +311,7 @@ def __search(self, obj, item, parent="root", parents_ids=frozenset({})): class grep: - """ - **Grep!** - - grep is a new interface for Deep Search. It takes exactly the same arguments. - And it works just like grep in shell! 
- - **Examples** - - Importing - >>> from deepdiff import grep - >>> from pprint import pprint - - Search in list for string - >>> obj = ["long somewhere", "string", 0, "somewhere great!"] - >>> item = "somewhere" - >>> ds = obj | grep(item) - >>> print(ds) - {'matched_values': {'root[3]', 'root[0]'} - - Search in nested data for string - >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}] - >>> item = "somewhere" - >>> ds = obj | grep(item, verbose_level=2) - >>> pprint(ds, indent=2) - { 'matched_paths': {"root[1]['somewhere']": 'around'}, - 'matched_values': { 'root[0]': 'something somewhere', - "root[1]['long']": 'somewhere'}} - """ + __doc__ = doc def __init__(self, item, diff --git a/deepdiff/search_doc.rst b/deepdiff/search_doc.rst new file mode 100644 index 00000000..cc40ab52 --- /dev/null +++ b/deepdiff/search_doc.rst @@ -0,0 +1,26 @@ +**Grep** + +grep is a new interface for Deep Search. It takes exactly the same arguments. +And it works just like grep in shell! 
+ +**Examples** + +Importing + >>> from deepdiff import grep + >>> from pprint import pprint + +Search in list for string + >>> obj = ["long somewhere", "string", 0, "somewhere great!"] + >>> item = "somewhere" + >>> ds = obj | grep(item) + >>> print(ds) + {'matched_values': {'root[3]', 'root[0]'}} + +Search in nested data for string + >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}] + >>> item = "somewhere" + >>> ds = obj | grep(item, verbose_level=2) + >>> pprint(ds, indent=2) + { 'matched_paths': {"root[1]['somewhere']": 'around'}, + 'matched_values': { 'root[0]': 'something somewhere', + "root[1]['long']": 'somewhere'}} From 601f8196b44b27bc5b5674dded97c4d3b4c25eb5 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 02:00:59 -0700 Subject: [PATCH 72/76] docs --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 30db927d..07243af0 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,8 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 DeepDiff gets the difference of 2 objects. 
-Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst) -The full documentation can be found on +> - Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst) +> - The full documentation can be found on ## Examples @@ -246,8 +246,8 @@ Example: ``` -Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst) -The full documentation can be found on +> - Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst) +> - The full documentation can be found on # Deep Search @@ -279,8 +279,8 @@ And you can pass all the same kwargs as DeepSearch to grep too: {'matched_paths': {"root['somewhere']": 'around'}, 'matched_values': {"root['long']": 'somewhere'}} ``` -Please take a look at the [DeepSearch docs](deepdiff/search_doc.rst) -The full documentation can be found on +> - Please take a look at the [DeepSearch docs](deepdiff/search_doc.rst) +> - The full documentation can be found on # Deep Hash (New in v4-0-0) @@ -288,8 +288,8 @@ The full documentation can be found on DeepHash is designed to give you hash of ANY python object based on its contents even if the object is not considered hashable! DeepHash is supposed to be deterministic in order to make sure 2 objects that contain the same data, produce the same hash. -Please take a look at the [DeepHash docs](deepdiff/deephash_doc.rst) -The full documentation can be found on +> - Please take a look at the [DeepHash docs](deepdiff/deephash_doc.rst) +> - The full documentation can be found on Let's say you have a dictionary object. @@ -337,8 +337,8 @@ Which you can write as: At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. 
-Please take a look at the [DeepHash docs](deepdiff/deephash_doc.rst) -The full documentation can be found on +> - Please take a look at the [DeepHash docs](deepdiff/deephash_doc.rst) +> - The full documentation can be found on # Using DeepDiff in unit tests From 44753e6e42cac75a33236ed00064b2efc549000a Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 02:08:55 -0700 Subject: [PATCH 73/76] fixing pypy3 tests --- tests/test_hash.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/test_hash.py b/tests/test_hash.py index ad1249db..2f10b478 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -178,7 +178,7 @@ def test_named_tuples(self): obj = Point(x=11) result = DeepHashPrep(obj, ignore_string_type_changes=True) if pypy3: - assert result[get_id(obj)] == "ntPoint:{%s:int:11}" % x + assert result[obj] == "ntPoint:{%s:int:11}" % x else: expected_result = { x: x_prep, @@ -197,21 +197,9 @@ class MyEnum(Enum): # the ids of strings change if pypy3: # only compare the hashes for the enum instances themselves - assert DeepHashPrep(MyEnum.A)[get_id(MyEnum.A)] == ( - 'objdict:{' - '__objclass__:EnumMeta:objdict:{_name_:B;_value_:int:2};' - '_name_:A;_value_:int:1}' - ) - assert DeepHashPrep(MyEnum.B)[get_id(MyEnum.B)] == ( - 'objdict:{' - '__objclass__:EnumMeta:objdict:{_name_:A;_value_:int:1};' - '_name_:B;_value_:int:2}' - ) - assert DeepHashPrep(MyEnum(1))[get_id(MyEnum.A)] == ( - 'objdict:{' - '__objclass__:EnumMeta:objdict:{_name_:B;_value_:int:2};' - '_name_:A;_value_:int:1}' - ) + assert DeepHashPrep(MyEnum.A)[MyEnum.A] == r'objMyEnum:{str:__objclass__:EnumMeta:objMyEnum:{str:_name_:str:B;str:_value_:int:2};str:_name_:str:A;str:_value_:int:1}' + assert DeepHashPrep(MyEnum.B)[MyEnum.B] == r'objMyEnum:{str:__objclass__:EnumMeta:objMyEnum:{str:_name_:str:A;str:_value_:int:1};str:_name_:str:B;str:_value_:int:2}' + assert DeepHashPrep(MyEnum(1))[MyEnum.A] == 
r'objMyEnum:{str:__objclass__:EnumMeta:objMyEnum:{str:_name_:str:B;str:_value_:int:2};str:_name_:str:A;str:_value_:int:1}' else: assert DeepHashPrep(MyEnum.A) == DeepHashPrep(MyEnum.A) assert DeepHashPrep(MyEnum.A) == DeepHashPrep(MyEnum(1)) From f2a40f8b2423ef996fb602ff7aa8499a59c425bc Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 02:20:22 -0700 Subject: [PATCH 74/76] fixing test for older py 3 --- tests/test_serialization.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_serialization.py b/tests/test_serialization.py index b88f7993..d8236ebb 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -1,5 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import json import pytest from deepdiff import DeepDiff @@ -73,5 +74,5 @@ class B: ddiff = DeepDiff(t1, t2) default_mapping = {A: lambda x: 'obj A', B: lambda x: 'obj B'} result = ddiff.to_json(default_mapping=default_mapping) - expected_result = r'{"type_changes": {"root": {"old_type": "A", "new_type": "B", "old_value": "obj A", "new_value": "obj B"}}}' - assert expected_result == result + expected_result = {"type_changes": {"root": {"old_type": "A", "new_type": "B", "old_value": "obj A", "new_value": "obj B"}}} + assert expected_result == json.loads(result) From 1c109440187c5115b50c5664e46652d6818850b5 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 02:27:38 -0700 Subject: [PATCH 75/76] updating docs --- README.md | 20 ++++++++------------ docs/index.rst | 19 ++++++++----------- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 07243af0..b5fda0de 100644 --- a/README.md +++ b/README.md @@ -431,18 +431,14 @@ Thank you! 
# Authors -Seperman (Sep Dehpour) - -- [Github](https://github.com/seperman) -- [Linkedin](http://www.linkedin.com/in/sepehr) -- [ZepWorks](http://www.zepworks.com) - -Victor Hahn Castell - -- [hahncastell.de](http://hahncastell.de) -- [flexoptix.net](http://www.flexoptix.net) - -Also thanks to: +- Seperman (Sep Dehpour) + - [Github](https://github.com/seperman) + - [Linkedin](http://www.linkedin.com/in/sepehr) + - [ZepWorks](http://www.zepworks.com) + +- Victor Hahn Castell for major contributions + - [hahncastell.de](http://hahncastell.de) + - [flexoptix.net](http://www.flexoptix.net) - nfvs for Travis-CI setup script. - brbsix for initial Py3 porting. diff --git a/docs/index.rst b/docs/index.rst index 54554d79..0e168247 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -274,20 +274,17 @@ Changelog Authors ======= -Sep Dehpour +- Sep Dehpour -- `Github `_ -- `ZepWorks `_ -- `Linkedin `_ -- `Article about Deepdiff `_ + - `Github `_ + - `ZepWorks `_ + - `Linkedin `_ + - `Article about Deepdiff `_ -Victor Hahn Castell +- Victor Hahn Castell for major contributions -- `hahncastell.de `_ -- `flexoptix.net `_ - - -ALso thanks to: + - `hahncastell.de `_ + - `flexoptix.net `_ - nfvs for Travis-CI setup script. - brbsix for initial Py3 porting. From 906c16d00652061e96aa4862d349c91e5962645c Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 19 Mar 2019 02:52:25 -0700 Subject: [PATCH 76/76] Fixing docstrings --- deepdiff/deephash_doc.rst | 5 ++- deepdiff/diff.py | 2 +- deepdiff/diff_doc.rst | 92 +++++++++++++++++++-------------------- 3 files changed, 49 insertions(+), 50 deletions(-) diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index 23db72de..8318c161 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -111,6 +111,7 @@ ignore_numeric_type_changes: Boolean, default = True 231678797214551245419120414857003063149 You can pass a list of tuples or list of lists if you have various type groups. 
When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: + >>> from deepdiff import DeepDiff >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] @@ -133,8 +134,8 @@ ignore_type_in_groups example with custom objects: >>> burritos = [burrito] >>> tacos = [taco] >>> - >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) - >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) + >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)]) + >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)]) >>> d1[burrito] == d2[taco] True diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 933ad8a2..ea6d41cc 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -679,7 +679,7 @@ def to_json(self, default_mapping=None): >>> default_mapping = {A: lambda x: 'obj A', B: lambda x: 'obj B'} >>> ddiff.to_json(default_mapping=default_mapping) - >>> '{"type_changes": {"root": {"old_type": "A", "new_type": "B", "old_value": "obj A", "new_value": "obj B"}}}' + '{"type_changes": {"root": {"old_type": "A", "new_type": "B", "old_value": "obj A", "new_value": "obj B"}}}' """ return json.dumps(self.to_dict(), default=json_convertor_default(default_mapping=default_mapping)) diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 87466330..64f10929 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -89,7 +89,7 @@ Example of using the text view. >>> t2 = {1:1, 3:3, 5:5, 6:6} >>> ddiff = DeepDiff(t1, t2) >>> print(ddiff) - {'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}} + {'dictionary_item_added': [root[5], root[6]], 'dictionary_item_removed': [root[4]]} So for example ddiff['dictionary_item_removed'] is a set if strings thus this is called the text view. 
@@ -129,8 +129,8 @@ Item added and/or removed >>> t2 = {1:1, 3:3, 5:5, 6:6} >>> ddiff = DeepDiff(t1, t2) >>> pprint (ddiff) - {'dictionary_item_added': {'root[5]', 'root[6]'}, - 'dictionary_item_removed': {'root[4]'}} + {'dictionary_item_added': [root[5], root[6]], + 'dictionary_item_removed': [root[4]]} Set verbose level to 2 in order to see the added or removed items with their values >>> t1 = {1:1, 3:3, 4:4} @@ -230,7 +230,7 @@ List that contains dictionary: >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} >>> ddiff = DeepDiff(t1, t2) >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': {"root[4]['b'][2][2]"}, + { 'dictionary_item_removed': [root[4]['b'][2][2]], 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} Sets: @@ -238,7 +238,7 @@ Sets: >>> t2 = {1, 2, 3, 5} >>> ddiff = DeepDiff(t1, t2) >>> pprint(ddiff) - {'set_item_added': {'root[5]', 'root[3]'}, 'set_item_removed': {'root[8]'}} + {'set_item_added': [root[3], root[5]], 'set_item_removed': [root[8]]} Named Tuples: >>> from collections import namedtuple @@ -263,7 +263,7 @@ Custom objects: Object attribute added: >>> t2.c = "new attribute" >>> pprint(DeepDiff(t1, t2)) - {'attribute_added': {'root.c'}, + {'attribute_added': [root.c], 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} Approximate decimals comparison (Significant digits after the point): @@ -272,7 +272,7 @@ Approximate decimals comparison (Significant digits after the point): >>> DeepDiff(t1, t2, significant_digits=0) {} >>> DeepDiff(t1, t2, significant_digits=1) - {'values_changed': {'root': {'old_value': Decimal('1.52'), 'new_value': Decimal('1.57')}}} + {'values_changed': {'root': {'new_value': Decimal('1.57'), 'old_value': Decimal('1.52')}}} Approximate float comparison (Significant digits after the point): >>> t1 = [ 1.1129, 1.3359 ] @@ -354,9 +354,9 @@ Ignore Type Number - Dictionary that contains float and integer: >>> ddiff = DeepDiff(t1, t2) >>> pprint(ddiff, indent=2) { 
'type_changes': { 'root[1]': { 'new_type': , - 'new_value': 1.0, - 'old_type': , - 'old_value': 1}}} + 'new_value': 1.0, + 'old_type': , + 'old_value': 1}}} >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) >>> pprint(ddiff, indent=2) {} @@ -369,17 +369,17 @@ Ignore Type Number - List that contains float and integer: >>> ddiff = DeepDiff(t1, t2) >>> pprint(ddiff, indent=2) { 'type_changes': { 'root[0]': { 'new_type': , - 'new_value': 1.0, - 'old_type': , - 'old_value': 1}, - 'root[1]': { 'new_type': , - 'new_value': 2.0, - 'old_type': , - 'old_value': 2}, - 'root[2]': { 'new_type': , - 'new_value': 3.0, - 'old_type': , - 'old_value': 3}}} + 'new_value': 1.0, + 'old_type': , + 'old_value': 1}, + 'root[1]': { 'new_type': , + 'new_value': 2.0, + 'old_type': , + 'old_value': 2}, + 'root[2]': { 'new_type': , + 'new_value': 3.0, + 'old_type': , + 'old_value': 3}}} >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) >>> pprint(ddiff, indent=2) {} @@ -469,11 +469,11 @@ Value of an item has changed (Tree View) >>> t2 = {1:1, 2:4, 3:3} >>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') >>> ddiff_verbose0 - {'values_changed': {}} + {'values_changed': []} >>> >>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') >>> ddiff_verbose1 - {'values_changed': {}} + {'values_changed': []} >>> set_of_values_changed = ddiff_verbose1['values_changed'] >>> # since set_of_values_changed includes only one item in a set >>> # in order to get that one item we can: @@ -493,13 +493,13 @@ List difference (Tree View) >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} >>> ddiff = DeepDiff(t1, t2, view='tree') >>> ddiff - {'iterable_item_removed': {, }} + {'iterable_item_removed': [, ]} >>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. 
>>> # One way to get one item from the set is to convert it to a list >>> # And then get the first item of the list: >>> removed = list(ddiff['iterable_item_removed'])[0] >>> removed - + >>> >>> parent = removed.up >>> parent @@ -524,16 +524,15 @@ List difference 2 (Tree View) >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} >>> ddiff = DeepDiff(t1, t2, view='tree') >>> pprint(ddiff, indent = 2) - { 'iterable_item_added': {}, - 'values_changed': { , - }} + { 'iterable_item_added': [], + 'values_changed': [, ]} >>> >>> # Note that iterable_item_added is a set with one item. >>> # So in order to get that one item from it, we can do: >>> >>> (added,) = ddiff['iterable_item_added'] >>> added - + >>> added.up.up >>> added.up.up.path() @@ -550,9 +549,8 @@ List difference ignoring order but reporting repetitions (Tree View) >>> t2 = [4, 4, 1] >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree') >>> pprint(ddiff, indent=2) - { 'iterable_item_removed': {}, - 'repetition_change': { , - }} + { 'iterable_item_removed': [], + 'repetition_change': [, ]} >>> >>> # repetition_change is a set with 2 items. >>> # in order to get those 2 items, we can do the following. @@ -561,7 +559,7 @@ List difference ignoring order but reporting repetitions (Tree View) >>> >>> (repeat1, repeat2) = ddiff['repetition_change'] >>> repeat1 # the default verbosity is set to 1. 
- + >>> # The actual data regarding the repetitions can be found in the repetition attribute: >>> repeat1.repetition {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} @@ -569,7 +567,7 @@ List difference ignoring order but reporting repetitions (Tree View) >>> # If you change the verbosity, you will see less: >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree', verbose_level=0) >>> ddiff - {'repetition_change': {, }, 'iterable_item_removed': {}} + {'repetition_change': [, ], 'iterable_item_removed': []} >>> (repeat1, repeat2) = ddiff['repetition_change'] >>> repeat1 @@ -590,15 +588,15 @@ List that contains dictionary (Tree View) >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} >>> ddiff = DeepDiff(t1, t2, view='tree') >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': {}, - 'values_changed': {}} + { 'dictionary_item_removed': [], + 'values_changed': []} Sets (Tree View): >>> t1 = {1, 2, 8} >>> t2 = {1, 2, 3, 5} >>> ddiff = DeepDiff(t1, t2, view='tree') >>> print(ddiff) - {'set_item_removed': {}, 'set_item_added': {, }} + {'set_item_removed': [], 'set_item_added': [, ]} >>> # grabbing one item from set_item_removed set which has one item only >>> (item,) = ddiff['set_item_removed'] >>> item.up @@ -612,7 +610,7 @@ Named Tuples (Tree View): >>> t1 = Point(x=11, y=22) >>> t2 = Point(x=11, y=23) >>> print(DeepDiff(t1, t2, view='tree')) - {'values_changed': {}} + {'values_changed': []} Custom objects (Tree View): >>> class ClassA(object): @@ -624,13 +622,13 @@ Custom objects (Tree View): >>> t2 = ClassA(2) >>> >>> print(DeepDiff(t1, t2, view='tree')) - {'values_changed': {}} + {'values_changed': []} Object attribute added (Tree View): >>> t2.c = "new attribute" >>> pprint(DeepDiff(t1, t2, view='tree')) - {'attribute_added': {}, - 'values_changed': {}} + {'attribute_added': [], + 'values_changed': []} Approximate decimals comparison (Significant digits after the point) (Tree View): >>> t1 = 
Decimal('1.52') @@ -639,7 +637,7 @@ Approximate decimals comparison (Significant digits after the point) (Tree View) {} >>> ddiff = DeepDiff(t1, t2, significant_digits=1, view='tree') >>> ddiff - {'values_changed': {}} + {'values_changed': []} >>> (change1,) = ddiff['values_changed'] >>> change1 @@ -658,11 +656,10 @@ Approximate float comparison (Significant digits after the point) (Tree View): {} >>> ddiff = DeepDiff(t1, t2, view='tree') >>> pprint(ddiff, indent=2) - { 'values_changed': { , - }} + { 'values_changed': [, ]} >>> ddiff = DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1, view='tree') >>> ddiff - {'values_changed': {}} + {'values_changed': []} **Exclude paths** @@ -676,6 +673,7 @@ use `exclude_paths` and pass a set or list of paths to exclude, if only one item {} You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude. The items in the list could be raw regex strings or compiled regex objects. + >>> import re >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] >>> print(DeepDiff(t1, t2, exclude_regex_paths=r"root\[\d+\]\['b'\]")) @@ -728,9 +726,9 @@ Serialize and then deserialize back to deepdiff >>> t1 = {1: 1, 2: 2, 3: 3} >>> t2 = {1: 1, 2: "2", 3: 3} >>> ddiff = DeepDiff(t1, t2) - >>> jsoned = ddiff.to_json_pickle + >>> jsoned = ddiff.to_json_pickle() >>> jsoned - '{"type_changes": {"root[2]": {"py/object": "dict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' + '{"type_changes": {"root[2]": {"new_type": {"py/type": "builtins.str"}, "new_value": "2", "old_type": {"py/type": "builtins.int"}, "old_value": 2}}}' >>> ddiff_new = DeepDiff.from_json_pickle(jsoned) >>> ddiff == ddiff_new True