diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index b02608d4..c650d348 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: [3.7, 3.8, 3.9, "3.10"] architecture: ["x64"] steps: diff --git a/AUTHORS.md b/AUTHORS.md index e946785e..83f35113 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -1,6 +1,6 @@ -Authors: +# Authors -Authors in order of the contributions: +Authors in order of the timeline of their contributions: - [Sep Dehpour (Seperman)](http://www.zepworks.com) - [Victor Hahn Castell](http://hahncastell.de) for the tree view and major contributions: @@ -36,4 +36,8 @@ Authors in order of the contributions: - Tim Klein [timjklein36](https://github.com/timjklein36) for retaining the order of multiple dictionary items added via Delta. - Wilhelm Schürmann[wbsch](https://github.com/wbsch) for fixing the typo with yml files. - [lyz-code](https://github.com/lyz-code) for adding support for regular expressions in DeepSearch and strict_checking feature in DeepSearch. -- [dtorres-sf](https://github.com/dtorres-sf)for adding the option for custom compare function +- [dtorres-sf](https://github.com/dtorres-sf) for adding the option for custom compare function +- Tony Wang [Tony-Wang](https://github.com/Tony-Wang) for bugfix: verbose_level==0 should disable values_changes. +- Sun Ao [eggachecat](https://github.com/eggachecat) for adding custom operators. +- Sun Ao [eggachecat](https://github.com/eggachecat) for adding ignore_order_func. +- [SlavaSkvortsov](https://github.com/SlavaSkvortsov) for fixing unprocessed key error. diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c727831..e75ddb9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # DeepDiff Change log +- v5-6-0: Adding custom operators, and ignore_order_func. Bugfix: verbose_level==0 should disable values_changes. 
Bugfix: unprocessed key error. - v5-5-0: adding iterable_compare_func for DeepDiff, adding output_format of list for path() in tree view. - v5-4-0: adding strict_checking for numbers in DeepSearch. - v5-3-0: add support for regular expressions in DeepSearch. diff --git a/README.md b/README.md index 87e20699..bc101db7 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 5.5.0 +# DeepDiff v 5.6.0 ![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat) ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) @@ -18,21 +18,65 @@ Tested on Python 3.6+ and PyPy3. **NOTE: The last version of DeepDiff to work on Python 3.5 was DeepDiff 5-0-2** -- [Documentation](https://zepworks.com/deepdiff/5.5.0/) +- [Documentation](https://zepworks.com/deepdiff/5.6.0/) ## What is new? -Deepdiff 5.5.0 comes with regular expressions in the DeepSearch and grep modules: +DeepDiff 5-6-0 allows you to pass custom operators. ```python ->>> from deepdiff import grep ->>> from pprint import pprint ->>> obj = ["something here", {"long": "somewhere", "someone": 2, 0: 0, "somewhere": "around"}] ->>> ds = obj | grep("some.*", use_regexp=True) -{ 'matched_paths': ["root[1]['someone']", "root[1]['somewhere']"], - 'matched_values': ['root[0]', "root[1]['long']"]} +>>> from deepdiff import DeepDiff +>>> from deepdiff.operator import BaseOperator +>>> class CustomClass: +... def __init__(self, d: dict, l: list): +... self.dict = d +... self.dict['list'] = l +... +>>> +>>> custom1 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3]) +>>> custom2 = CustomClass(d=dict(c=3, d=4), l=[1, 2, 3, 2]) +>>> custom3 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3, 4]) +>>> +>>> +>>> class ListMatchOperator(BaseOperator): +... def give_up_diffing(self, level, diff_instance): +... if set(level.t1.dict['list']) == set(level.t2.dict['list']): +... return True +... +>>> +>>> DeepDiff(custom1, custom2, custom_operators=[ +... ListMatchOperator(types=[CustomClass]) +... 
]) +{} +>>> +>>> +>>> DeepDiff(custom2, custom3, custom_operators=[ +... ListMatchOperator(types=[CustomClass]) +... ]) +{'dictionary_item_added': [root.dict['a'], root.dict['b']], 'dictionary_item_removed': [root.dict['c'], root.dict['d']], 'values_changed': {"root.dict['list'][3]": {'new_value': 4, 'old_value': 2}}} +>>> + +``` + +**New in 5-6-0: Dynamic ignore order function** + +Ignoring order when certain word in the path + +```python +>>> from deepdiff import DeepDiff +>>> t1 = {'a': [1, 2], 'b': [3, 4]} +>>> t2 = {'a': [2, 1], 'b': [4, 3]} +>>> DeepDiff(t1, t2, ignore_order=True) +{} +>>> def ignore_order_func(level): +... return 'a' in level.path() +... +>>> DeepDiff(t1, t2, ignore_order=True, ignore_order_func=ignore_order_func) +{'values_changed': {"root['b'][0]": {'new_value': 4, 'old_value': 3}, "root['b'][1]": {'new_value': 3, 'old_value': 4}}} + ``` + ## Installation ### Install from PyPi: @@ -66,13 +110,13 @@ Note: if you want to use DeepDiff via commandline, make sure to run `pip install DeepDiff gets the difference of 2 objects. -> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.5.0/diff.html) -> - The full documentation of all modules can be found on +> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.6.0/diff.html) +> - The full documentation of all modules can be found on > - Tutorials and posts about DeepDiff can be found on ## A few Examples -> Note: This is just a brief overview of what DeepDiff can do. Please visit for full documentation. +> Note: This is just a brief overview of what DeepDiff can do. Please visit for full documentation. 
### List difference ignoring order or duplicates @@ -276,8 +320,8 @@ Example: ``` -> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.5.0/diff.html) -> - The full documentation can be found on +> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.6.0/diff.html) +> - The full documentation can be found on # Deep Search @@ -309,8 +353,8 @@ And you can pass all the same kwargs as DeepSearch to grep too: {'matched_paths': {"root['somewhere']": 'around'}, 'matched_values': {"root['long']": 'somewhere'}} ``` -> - Please take a look at the [DeepSearch docs](https://zepworks.com/deepdiff/5.5.0/dsearch.html) -> - The full documentation can be found on +> - Please take a look at the [DeepSearch docs](https://zepworks.com/deepdiff/5.6.0/dsearch.html) +> - The full documentation can be found on # Deep Hash (New in v4-0-0) @@ -318,8 +362,8 @@ And you can pass all the same kwargs as DeepSearch to grep too: DeepHash is designed to give you hash of ANY python object based on its contents even if the object is not considered hashable! DeepHash is supposed to be deterministic in order to make sure 2 objects that contain the same data, produce the same hash. -> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.5.0/deephash.html) -> - The full documentation can be found on +> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.6.0/deephash.html) +> - The full documentation can be found on Let's say you have a dictionary object. @@ -367,8 +411,8 @@ Which you can write as: At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. 
-> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.5.0/deephash.html) -> - The full documentation can be found on +> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.6.0/deephash.html) +> - The full documentation can be found on # Using DeepDiff in unit tests diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index 9298b225..79a37575 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,6 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep, Delta and DeepHash classes.""" # flake8: noqa -__version__ = '5.5.0' +__version__ = '5.6.0' import logging if __name__ == '__main__': diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 4a9445fa..b8c54725 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -13,9 +13,8 @@ from deepdiff.base import Base logger = logging.getLogger(__name__) -UNPROCESSED_KEY = 'unprocessed' +UNPROCESSED_KEY = object() -RESERVED_DICT_KEYS = {UNPROCESSED_KEY} EMPTY_FROZENSET = frozenset() INDEX_VS_ATTRIBUTE = ('[%s]', '.%s') @@ -185,7 +184,7 @@ def _getitem(hashes, obj, extract_index=0): except KeyError: raise KeyError(HASH_LOOKUP_ERR_MSG.format(obj)) from None - if isinstance(obj, strings) and obj in RESERVED_DICT_KEYS: + if obj is UNPROCESSED_KEY: extract_index = None return result_n_count if extract_index is None else result_n_count[extract_index] @@ -229,7 +228,7 @@ def _get_objects_to_hashes_dict(self, extract_index=0): """ result = dict_() for key, value in self.hashes.items(): - if key in RESERVED_DICT_KEYS: + if key is UNPROCESSED_KEY: result[key] = value else: result[key] = value[extract_index] diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 2f349031..4b488613 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -28,14 +28,13 @@ RemapDict, ResultDict, TextResult, TreeResult, DiffLevel, DictRelationship, AttributeRelationship, SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, - SetRelationship, 
NumpyArrayRelationship) + SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD) from deepdiff.deephash import DeepHash, combine_hashes_lists from deepdiff.base import Base from deepdiff.lfucache import LFUCache, DummyLFU logger = logging.getLogger(__name__) - MAX_PASSES_REACHED_MSG = ( 'DeepDiff has reached the max number of passes of {}. ' 'You can possibly get more accurate results by increasing the max_passes parameter.') @@ -120,6 +119,7 @@ def __init__(self, hasher=None, hashes=None, ignore_order=False, + ignore_order_func=None, ignore_type_in_groups=None, ignore_string_type_changes=False, ignore_numeric_type_changes=False, @@ -140,6 +140,7 @@ def __init__(self, verbose_level=1, view=TEXT_VIEW, iterable_compare_func=None, + custom_operators=None, _original_type=None, _parameters=None, _shared_parameters=None, @@ -156,12 +157,17 @@ def __init__(self, "cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, " "cache_tuning_sample_size, get_deep_distance, group_by, cache_purge_level, " "math_epsilon, iterable_compare_func, _original_type, " + "ignore_order_func, custom_operators, " "_parameters and _shared_parameters.") % ', '.join(kwargs.keys())) if _parameters: self.__dict__.update(_parameters) else: + self.custom_operators = custom_operators or [] self.ignore_order = ignore_order + + self.ignore_order_func = ignore_order_func or (lambda *_args, **_kwargs: ignore_order) + ignore_type_in_groups = ignore_type_in_groups or [] if numbers == ignore_type_in_groups or numbers in ignore_type_in_groups: ignore_numeric_type_changes = True @@ -327,6 +333,24 @@ def _report_result(self, report_type, level): level.report_type = report_type self.tree[report_type].add(level) + def custom_report_result(self, report_type, level, extra_info=None): + """ + Add a detected change to the reference-style result dictionary. + report_type will be added to level. + (We'll create the text-style report from there later.) 
+ :param report_type: A well defined string key describing the type of change. + Examples: "set_item_added", "values_changed" + :param parent: A DiffLevel object describing the objects in question in their + before-change and after-change object structure. + :param extra_info: A dict that describe this result + :rtype: None + """ + + if not self._skip_this(level): + level.report_type = report_type + level.additional[CUSTOM_FIELD] = extra_info + self.tree[report_type].add(level) + @staticmethod def _dict_from_slots(object): def unmangle(attribute): @@ -556,7 +580,7 @@ def _iterables_subscriptable(t1, t2): def _diff_iterable(self, level, parents_ids=frozenset(), _original_type=None): """Difference of iterables""" - if self.ignore_order: + if self.ignore_order_func(level): self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type) else: self._diff_iterable_in_order(level, parents_ids, _original_type=_original_type) @@ -1133,7 +1157,7 @@ def _diff_numpy_array(self, level, parents_ids=frozenset()): # which means numpy module needs to be available. So np can't be None. raise ImportError(CANT_FIND_NUMPY_MSG) # pragma: no cover - if not self.ignore_order: + if not self.ignore_order_func(level): # fast checks if self.significant_digits is None: if np.array_equal(level.t1, level.t2): @@ -1159,7 +1183,7 @@ def _diff_numpy_array(self, level, parents_ids=frozenset()): dimensions = len(shape) if dimensions == 1: self._diff_iterable(level, parents_ids, _original_type=_original_type) - elif self.ignore_order: + elif self.ignore_order_func(level): # arrays are converted to python lists so that certain features of DeepDiff can apply on them easier. # They will be converted back to Numpy at their final dimension. 
level.t1 = level.t1.tolist() @@ -1219,6 +1243,33 @@ def _auto_off_cache(self): self._stats[DISTANCE_CACHE_ENABLED] = False self.progress_logger('Due to minimal cache hits, {} is disabled.'.format('distance cache')) + def _use_custom_operator(self, level): + """ + For each level we check all custom operators. + If any one of them was a match for the level, we run the diff of the operator. + If the operator returned True, the operator must have decided these objects should not + be compared anymore. It might have already reported their results. + In that case the report will appear in the final results of this diff. + Otherwise basically the 2 objects in the level are being omitted from the results. + """ + + # used = False + + # for operator in self.custom_operators: + # if operator.match(level): + # prevent_default = operator.diff(level, self) + # used = True if prevent_default is None else prevent_default + + # return used + + for operator in self.custom_operators: + if operator.match(level): + prevent_default = operator.give_up_diffing(level=level, diff_instance=self) + if prevent_default: + return True + + return False + def _diff(self, level, parents_ids=frozenset(), _original_type=None): """ The main diff method @@ -1232,6 +1283,9 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None): if self._count_diff() is StopIteration: return + if self._use_custom_operator(level): + return + if level.t1 is level.t2: return diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 8901ea3c..1be4b0be 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -65,7 +65,7 @@ class np_type: np_int8, np_int16, np_int32, np_int64, np_uint8, np_uint16, np_uint32, np_uint64, np_intp, np_uintp, np_float32, np_float64, np_float_, np_complex64, - np_complex128, np_complex_, ) + np_complex128, np_complex_,) numpy_dtypes = set(numpy_numbers) numpy_dtypes.add(np_bool_) @@ -112,7 +112,6 @@ def copy(self): # pragma: no cover. 
Only used in pypy3 and py3.5 else: dict_ = OrderedDictPlus # pragma: no cover. Only used in pypy3 and py3.5 - if py4: logger.warning('Python 4 is not supported yet. Switching logic to Python 3.') # pragma: no cover py3 = True # pragma: no cover @@ -184,6 +183,7 @@ class NotPresent: # pragma: no cover in the future. We previously used None for this but this caused problem when users actually added and removed None. Srsly guys? :D """ + def __repr__(self): return 'not present' # pragma: no cover @@ -202,7 +202,6 @@ class CannotCompare(Exception): not_hashed = NotHashed() notpresent = NotPresent() - # Disabling remapping from old to new keys since the mapping is deprecated. RemapDict = dict_ @@ -316,8 +315,8 @@ def type_in_type_group(item, type_group): def type_is_subclass_of_type_group(item, type_group): return isinstance(item, type_group) \ - or (isinstance(item, type) and issubclass(item, type_group)) \ - or type_in_type_group(item, type_group) + or (isinstance(item, type) and issubclass(item, type_group)) \ + or type_in_type_group(item, type_group) def get_doc(doc_filename): @@ -426,7 +425,6 @@ def __repr__(self): not_found = _NotFound() - warnings.simplefilter('once', DeepDiffDeprecationWarning) @@ -493,8 +491,26 @@ def stop(self): return duration +def _eval_decimal(params): + return Decimal(params) + + +def _eval_datetime(params): + params = f'({params})' + params = literal_eval(params) + return datetime.datetime(*params) + + +def _eval_date(params): + params = f'({params})' + params = literal_eval(params) + return datetime.date(*params) + + LITERAL_EVAL_PRE_PROCESS = [ - ('Decimal(', ')', Decimal), + ('Decimal(', ')', _eval_decimal), + ('datetime.datetime(', ')', _eval_datetime), + ('datetime.date(', ')', _eval_date), ] @@ -508,8 +524,8 @@ def literal_eval_extended(item): for begin, end, func in LITERAL_EVAL_PRE_PROCESS: if item.startswith(begin) and item.endswith(end): # Extracting and removing extra quotes so for example "Decimal('10.1')" becomes "'10.1'" and 
then '10.1' - item2 = item[len(begin): -len(end)].strip('\'\"') - return func(item2) + params = item[len(begin): -len(end)].strip('\'\"') + return func(params) raise @@ -583,7 +599,7 @@ def get_homogeneous_numpy_compatible_type_of_seq(seq): iseq = iter(seq) first_type = type(next(iseq)) if first_type in {int, float, Decimal}: - type_ = first_type if all((type(x) is first_type) for x in iseq ) else False + type_ = first_type if all((type(x) is first_type) for x in iseq) else False return PYTHON_TYPE_TO_NUMPY_TYPE.get(type_, False) else: return False diff --git a/deepdiff/model.py b/deepdiff/model.py index 80273559..db000b2d 100644 --- a/deepdiff/model.py +++ b/deepdiff/model.py @@ -1,3 +1,4 @@ +import logging from collections.abc import Mapping from copy import copy from ordered_set import OrderedSet @@ -5,6 +6,8 @@ RemapDict, strings, short_repr, notpresent, get_type, numpy_numbers, np, literal_eval_extended, dict_) +logger = logging.getLogger(__name__) + FORCE_DEFAULT = 'fake' UP_DOWN = {'up': 'down', 'down': 'up'} @@ -24,6 +27,8 @@ "repetition_change", } +CUSTOM_FIELD = "__internal:custom:extra_info" + class DoesNotExist(Exception): pass @@ -47,6 +52,7 @@ class PrettyOrderedSet(OrderedSet): From the perspective of the users of the library, they are dealing with lists. Behind the scene, we have ordered sets. 
""" + def __repr__(self): return '[{}]'.format(", ".join(map(str, self))) @@ -85,9 +91,13 @@ def mutual_add_removes_to_become_value_changes(self): if 'iterable_item_added' in self and not self['iterable_item_added']: del self['iterable_item_added'] + def __getitem__(self, item): + if item not in self: + self[item] = PrettyOrderedSet() + return self.get(item) -class TextResult(ResultDict): +class TextResult(ResultDict): ADD_QUOTES_TO_STRINGS = True def __init__(self, tree_results=None, verbose_level=1): @@ -135,6 +145,7 @@ def _from_tree_results(self, tree): self._from_tree_set_item_added(tree) self._from_tree_repetition_change(tree) self._from_tree_deep_distance(tree) + self._from_tree_custom_results(tree) def _from_tree_default(self, tree, report_type): if report_type in tree: @@ -182,7 +193,7 @@ def _from_tree_type_changes(self, tree): remap_dict.update(old_value=change.t1, new_value=change.t2) def _from_tree_value_changed(self, tree): - if 'values_changed' in tree: + if 'values_changed' in tree and self.verbose_level > 0: for change in tree['values_changed']: the_changed = {'new_value': change.t2, 'old_value': change.t1} self['values_changed'][change.path( @@ -231,17 +242,36 @@ def _from_tree_repetition_change(self, tree): if 'repetition_change' in tree: for change in tree['repetition_change']: path = change.path(force=FORCE_DEFAULT) - self['repetition_change'][path] = RemapDict(change.additional[ - 'repetition']) + self['repetition_change'][path] = RemapDict( + change.additional['repetition'] + ) self['repetition_change'][path]['value'] = change.t1 def _from_tree_deep_distance(self, tree): if 'deep_distance' in tree: self['deep_distance'] = tree['deep_distance'] + def _from_tree_custom_results(self, tree): + for k, _level_list in tree.items(): + if k not in REPORT_KEYS: + if not isinstance(_level_list, PrettyOrderedSet): + continue -class DeltaResult(TextResult): + # if len(_level_list) == 0: + # continue + # + # if not isinstance(_level_list[0], DiffLevel): + 
# continue + + # _level_list is a list of DiffLevel + _custom_dict = {} + for _level in _level_list: + _custom_dict[_level.path( + force=FORCE_DEFAULT)] = _level.additional.get(CUSTOM_FIELD, {}) + self[k] = _custom_dict + +class DeltaResult(TextResult): ADD_QUOTES_TO_STRINGS = False def __init__(self, tree_results=None, ignore_order=None): @@ -830,7 +860,11 @@ def stringify_param(self, force=None): resurrected = literal_eval_extended(candidate) # Note: This will miss string-representable custom objects. # However, the only alternative I can currently think of is using eval() which is inherently dangerous. - except (SyntaxError, ValueError): + except (SyntaxError, ValueError) as err: + logger.error( + f'stringify_param was not able to get a proper repr for "{param}". ' + "This object will be reported as None. Add instructions for this object to DeepDiff's " + f"helper.literal_eval_extended to make it work properly: {err}") result = None else: result = candidate if resurrected == param else None diff --git a/deepdiff/operator.py b/deepdiff/operator.py new file mode 100644 index 00000000..be30e745 --- /dev/null +++ b/deepdiff/operator.py @@ -0,0 +1,28 @@ +import re +from deepdiff.helper import convert_item_or_items_into_compiled_regexes_else_none + + +class BaseOperator: + __operator_name__ = "__base__" + + def __init__(self, regex_paths=None, types=None): + if regex_paths: + self.regex_paths = convert_item_or_items_into_compiled_regexes_else_none(regex_paths) + else: + self.regex_paths = None + self.types = types + + def match(self, level) -> bool: + if self.regex_paths: + for pattern in self.regex_paths: + matched = re.search(pattern, level.path()) is not None + if matched: + return True + if self.types: + for type_ in self.types: + if isinstance(level.t1, type_) and isinstance(level.t2, type_): + return True + return False + + def give_up_diffing(self, level, diff_instance) -> bool: + raise NotImplementedError('Please implement the diff function.') diff --git 
a/deepdiff/path.py b/deepdiff/path.py index 89bddd41..cbea27fc 100644 --- a/deepdiff/path.py +++ b/deepdiff/path.py @@ -19,6 +19,8 @@ class RootCanNotBeModified(ValueError): def _add_to_elements(elements, elem, inside): # Ignore private items + if not elem: + return if not elem.startswith('__'): try: elem = literal_eval(elem) @@ -49,14 +51,31 @@ def _path_to_elements(path, root_element=DEFAULT_FIRST_ELEMENT): inside = False prev_char = None path = path[4:] # removing "root from the beginning" + brackets = [] + inside_quotes = False for char in path: if prev_char == '\\': elem += char + elif char in {'"', "'"}: + elem += char + inside_quotes = not inside_quotes + if not inside_quotes: + _add_to_elements(elements, elem, inside) + elem = '' + elif inside_quotes: + elem += char elif char == '[': if inside == '.': _add_to_elements(elements, elem, inside) - inside = '[' - elem = '' + inside = '[' + elem = '' + # we are already inside. The bracket is a part of the word. + elif inside == '[': + elem += char + else: + inside = '[' + brackets.append('[') + elem = '' elif char == '.': if inside == '[': elem += char @@ -67,9 +86,14 @@ def _path_to_elements(path, root_element=DEFAULT_FIRST_ELEMENT): inside = '.' elem = '' elif char == ']': - _add_to_elements(elements, elem, inside) - elem = '' - inside = False + if brackets and brackets[-1] == '[': + brackets.pop() + if brackets: + elem += char + else: + _add_to_elements(elements, elem, inside) + elem = '' + inside = False else: elem += char prev_char = char diff --git a/docs/Makefile b/docs/Makefile index 0f3c6a50..72c37aac 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -23,6 +23,7 @@ I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . help: @echo "Please use \`make ' where is one of" + @echo " buildme echos what to run to do live builds." 
@echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @@ -48,6 +49,9 @@ help: @echo " doctest to run all doctests embedded in the documentation (if enabled)" @echo " coverage to run coverage check of the documentation (if enabled)" +buildme: + @echo "Please make sure the .env is pointing to the right path for the build. Then run ./buildme.py" + clean: rm -rf $(BUILDDIR)/* diff --git a/docs/authors.rst b/docs/authors.rst index d9f732bd..f97a490d 100644 --- a/docs/authors.rst +++ b/docs/authors.rst @@ -3,7 +3,7 @@ Authors ======= -Thanks to the following people for their contributions: +Authors in order of the timeline of their contributions: - `Sep Dehpour (Seperman)`_ - `Victor Hahn Castell`_ for the tree view and major contributions: @@ -38,13 +38,15 @@ Thanks to the following people for their contributions: - `Timothy`_ for truncate_datetime - `d0b3rm4n`_ for bugfix to not apply format to non numbers. - `MyrikLD`_ for Bug Fix NoneType in ignore type groups -- Stian Jensen `stianjensen`_ for improving ignoring of NoneType in diff +- Stian Jensen `stianjensen`_ for improving ignoring of NoneType in + diff - Florian Klien `flowolf`_ for adding math_epsilon -- Tim Klein `timjklein36`_ for retaining the order of multiple dictionary items added via Delta -- Wilhelm Schürmann `wbsch`_ for fixing the typo with yml files. -- `lyz_code`_ for adding support for regular expressions in DeepSearch and strict_checking feature in DeepSearch. -- `dtorres_sf`_ for adding the option for custom compare function - +- Tim Klein `timjklein36`_ for retaining the order of multiple + dictionary items added via Delta. +- Wilhelm Schürmann\ `wbsch`_ for fixing the typo with yml files. +- `lyz-code`_ for adding support for regular expressions in DeepSearch + and strict_checking feature in DeepSearch. +- `dtorres-sf`_ for adding the option for custom compare function .. _Sep Dehpour (Seperman): http://www.zepworks.com .. 
_Victor Hahn Castell: http://hahncastell.de @@ -77,8 +79,9 @@ Thanks to the following people for their contributions: .. _flowolf: https://github.com/flowolf .. _timjklein36: https://github.com/timjklein36 .. _wbsch: https://github.com/wbsch -.. _lyz_code: https://github.com/lyz-code -.. _dtorres_sf: https://github.com/dtorres-sf +.. _lyz-code: https://github.com/lyz-code +.. _dtorres-sf: https://github.com/dtorres-sf +Thank you for contributing to DeepDiff! Back to :doc:`/index` diff --git a/docs/changelog.rst b/docs/changelog.rst index f1a53e0c..0d7c1e22 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -5,6 +5,7 @@ Changelog DeepDiff Changelog +- v5-6-0: Adding custom operators, and ignore_order_func. Bugfix: verbose_level==0 should disable values_changes. Bugfix: unprocessed key error. - v5-5-0: adding iterable_compare_func for DeepDiff, adding output_format of list for path() in tree view. - v5-4-0: adding strict_checking for numbers in DeepSearch. - v5-3-0: add support for regular expressions in DeepSearch. diff --git a/docs/conf.py b/docs/conf.py index 5cb949e7..8bd3b42e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '5.5.0' +version = '5.6.0' # The full version, including alpha/beta/rc tags. -release = '5.5.0' +release = '5.6.0' load_dotenv(override=True) DOC_VERSION = os.environ.get('DOC_VERSION', version) diff --git a/docs/other.rst b/docs/custom.rst similarity index 51% rename from docs/other.rst rename to docs/custom.rst index fb9055ba..d39a8861 100644 --- a/docs/other.rst +++ b/docs/custom.rst @@ -1,7 +1,7 @@ :doc:`/index` -Other Parameters -================ +Customized Diff +=============== .. _iterable_compare_func_label: @@ -123,7 +123,106 @@ For example you could use the level object to further determine if the 2 objects The level parameter of the iterable_compare_func is only used when ignore_order=False which is the default value for ignore_order. +.. 
_custom_operators_label: +Custom Operators +---------------- + +Whether two objects are different or not largely depends on the context. For example, an apple and a banana are the same +if you are considering whether they are fruits or not. + +In that case, you can pass *custom_operators* for the job. + +To define a custom operator, you just need to inherit from *BaseOperator* and + + * implement a give_up_diffing method + * give_up_diffing(level: DiffLevel, diff_instance: DeepDiff) -> boolean + + If it returns True, then we will give up diffing the 2 objects. + You may or may not use the diff_instance.custom_report_result within this function + to report any diff. If you decide not to report anything, and this + function returns True, then the objects are basically skipped in the results. + + * pass regex_paths and types that will be used to decide if the objects are matched. + Once the objects are matched, give_up_diffing will be run to compare them. + + +**Example 1: An operator that uses the L2 distance as the diff criterion and reports the distance** + + >>> import math + >>> + >>> from typing import List + >>> from deepdiff import DeepDiff + >>> from deepdiff.operator import BaseOperator + >>> + >>> + >>> class L2DistanceDifferWithPreventDefault(BaseOperator): + ... def __init__(self, regex_paths: List[str], distance_threshold: float): + ... super().__init__(regex_paths) + ... self.distance_threshold = distance_threshold + ... def _l2_distance(self, c1, c2): + ... return math.sqrt( + ... (c1["x"] - c2["x"]) ** 2 + (c1["y"] - c2["y"]) ** 2 + ... ) + ... def give_up_diffing(self, level, diff_instance): + ... l2_distance = self._l2_distance(level.t1, level.t2) + ... if l2_distance > self.distance_threshold: + ... diff_instance.custom_report_result('distance_too_far', level, { + ... "l2_distance": l2_distance + ... }) + ... return True + ... + >>> + >>> t1 = { + ... "coordinates": [ + ... {"x": 5, "y": 5}, + ... {"x": 8, "y": 8} + ... ] + ... 
} + >>> + >>> t2 = { + ... "coordinates": [ + ... {"x": 6, "y": 6}, + ... {"x": 88, "y": 88} + ... ] + ... } + >>> DeepDiff(t1, t2, custom_operators=[L2DistanceDifferWithPreventDefault( + ... ["^root\\['coordinates'\\]\\[\\d+\\]$"], + ... 1 + ... )]) + {'distance_too_far': {"root['coordinates'][0]": {'l2_distance': 1.4142135623730951}, "root['coordinates'][1]": {'l2_distance': 113.13708498984761}}} + + +**Example 2: If the objects are subclasses of a certain type, only compare them if their list attributes are not equal sets** + + >>> class CustomClass: + ... def __init__(self, d: dict, l: list): + ... self.dict = d + ... self.dict['list'] = l + ... + >>> + >>> custom1 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3]) + >>> custom2 = CustomClass(d=dict(c=3, d=4), l=[1, 2, 3, 2]) + >>> custom3 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3, 4]) + >>> + >>> + >>> class ListMatchOperator(BaseOperator): + ... def give_up_diffing(self, level, diff_instance): + ... if set(level.t1.dict['list']) == set(level.t2.dict['list']): + ... return True + ... + >>> + >>> DeepDiff(custom1, custom2, custom_operators=[ + ... ListMatchOperator(types=[CustomClass]) + ... ]) + {} + >>> + >>> + >>> DeepDiff(custom2, custom3, custom_operators=[ + ... ListMatchOperator(types=[CustomClass]) + ... ]) + {'dictionary_item_added': [root.dict['a'], root.dict['b']], 'dictionary_item_removed': [root.dict['c'], root.dict['d']], 'values_changed': {"root.dict['list'][3]": {'new_value': 4, 'old_value': 2}}} + >>> Back to :doc:`/index` diff --git a/docs/diff.rst b/docs/diff.rst index 92df2069..2a51a818 100644 --- a/docs/diff.rst +++ b/docs/diff.rst @@ -23,7 +23,7 @@ DeepDiff serialization optimizations stats - other + custom troubleshoot Back to :doc:`/index` diff --git a/docs/diff_doc.rst b/docs/diff_doc.rst index d57d7ab0..5bf23b1c 100644 --- a/docs/diff_doc.rst +++ b/docs/diff_doc.rst @@ -3,7 +3,6 @@ DeepDiff ======== - Deep Difference of dictionaries, iterables, strings and almost any other object. 
It will recursively look for all the changes. @@ -34,6 +33,9 @@ cache_purge_level: int, 0, 1, or 2. default=1 cache_tuning_sample_size : int >= 0, default = 0 :ref:`cache_tuning_sample_size_label` This is an experimental feature. It works hands in hands with the :ref:`cache_size_label`. When cache_tuning_sample_size is set to anything above zero, it will sample the cache usage with the passed sample size and decide whether to use the cache or not. And will turn it back on occasionally during the diffing process. This option can be useful if you are not sure if you need any cache or not. However you will gain much better performance with keeping this parameter zero and running your diff with different cache sizes and benchmarking to find the optimal cache size. +custom_operators : BaseOperator subclasses, default = None + :ref:`custom_operators_label` Whether two objects are different or not largely depends on the context. For example, an apple and a banana are the same if you are considering whether they are fruits or not. In that case, you can pass *custom_operators* for the job. + exclude_paths: list, default = None :ref:`exclude_paths_label` List of paths to exclude from the report. If only one item, you can path it as a string. @@ -66,6 +68,9 @@ ignore_order : Boolean, default=False Normally ignore_order does not report duplicates and repetition changes. 
In order to report repetitions, set report_repetition=True in addition to ignore_order=True +ignore_order_func : Function, default=None + :ref:`ignore_order_func_label` Sometimes a single *ignore_order* parameter is not enough to do the diff job. + You can use *ignore_order_func* to determine whether the order of certain paths should be ignored. ignore_string_type_changes: Boolean, default = False :ref:`ignore_string_type_changes_label` diff --git a/docs/faq.rst b/docs/faq.rst new file mode 100644 index 00000000..b4a6ed05 --- /dev/null +++ b/docs/faq.rst @@ -0,0 +1,88 @@ +:doc:`/index` + +F.A.Q +===== + + +Q: DeepDiff report is not precise when ignore_order=True +----------------------------------------------------- + + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [ + ... { + ... "key": "some/pathto/customers/foo/", + ... "flags": 0, + ... "value": "" + ... }, + ... { + ... "key": "some/pathto/customers/foo/account_number", + ... "flags": 0, + ... "value": "somevalue1" + ... } + ... ] + >>> + >>> t2 = [ + ... { + ... "key": "some/pathto/customers/foo/account_number", + ... "flags": 0, + ... "value": "somevalue2" + ... }, + ... { + ... "key": "some/pathto/customers/foo/", + ... "flags": 0, + ... "value": "new" + ... } + ... 
] + >>> + >>> pprint(DeepDiff(t1, t2)) + {'values_changed': {"root[0]['key']": {'new_value': 'some/pathto/customers/foo/account_number', + 'old_value': 'some/pathto/customers/foo/'}, + "root[0]['value']": {'new_value': 'somevalue2', + 'old_value': ''}, + "root[1]['key']": {'new_value': 'some/pathto/customers/foo/', + 'old_value': 'some/pathto/customers/foo/account_number'}, + "root[1]['value']": {'new_value': 'new', + 'old_value': 'somevalue1'}}} + +**Answer** + +This is explained in :ref:`cutoff_distance_for_pairs_label` and :ref:`cutoff_intersection_for_pairs_label` + +Bump up these 2 parameters to 1 and you get what you want: + + >>> pprint(DeepDiff(t1, t2, ignore_order=True, cutoff_distance_for_pairs=1, cutoff_intersection_for_pairs=1)) + {'values_changed': {"root[0]['value']": {'new_value': 'new', 'old_value': ''}, + "root[1]['value']": {'new_value': 'somevalue2', + 'old_value': 'somevalue1'}}} + + +Q: TypeError: Object of type type is not JSON serializable +------------------------------------------------------- + +I'm trying to serialize the DeepDiff results into json and I'm getting the TypeError. 
+ + >>> diff=DeepDiff(1, "a") + >>> diff + {'type_changes': {'root': {'old_type': <class 'int'>, 'new_type': <class 'str'>, 'old_value': 1, 'new_value': 'a'}}} + >>> json.dumps(diff) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + File ".../json/__init__.py", line 231, in dumps + return _default_encoder.encode(obj) + File ".../json/encoder.py", line 199, in encode + chunks = self.iterencode(o, _one_shot=True) + File ".../json/encoder.py", line 257, in iterencode + return _iterencode(o, 0) + File ".../json/encoder.py", line 179, in default + raise TypeError(f'Object of type {o.__class__.__name__} ' + TypeError: Object of type type is not JSON serializable + +**Answer** + +In order to serialize DeepDiff results into json, use to_json() + + >>> diff.to_json() + '{"type_changes": {"root": {"old_type": "int", "new_type": "str", "old_value": 1, "new_value": "a"}}}' + +Back to :doc:`/index` diff --git a/docs/ignore_order.rst b/docs/ignore_order.rst index c0b0eb03..80ecb44a 100644 --- a/docs/ignore_order.rst +++ b/docs/ignore_order.rst @@ -34,6 +34,35 @@ List difference ignoring order or duplicates: (with the same dictionaries as abo >>> print (ddiff) {} +.. 
_ignore_order_func_label: + +Dynamic Ignore Order +-------------------- + +Sometimes a single *ignore_order* parameter is not enough to do the diff job. +You can use *ignore_order_func* to determine whether the order of certain paths should be ignored. + +List difference ignoring order with *ignore_order_func* + >>> t1 = {"set": [1,2,3], "list": [1,2,3]} + >>> t2 = {"set": [3,2,1], "list": [3,2,1]} + >>> ddiff = DeepDiff(t1, t2, ignore_order_func=lambda level: "set" in level.path()) + >>> print (ddiff) + { 'values_changed': { "root['list'][0]": {'new_value': 3, 'old_value': 1}, + "root['list'][2]": {'new_value': 1, 'old_value': 3}}} + + +Ignoring order when a certain word is in the path + >>> from deepdiff import DeepDiff + >>> t1 = {'a': [1, 2], 'b': [3, 4]} + >>> t2 = {'a': [2, 1], 'b': [4, 3]} + >>> DeepDiff(t1, t2, ignore_order=True) + {} + >>> def ignore_order_func(level): + ... return 'a' in level.path() + ... + >>> DeepDiff(t1, t2, ignore_order=True, ignore_order_func=ignore_order_func) + {'values_changed': {"root['b'][0]": {'new_value': 4, 'old_value': 3}, "root['b'][1]": {'new_value': 3, 'old_value': 4}}} + .. _report_repetition_label: @@ -78,7 +107,7 @@ You can control the maximum number of passes that can be run via the max_passes Max Passes Example >>> from pprint import pprint >>> from deepdiff import DeepDiff - >>> + >>> >>> t1 = [ ... { ... 'key3': [[[[[1, 2, 4, 5]]]]], @@ -89,7 +118,7 @@ Max Passes Example ... 'key6': 'val6', ... }, ... ] - >>> + >>> >>> t2 = [ ... { ... 'key5': 'CHANGE', @@ -100,12 +129,12 @@ Max Passes Example ... 'key4': [7, 8], ... }, ... ] - >>> + >>> >>> for max_passes in (1, 2, 62, 65): ... diff = DeepDiff(t1, t2, ignore_order=True, max_passes=max_passes, verbose_level=2) ... print('-\n----- Max Passes = {} -----'.format(max_passes)) ... pprint(diff) - ... + ... DeepDiff has reached the max number of passes of 1. You can possibly get more accurate results by increasing the max_passes parameter. 
- ----- Max Passes = 1 ----- diff --git a/docs/index.rst b/docs/index.rst index 776acd82..a3db8c5a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ contain the root `toctree` directive. -DeepDiff 5.5.0 documentation! +DeepDiff 5.6.0 documentation! ============================= ***************** @@ -31,7 +31,7 @@ The DeepDiff library includes the following modules: Supported Python Versions ************************* -DeepDiff is rigorously tested against Python 3.6, 3.7, 3.8, 3.9 and Pypy3 +DeepDiff is rigorously tested against Python 3.6 up to 3.10 and Pypy3 NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to supprt Python 2. @@ -39,81 +39,57 @@ NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version t What is New *********** -New In DeepDiff 5.5.0 +New In DeepDiff 5-6-0 --------------------- -1. New option called `iterable_compare_func` that takes a function pointer to compare two items. The function takes three parameters (x, y, level) and should return `True` if it is a match, `False` if it is not a match or raise `CannotCompare` if it is unable to compare the two. If `CannotCompare` is raised then it will revert back to comparing in order. If `iterable_compare_func` is not provided or set to None the behavior defaults to comparing items in order. A new report item called `iterable_item_moved` this will only ever be added if there is a custom compare function. +**Create custom operators!** >>> from deepdiff import DeepDiff - >>> from deepdiff.helper import CannotCompare + >>> from deepdiff.operator import BaseOperator + >>> class CustomClass: + ... def __init__(self, d: dict, l: list): + ... self.dict = d + ... self.dict['list'] = l + ... >>> - >>> t1 = [ - ... { - ... 'id': 2, - ... 'value': [7, 8, 1] - ... }, - ... { - ... 'id': 3, - ... 'value': [7, 8], - ... }, - ... 
] + >>> custom1 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3]) + >>> custom2 = CustomClass(d=dict(c=3, d=4), l=[1, 2, 3, 2]) + >>> custom3 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3, 4]) >>> - >>> t2 = [ - ... { - ... 'id': 2, - ... 'value': [7, 8] - ... }, - ... { - ... 'id': 3, - ... 'value': [7, 8, 1], - ... }, - ... ] >>> - >>> DeepDiff(t1, t2) - {'values_changed': {"root[0]['id']": {'new_value': 2, 'old_value': 1}, "root[0]['value'][0]": {'new_value': 7, 'old_value': 1}, "root[1]['id']": {'new_value': 3, 'old_value': 2}, "root[2]['id']": {'new_value': 1, 'old_value': 3}, "root[2]['value'][0]": {'new_value': 1, 'old_value': 7}}, 'iterable_item_added': {"root[0]['value'][1]": 8}, 'iterable_item_removed': {"root[2]['value'][1]": 8}} - -Now let's use the custom compare function to guide DeepDiff in what to compare with what: - - >>> def compare_func(x, y, level=None): - ... try: - ... return x['id'] == y['id'] - ... except Exception: - ... raise CannotCompare() from None + >>> class ListMatchOperator(BaseOperator): + ... def give_up_diffing(self, level, diff_instance): + ... if set(level.t1.dict['list']) == set(level.t2.dict['list']): + ... return True ... - >>> DeepDiff(t1, t2, iterable_compare_func=compare_func) - {'iterable_item_added': {"root[2]['value'][2]": 1}, 'iterable_item_removed': {"root[1]['value'][2]": 1}} - -2. You can get the path() of item in the tree view in the list format instead of string representation by passing path(output_format='list') - -.. 
code:: python - - >>> from deepdiff import DeepDiff - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> ddiff - {'iterable_item_removed': [, ]} - >>> removed = ddiff['iterable_item_removed'][0] - >>> removed.path() - "root[4]['b'][2]" - >>> removed.path(output_format='list') - [4, 'b', 2] - - -New In Deepdiff 5.3.0 ---------------------- + >>> + >>> DeepDiff(custom1, custom2, custom_operators=[ + ... ListMatchOperator(types=[CustomClass]) + ... ]) + {} + >>> + >>> + >>> DeepDiff(custom2, custom3, custom_operators=[ + ... ListMatchOperator(types=[CustomClass]) + ... ]) + {'dictionary_item_added': [root.dict['a'], root.dict['b']], 'dictionary_item_removed': [root.dict['c'], root.dict['d']], 'values_changed': {"root.dict['list'][3]": {'new_value': 4, 'old_value': 2}}} + >>> -Deepdiff 5.3.0 comes with regular expressions in the DeepSearch and grep modules: +**Dynamic ignore order function** -.. code:: python +Ignoring order when certain word in the path - >>> from deepdiff import grep - >>> from pprint import pprint - >>> obj = ["something here", {"long": "somewhere", "someone": 2, 0: 0, "somewhere": "around"}] - >>> ds = obj | grep("some.*", use_regexp=True) - { 'matched_paths': ["root[1]['someone']", "root[1]['somewhere']"], - 'matched_values': ['root[0]', "root[1]['long']"]} + >>> from deepdiff import DeepDiff + >>> t1 = {'a': [1, 2], 'b': [3, 4]} + >>> t2 = {'a': [2, 1], 'b': [4, 3]} + >>> DeepDiff(t1, t2, ignore_order=True) + {} + >>> def ignore_order_func(level): + ... return 'a' in level.path() + ... 
+ >>> DeepDiff(t1, t2, ignore_order=True, ignore_order_func=ignore_order_func) + {'values_changed': {"root['b'][0]": {'new_value': 4, 'old_value': 3}, "root['b'][1]": {'new_value': 3, 'old_value': 4}}} ********* @@ -209,6 +185,7 @@ References commandline changelog authors + faq support diff --git a/requirements-cli.txt b/requirements-cli.txt index 7518df0d..98b0f981 100644 --- a/requirements-cli.txt +++ b/requirements-cli.txt @@ -1,4 +1,4 @@ -click==7.1.2 -pyyaml==5.4 +click==8.0.3 +pyyaml==5.4.1 toml==0.10.2 -clevercsv==0.6.7 +clevercsv==0.7.1 diff --git a/requirements-dev.txt b/requirements-dev.txt index 9a4cb01b..9f012d5f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,14 +1,15 @@ +wheel==0.37.0 -r requirements.txt -r requirements-cli.txt bump2version==1.0.1 -jsonpickle==1.4.2 -coverage==5.3.1 -ipdb==0.13.4 -numpy==1.19.4 -pytest==6.2.1 -pytest-cov==2.10.1 -python-dotenv==0.15.0 -watchdog==1.0.2 -Sphinx==3.4.1 +jsonpickle==2.0.0 +coverage==6.0.2 +ipdb==0.13.9 +numpy==1.21.2 +pytest==6.2.5 +pytest-cov==3.0.0 +python-dotenv==0.19.1 +watchdog==2.1.6 +Sphinx==4.2.0 sphinx-sitemap==2.2.0 -flake8==3.8.4 +flake8==4.0.1 diff --git a/setup.cfg b/setup.cfg index e9acb1f9..51f9bf75 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 5.5.0 +current_version = 5.6.0 commit = True tag = True tag_name = {new_version} diff --git a/setup.py b/setup.py index 2d51c3a3..e12e53c7 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ if os.environ.get('USER', '') == 'vagrant': del os.link -version = '5.5.0' +version = '5.6.0' def get_reqs(filename): @@ -54,6 +54,7 @@ def get_reqs(filename): "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Programming Language :: Python :: Implementation :: PyPy", "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License" diff --git 
a/tests/test_delta.py b/tests/test_delta.py index 66a6c24a..411cd91a 100644 --- a/tests/test_delta.py +++ b/tests/test_delta.py @@ -1079,6 +1079,8 @@ def test_delta_view_and_to_delta_dict_are_equal_when_parameteres_passed(self): 'cache_size': 500, 'cutoff_intersection_for_pairs': 0.6, 'group_by': None, + 'ignore_order_func': lambda *args, **kwargs: True, + 'custom_operators': [] } expected = {'iterable_items_added_at_indexes': {'root': {1: 1, 2: 1, 3: 1}}, 'iterable_items_removed_at_indexes': {'root': {1: 2, 2: 2}}} @@ -1331,6 +1333,29 @@ def test_delta_with_json_serializer(self): delta_reloaded_again = Delta(delta_file=the_file, deserializer=json.loads) assert t2 == delta_reloaded_again + t1 + def test_brackets_in_keys(self): + """ + Delta calculation not correct when bracket in Json key + https://github.com/seperman/deepdiff/issues/265 + """ + t1 = "{ \ + \"test\": \"test1\" \ + }" + + t2 = "{ \ + \"test\": \"test1\", \ + \"test2 [uuu]\": \"test2\" \ + }" + + json1 = json.loads(t1) + json2 = json.loads(t2) + + ddiff = DeepDiff(json1, json2) + delta = Delta(ddiff) + + original_json2 = delta + json1 + assert json2 == original_json2 + class TestDeltaCompareFunc: diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index 06e0b2e2..5b3f9e43 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1515,3 +1515,13 @@ def test_group_by_not_list_of_dicts(self): diff = DeepDiff(t1, t2, group_by='id') expected = {'values_changed': {'root[1]': {'new_value': 3, 'old_value': 2}}} assert expected == diff + + def test_datetime_in_key(self): + + now = datetime.datetime.utcnow() + t1 = {now: 1, now + datetime.timedelta(1): 4} + t2 = {now: 2, now + datetime.timedelta(1): 4} + diff = DeepDiff(t1, t2) + expected = {'values_changed': {f'root[{repr(now)}]': {'new_value': 2, 'old_value': 1}}} + + assert expected == diff diff --git a/tests/test_hash.py b/tests/test_hash.py index 3097be9e..ba61fbfc 100755 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -134,7 
+134,7 @@ def __repr__(self): t1 = Bad() result = DeepHash(t1) - expected_result = {t1: unprocessed, 'unprocessed': [t1]} + expected_result = {t1: unprocessed, UNPROCESSED_KEY: [t1]} assert expected_result == result def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self): @@ -407,7 +407,7 @@ def __str__(self): t1 = Bad() result = DeepHashPrep(t1) - expected_result = {t1: unprocessed, 'unprocessed': [t1]} + expected_result = {t1: unprocessed, UNPROCESSED_KEY: [t1]} assert expected_result == result class Burrito: diff --git a/tests/test_helper.py b/tests/test_helper.py index e70ab711..cdb4fe8a 100644 --- a/tests/test_helper.py +++ b/tests/test_helper.py @@ -77,6 +77,8 @@ def test_get_numpy_ndarray_rows(self): @pytest.mark.parametrize('item, expected', [ ('10', 10), ("Decimal('10.1')", Decimal('10.1')), + ("datetime.datetime(2021, 10, 13, 4, 54, 48, 959835)", datetime.datetime(2021, 10, 13, 4, 54, 48, 959835)), + ("datetime.date(2021, 10, 13)", datetime.date(2021, 10, 13)), ]) def test_literal_eval_extended(self, item, expected): result = literal_eval_extended(item) diff --git a/tests/test_ignore_order.py b/tests/test_ignore_order.py index 1c059493..f69a416d 100644 --- a/tests/test_ignore_order.py +++ b/tests/test_ignore_order.py @@ -928,3 +928,88 @@ def compare_func(x, y, level=None): ddiff2 = DeepDiff(t1, t2, ignore_order=True, cutoff_intersection_for_pairs=1, cutoff_distance_for_pairs=1, iterable_compare_func=compare_func) assert expected_with_compare_func == ddiff2 assert ddiff != ddiff2 + + +class TestDynamicIgnoreOrder: + def test_ignore_order_func(self): + t1 = { + "order_matters": [ + {1}, + { + 'id': 2, + 'value': [7, 8, 1] + }, + { + 'id': 3, + 'value': [7, 8], + }, + ], + "order_does_not_matter": [ + {1}, + { + 'id': 2, + 'value': [7, 8, 1] + }, + { + 'id': 3, + 'value': [7, 8], + }, + ] + } + + t2 = { + "order_matters": [ + { + 'id': 2, + 'value': [7, 8] + }, + { + 'id': 3, + 'value': [7, 8, 1], + }, + {}, + ], + "order_does_not_matter": [ + { + 
'id': 2, + 'value': [7, 8] + }, + { + 'id': 3, + 'value': [7, 8, 1], + }, + {}, + ] + } + + def ignore_order_func(level): + return "order_does_not_matter" in level.path() + + ddiff = DeepDiff(t1, t2, cutoff_intersection_for_pairs=1, cutoff_distance_for_pairs=1, ignore_order_func=ignore_order_func) + + expected = { + 'type_changes': { + "root['order_matters'][0]": { + 'old_type': set, + 'new_type': dict, + 'old_value': {1}, + 'new_value': {'id': 2, 'value': [7, 8]} + }, + "root['order_does_not_matter'][0]": { + 'old_type': set, + 'new_type': dict, + 'old_value': {1}, + 'new_value': {} + } + }, + 'dictionary_item_removed': [ + "root['order_matters'][2]['id']", + "root['order_matters'][2]['value']" + ], + 'values_changed': { + "root['order_matters'][1]['id']": {'new_value': 3, 'old_value': 2}, + "root['order_does_not_matter'][2]['id']": {'new_value': 2, 'old_value': 3}, + "root['order_does_not_matter'][1]['id']": {'new_value': 3, 'old_value': 2} + } + } + assert expected == ddiff diff --git a/tests/test_model.py b/tests/test_model.py index 6bfee076..cc5390b6 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +import datetime import logging import pytest from tests import CustomClass, CustomClassMisleadingRepr diff --git a/tests/test_operators.py b/tests/test_operators.py new file mode 100644 index 00000000..30279987 --- /dev/null +++ b/tests/test_operators.py @@ -0,0 +1,176 @@ +import math + +from typing import List +from deepdiff import DeepDiff +from deepdiff.operator import BaseOperator + + +class TestOperators: + def test_custom_operators_prevent_default(self): + t1 = { + "coordinates": [ + {"x": 5, "y": 5}, + {"x": 8, "y": 8} + ] + } + + t2 = { + "coordinates": [ + {"x": 6, "y": 6}, + {"x": 88, "y": 88} + ] + } + + class L2DistanceDifferWithPreventDefault(BaseOperator): + def __init__(self, regex_paths: List[str], distance_threshold: float): + super().__init__(regex_paths) + self.distance_threshold = 
distance_threshold + + def _l2_distance(self, c1, c2): + return math.sqrt( + (c1["x"] - c2["x"]) ** 2 + (c1["y"] - c2["y"]) ** 2 + ) + + def give_up_diffing(self, level, diff_instance): + l2_distance = self._l2_distance(level.t1, level.t2) + if l2_distance > self.distance_threshold: + diff_instance.custom_report_result('distance_too_far', level, { + "l2_distance": l2_distance + }) + return True + + ddiff = DeepDiff(t1, t2, custom_operators=[L2DistanceDifferWithPreventDefault( + ["^root\\['coordinates'\\]\\[\\d+\\]$"], + 1 + )]) + + expected = { + 'distance_too_far': { + "root['coordinates'][0]": {'l2_distance': 1.4142135623730951}, + "root['coordinates'][1]": {'l2_distance': 113.13708498984761} + } + } + assert expected == ddiff + + def test_custom_operators_not_prevent_default(self): + t1 = { + "coordinates": [ + {"x": 5, "y": 5}, + {"x": 8, "y": 8} + ] + } + + t2 = { + "coordinates": [ + {"x": 6, "y": 6}, + {"x": 88, "y": 88} + ] + } + + class L2DistanceDifferWithPreventDefault(BaseOperator): + def __init__(self, regex_paths, distance_threshold): + super().__init__(regex_paths) + self.distance_threshold = distance_threshold + + def _l2_distance(self, c1, c2): + return math.sqrt( + (c1["x"] - c2["x"]) ** 2 + (c1["y"] - c2["y"]) ** 2 + ) + + def give_up_diffing(self, level, diff_instance): + l2_distance = self._l2_distance(level.t1, level.t2) + if l2_distance > self.distance_threshold: + diff_instance.custom_report_result('distance_too_far', level, { + "l2_distance": l2_distance + }) + # + return False + + ddiff = DeepDiff(t1, t2, custom_operators=[L2DistanceDifferWithPreventDefault( + ["^root\\['coordinates'\\]\\[\\d+\\]$"], + 1 + ) + ]) + expected = { + 'values_changed': { + "root['coordinates'][0]['x']": {'new_value': 6, 'old_value': 5}, + "root['coordinates'][0]['y']": {'new_value': 6, 'old_value': 5}, + "root['coordinates'][1]['x']": {'new_value': 88, 'old_value': 8}, + "root['coordinates'][1]['y']": {'new_value': 88, 'old_value': 8} + }, + 'distance_too_far': 
{ + "root['coordinates'][0]": {'l2_distance': 1.4142135623730951}, + "root['coordinates'][1]": {'l2_distance': 113.13708498984761} + } + } + assert expected == ddiff + + def test_custom_operators_should_not_equal(self): + t1 = { + "id": 5, + "expect_change_pos": 10, + "expect_change_neg": 10, + } + + t2 = { + "id": 5, + "expect_change_pos": 100, + "expect_change_neg": 10, + } + + class ExpectChangeOperator(BaseOperator): + def __init__(self, regex_paths): + super().__init__(regex_paths) + + def give_up_diffing(self, level, diff_instance): + if level.t1 == level.t2: + diff_instance.custom_report_result('unexpected:still', level, { + "old": level.t1, + "new": level.t2 + }) + + return True + + ddiff = DeepDiff(t1, t2, custom_operators=[ + ExpectChangeOperator(regex_paths=["root\\['expect_change.*'\\]"]) + ]) + + assert ddiff == {'unexpected:still': {"root['expect_change_neg']": {'old': 10, 'new': 10}}} + + def test_custom_operator2(self): + + class CustomClass: + + def __init__(self, d: dict, l: list): + self.dict = d + self.dict['list'] = l + + def __repr__(self): + return "Class list is " + str(self.dict['list']) + + custom1 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3]) + custom2 = CustomClass(d=dict(c=3, d=4), l=[1, 2, 3, 2]) + custom3 = CustomClass(d=dict(a=1, b=2), l=[1, 2, 3, 4]) + + class ListMatchOperator(BaseOperator): + + def give_up_diffing(self, level, diff_instance): + if set(level.t1.dict['list']) == set(level.t2.dict['list']): + return True + + ddiff = DeepDiff(custom1, custom2, custom_operators=[ + ListMatchOperator(types=[CustomClass]) + ]) + + assert {} == ddiff + + ddiff2 = DeepDiff(custom2, custom3, custom_operators=[ + ListMatchOperator(types=[CustomClass]) + ]) + + expected = { + 'dictionary_item_added': ["root.dict['a']", "root.dict['b']"], + 'dictionary_item_removed': ["root.dict['c']", "root.dict['d']"], + 'values_changed': {"root.dict['list'][3]": {'new_value': 4, 'old_value': 2}}} + + assert expected == ddiff2 diff --git a/tests/test_path.py 
b/tests/test_path.py index a4722053..b4883d17 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -25,6 +25,9 @@ def test_path_to_elements(path, expected): ({1: [{'2': 'b'}, 3], 2: {4, 5}}, "root[1][0]['2']", 'b'), + ({'test [a]': 'b'}, + "root['test [a]']", + 'b'), ]) def test_get_item(obj, path, expected): result = extract(obj, path) diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 68f9482c..3c5f685a 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -81,7 +81,7 @@ class B: # These lines are long but make it easier to notice the difference: @pytest.mark.parametrize('verbose_level, expected', [ - (0, {"type_changes": {"root[0]": {"old_type": str, "new_type": int}}, "dictionary_item_added": ["root[1][5]"], "dictionary_item_removed": ["root[1][3]"], "values_changed": {"root[1][1]": {"new_value": 2, "old_value": 1}}, "iterable_item_added": {"root[2]": "d"}}), + (0, {"type_changes": {"root[0]": {"old_type": str, "new_type": int}}, "dictionary_item_added": ["root[1][5]"], "dictionary_item_removed": ["root[1][3]"], "iterable_item_added": {"root[2]": "d"}}), (1, {"type_changes": {"root[0]": {"old_type": str, "new_type": int, "old_value": "a", "new_value": 10}}, "dictionary_item_added": ["root[1][5]"], "dictionary_item_removed": ["root[1][3]"], "values_changed": {"root[1][1]": {"new_value": 2, "old_value": 1}}, "iterable_item_added": {"root[2]": "d"}}), (2, {"type_changes": {"root[0]": {"old_type": str, "new_type": int, "old_value": "a", "new_value": 10}}, "dictionary_item_added": {"root[1][5]": 6}, "dictionary_item_removed": {"root[1][3]": 4}, "values_changed": {"root[1][1]": {"new_value": 2, "old_value": 1}}, "iterable_item_added": {"root[2]": "d"}}), ])