Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -406,18 +406,18 @@ Example in DeepDiff for the same operation:
{'type_changes': {"root['a']['b']['c']": {'old_type': <type 'str'>, 'new_value': 42, 'old_value': 'foo', 'new_type': <type 'int'>}}}
```

# Pycon 2016

I was honored to give a talk about how DeepDiff does what it does at Pycon 2016. Please check out the video and let me know what you think:
# Documentation

[Diff It To Dig It Video](https://www.youtube.com/watch?v=J5r99eJIxF4)
And here is more info: <http://zepworks.com/blog/diff-it-to-digg-it/>
<https://zepworks.com/deepdiff/current/>


# Documentation
# Pycon 2016

<http://deepdiff.readthedocs.io/en/latest/>
I was honored to give a talk about the basics of how DeepDiff does what it does at Pycon 2016. Please check out the video and let me know what you think:

[Diff It To Dig It Video](https://www.youtube.com/watch?v=J5r99eJIxF4)
And here is more info: <http://zepworks.com/blog/diff-it-to-digg-it/>

# ChangeLog

Expand Down
50 changes: 45 additions & 5 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,13 @@
RemapDict, ResultDict, TextResult, TreeResult, DiffLevel,
DictRelationship, AttributeRelationship,
SubscriptableIterableRelationship, NonSubscriptableIterableRelationship,
SetRelationship, NumpyArrayRelationship)
SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD)
from deepdiff.deephash import DeepHash, combine_hashes_lists
from deepdiff.base import Base
from deepdiff.lfucache import LFUCache, DummyLFU

logger = logging.getLogger(__name__)


MAX_PASSES_REACHED_MSG = (
'DeepDiff has reached the max number of passes of {}. '
'You can possibly get more accurate results by increasing the max_passes parameter.')
Expand Down Expand Up @@ -120,6 +119,7 @@ def __init__(self,
hasher=None,
hashes=None,
ignore_order=False,
ignore_order_func=None,
ignore_type_in_groups=None,
ignore_string_type_changes=False,
ignore_numeric_type_changes=False,
Expand All @@ -140,6 +140,7 @@ def __init__(self,
verbose_level=1,
view=TEXT_VIEW,
iterable_compare_func=None,
custom_operators=None,
_original_type=None,
_parameters=None,
_shared_parameters=None,
Expand All @@ -156,12 +157,17 @@ def __init__(self,
"cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, "
"cache_tuning_sample_size, get_deep_distance, group_by, cache_purge_level, "
"math_epsilon, iterable_compare_func, _original_type, "
"ignore_order_func, custom_operators, "
"_parameters and _shared_parameters.") % ', '.join(kwargs.keys()))

if _parameters:
self.__dict__.update(_parameters)
else:
self.custom_operators = custom_operators or []
self.ignore_order = ignore_order

self.ignore_order_func = ignore_order_func or (lambda *_args, **_kwargs: ignore_order)

ignore_type_in_groups = ignore_type_in_groups or []
if numbers == ignore_type_in_groups or numbers in ignore_type_in_groups:
ignore_numeric_type_changes = True
Expand Down Expand Up @@ -327,6 +333,24 @@ def _report_result(self, report_type, level):
level.report_type = report_type
self.tree[report_type].add(level)

def custom_report_result(self, report_type, level, extra_info=None):
    """
    Record a change detected by custom logic in the tree-style result.

    The level is tagged with report_type, extra_info is stored in
    level.additional under the CUSTOM_FIELD key, and the level is added to
    self.tree[report_type]. Nothing is recorded when self._skip_this(level)
    is truthy.
    (The text-style report is built from the tree later.)

    :param report_type: A string key describing the type of change.
        Examples: "set_item_added", "values_changed"
    :param level: A DiffLevel object describing the objects in question in their
        before-change and after-change object structure.
    :param extra_info: A dict that describes this result. Stored as-is,
        including when it is None.
    :rtype: None
    """
    if self._skip_this(level):
        return

    level.report_type = report_type
    level.additional[CUSTOM_FIELD] = extra_info
    self.tree[report_type].add(level)

@staticmethod
def _dict_from_slots(object):
def unmangle(attribute):
Expand Down Expand Up @@ -556,7 +580,7 @@ def _iterables_subscriptable(t1, t2):

def _diff_iterable(self, level, parents_ids=frozenset(), _original_type=None):
"""Difference of iterables"""
if self.ignore_order:
if self.ignore_order_func(level):
self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type)
else:
self._diff_iterable_in_order(level, parents_ids, _original_type=_original_type)
Expand Down Expand Up @@ -1133,7 +1157,7 @@ def _diff_numpy_array(self, level, parents_ids=frozenset()):
# which means numpy module needs to be available. So np can't be None.
raise ImportError(CANT_FIND_NUMPY_MSG) # pragma: no cover

if not self.ignore_order:
if not self.ignore_order_func(level):
# fast checks
if self.significant_digits is None:
if np.array_equal(level.t1, level.t2):
Expand All @@ -1159,7 +1183,7 @@ def _diff_numpy_array(self, level, parents_ids=frozenset()):
dimensions = len(shape)
if dimensions == 1:
self._diff_iterable(level, parents_ids, _original_type=_original_type)
elif self.ignore_order:
elif self.ignore_order_func(level):
# arrays are converted to python lists so that certain features of DeepDiff can apply on them easier.
# They will be converted back to Numpy at their final dimension.
level.t1 = level.t1.tolist()
Expand Down Expand Up @@ -1219,6 +1243,19 @@ def _auto_off_cache(self):
self._stats[DISTANCE_CACHE_ENABLED] = False
self.progress_logger('Due to minimal cache hits, {} is disabled.'.format('distance cache'))

def _use_custom_operator(self, level):
"""

"""
used = False

for operator in self.custom_operators:
if operator.match(level):
prevent_default = operator.diff(level, self)
used = True if prevent_default is None else prevent_default

return used

def _diff(self, level, parents_ids=frozenset(), _original_type=None):
"""
The main diff method
Expand All @@ -1232,6 +1269,9 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None):
if self._count_diff() is StopIteration:
return

if self._use_custom_operator(level):
return

if level.t1 is level.t2:
return

Expand Down
12 changes: 5 additions & 7 deletions deepdiff/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class np_type:
np_int8, np_int16, np_int32, np_int64, np_uint8,
np_uint16, np_uint32, np_uint64, np_intp, np_uintp,
np_float32, np_float64, np_float_, np_complex64,
np_complex128, np_complex_, )
np_complex128, np_complex_,)

numpy_dtypes = set(numpy_numbers)
numpy_dtypes.add(np_bool_)
Expand Down Expand Up @@ -112,7 +112,6 @@ def copy(self): # pragma: no cover. Only used in pypy3 and py3.5
else:
dict_ = OrderedDictPlus # pragma: no cover. Only used in pypy3 and py3.5


if py4:
logger.warning('Python 4 is not supported yet. Switching logic to Python 3.') # pragma: no cover
py3 = True # pragma: no cover
Expand Down Expand Up @@ -184,6 +183,7 @@ class NotPresent: # pragma: no cover
in the future.
We previously used None for this but this caused problem when users actually added and removed None. Srsly guys? :D
"""

def __repr__(self):
return 'not present' # pragma: no cover

Expand All @@ -202,7 +202,6 @@ class CannotCompare(Exception):
not_hashed = NotHashed()
notpresent = NotPresent()


# Disabling remapping from old to new keys since the mapping is deprecated.
RemapDict = dict_

Expand Down Expand Up @@ -316,8 +315,8 @@ def type_in_type_group(item, type_group):

def type_is_subclass_of_type_group(item, type_group):
    """
    Tell whether item belongs to type_group as an instance or as a class.

    The checks are tried in order: item is an instance of type_group,
    item is itself a class that subclasses type_group, and finally
    whatever type_in_type_group(item, type_group) returns.
    """
    if isinstance(item, type_group):
        return True
    if isinstance(item, type) and issubclass(item, type_group):
        return True
    return type_in_type_group(item, type_group)


def get_doc(doc_filename):
Expand Down Expand Up @@ -426,7 +425,6 @@ def __repr__(self):

not_found = _NotFound()


warnings.simplefilter('once', DeepDiffDeprecationWarning)


Expand Down Expand Up @@ -583,7 +581,7 @@ def get_homogeneous_numpy_compatible_type_of_seq(seq):
iseq = iter(seq)
first_type = type(next(iseq))
if first_type in {int, float, Decimal}:
type_ = first_type if all((type(x) is first_type) for x in iseq ) else False
type_ = first_type if all((type(x) is first_type) for x in iseq) else False
return PYTHON_TYPE_TO_NUMPY_TYPE.get(type_, False)
else:
return False
35 changes: 31 additions & 4 deletions deepdiff/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
"repetition_change",
}

CUSTOM_FIELD = "__internal:custom:extra_info"


class DoesNotExist(Exception):
pass
Expand All @@ -47,6 +49,7 @@ class PrettyOrderedSet(OrderedSet):
From the perspective of the users of the library, they are dealing with lists.
Behind the scene, we have ordered sets.
"""

def __repr__(self):
return '[{}]'.format(", ".join(map(str, self)))

Expand Down Expand Up @@ -85,9 +88,13 @@ def mutual_add_removes_to_become_value_changes(self):
if 'iterable_item_added' in self and not self['iterable_item_added']:
del self['iterable_item_added']

def __getitem__(self, item):
    """
    Return the value stored under item.

    A missing key is first populated with an empty PrettyOrderedSet, so
    looking up an unknown report type yields a fresh set instead of
    raising KeyError.
    """
    if item in self:
        return self.get(item)

    self[item] = PrettyOrderedSet()
    return self.get(item)

class TextResult(ResultDict):

class TextResult(ResultDict):
ADD_QUOTES_TO_STRINGS = True

def __init__(self, tree_results=None, verbose_level=1):
Expand Down Expand Up @@ -135,6 +142,7 @@ def _from_tree_results(self, tree):
self._from_tree_set_item_added(tree)
self._from_tree_repetition_change(tree)
self._from_tree_deep_distance(tree)
self._from_tree_custom_results(tree)

def _from_tree_default(self, tree, report_type):
if report_type in tree:
Expand Down Expand Up @@ -231,17 +239,36 @@ def _from_tree_repetition_change(self, tree):
if 'repetition_change' in tree:
for change in tree['repetition_change']:
path = change.path(force=FORCE_DEFAULT)
self['repetition_change'][path] = RemapDict(change.additional[
'repetition'])
self['repetition_change'][path] = RemapDict(
change.additional['repetition']
)
self['repetition_change'][path]['value'] = change.t1

def _from_tree_deep_distance(self, tree):
if 'deep_distance' in tree:
self['deep_distance'] = tree['deep_distance']

def _from_tree_custom_results(self, tree):
    """
    Copy custom report types from the tree result into this result.

    Every key of tree that is not one of REPORT_KEYS and whose value is a
    PrettyOrderedSet is stored here as a dict that maps each level's path
    to the value kept under CUSTOM_FIELD in level.additional (an empty
    dict when that key is absent).
    """
    for report_type, levels in tree.items():
        if report_type in REPORT_KEYS:
            continue
        if not isinstance(levels, PrettyOrderedSet):
            continue

        # Each entry of levels is expected to be a DiffLevel.
        self[report_type] = {
            level.path(force=FORCE_DEFAULT): level.additional.get(CUSTOM_FIELD, {})
            for level in levels
        }


class DeltaResult(TextResult):
ADD_QUOTES_TO_STRINGS = False

def __init__(self, tree_results=None, ignore_order=None):
Expand Down
16 changes: 16 additions & 0 deletions deepdiff/operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import re


class BaseOperator:
    """
    Base class for operators that are selected by the path of a level.

    An operator is built from a regular expression; match() tells whether
    a level's path satisfies it. Subclasses must override diff(), which
    raises NotImplementedError here.
    """
    __operator_name__ = "__base__"

    def __init__(self, path_regex):
        # Keep the raw expression and a compiled version anchored at both ends.
        self.path_regex = path_regex
        self.regex = re.compile(f"^{path_regex}$")

    def match(self, level) -> bool:
        """Return True when level.path() matches the anchored path regex."""
        found = self.regex.search(level.path())
        return found is not None

    def diff(self, level, instance) -> bool:
        """Diff the given level on behalf of instance; must be overridden by subclasses."""
        raise NotImplementedError
2 changes: 1 addition & 1 deletion docs/diff_doc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ ignore_nan_inequality: Boolean, default = False

iterable_compare_func:
:ref:`iterable_compare_func_label`:
There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass a iterable_compare_func that takes a function pointer to compare two items. It function takes two parameters and should return True if it is a match, False if it is not a match or raise CannotCompare if it is unable to compare the two.
There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass an iterable_compare_func that takes a function pointer to compare two items. The function takes three parameters (x, y, level) and should return True if it is a match, False if it is not a match, or raise CannotCompare if it is unable to compare the two.
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for fixing this. I thought I had found all the instances of this incorrect sentence already but obviously not!


ignore_private_variables: Boolean, default = True
:ref:`ignore_private_variables_label`
Expand Down
25 changes: 20 additions & 5 deletions docs/ignore_order.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,21 @@ List difference ignoring order or duplicates: (with the same dictionaries as abo
>>> print (ddiff)
{}

.. _ignore_order_func_label:

Dynamic Ignore Order
--------------------

Sometimes a single *ignore_order* parameter is not enough for a diff job.
In that case you can use *ignore_order_func* to determine whether the order of certain paths should be ignored.

List difference ignoring order with *ignore_order_func*
>>> t1 = {"set": [1,2,3], "list": [1,2,3]}
>>> t2 = {"set": [3,2,1], "list": [3,2,1]}
>>> ddiff = DeepDiff(t1, t2, ignore_order_func=lambda level: "set" in level.path())
>>> print (ddiff)
{ 'values_changed': { "root['list'][0]": {'new_value': 3, 'old_value': 1},
"root['list'][2]": {'new_value': 1, 'old_value': 3}}}

.. _report_repetition_label:

Expand Down Expand Up @@ -78,7 +93,7 @@ You can control the maximum number of passes that can be run via the max_passes
Max Passes Example
>>> from pprint import pprint
>>> from deepdiff import DeepDiff
>>>
>>>
>>> t1 = [
... {
... 'key3': [[[[[1, 2, 4, 5]]]]],
Expand All @@ -89,7 +104,7 @@ Max Passes Example
... 'key6': 'val6',
... },
... ]
>>>
>>>
>>> t2 = [
... {
... 'key5': 'CHANGE',
Expand All @@ -100,12 +115,12 @@ Max Passes Example
... 'key4': [7, 8],
... },
... ]
>>>
>>>
>>> for max_passes in (1, 2, 62, 65):
... diff = DeepDiff(t1, t2, ignore_order=True, max_passes=max_passes, verbose_level=2)
... print('-\n----- Max Passes = {} -----'.format(max_passes))
... pprint(diff)
...
...
DeepDiff has reached the max number of passes of 1. You can possibly get more accurate results by increasing the max_passes parameter.
-
----- Max Passes = 1 -----
Expand Down Expand Up @@ -227,7 +242,7 @@ Iterable Compare Func

New in DeepDiff 5.5.0

There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass a `iterable_compare_func` that takes a function pointer to compare two items. It function takes two parameters and should return `True` if it is a match, `False` if it is not a match or raise `CannotCompare` if it is unable to compare the two.
There are times that we want to guide DeepDiff as to what items to compare with other items. In such cases we can pass an `iterable_compare_func` that takes a function pointer to compare two items. The function takes three parameters (x, y, level) and should return `True` if it is a match, `False` if it is not a match, or raise `CannotCompare` if it is unable to compare the two.


For example take the following objects:
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ What is New
New In DeepDiff 5.5.0
---------------------

1. New option called `iterable_compare_func` that takes a function pointer to compare two items. It function takes two parameters and should return `True` if it is a match, `False` if it is not a match or raise `CannotCompare` if it is unable to compare the two. If `CannotCompare` is raised then it will revert back to comparing in order. If `iterable_compare_func` is not provided or set to None the behavior defaults to comparing items in order. A new report item called `iterable_item_moved` this will only ever be added if there is a custom compare function.
1. New option called `iterable_compare_func` that takes a function pointer to compare two items. The function takes three parameters (x, y, level) and should return `True` if it is a match, `False` if it is not a match, or raise `CannotCompare` if it is unable to compare the two. If `CannotCompare` is raised then it will revert back to comparing in order. If `iterable_compare_func` is not provided or set to None, the behavior defaults to comparing items in order. A new report item called `iterable_item_moved` will only ever be added if there is a custom compare function.

>>> from deepdiff import DeepDiff
>>> from deepdiff.helper import CannotCompare
Expand Down
Loading