diff --git a/.gitignore b/.gitignore index bed0daa2..359eaf68 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] +.pytest_cache/ # C extensions *.so @@ -62,3 +63,5 @@ target/ .idea/ .~lock* +.python-version + diff --git a/.travis.yml b/.travis.yml index 07a97317..bcbb06e6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,20 +1,19 @@ language: python -python: - - "2.7" - - "3.3" - - "3.4" - - "3.5" - - "3.6" - - "pypy-5.4" # pypy on python 2.7 - # - "pypy3" # Removing pypy3 from travis since travis's pypy3 seems buggy - -sudo: false +matrix: + include: + - python: 3.4 + - python: 3.5 + - python: 3.6 + - python: pypy3 + - python: 3.7 + dist: xenial + sudo: true install: - - pip install coveralls + - pip install -r requirements-dev.txt -script: coverage run --source deepdiff setup.py test +script: pytest --cov=deepdiff tests/ after_success: - coveralls diff --git a/AUTHORS b/AUTHORS index 670dc09a..e598a258 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,8 +1,6 @@ Authors: - Seperman - Victor Hahn Castell @ Flexoptix - -Also thanks to: - nfvs for Travis-CI setup script. - brbsix for initial Py3 porting. - WangFenjin for unicode support. 
@@ -15,3 +13,10 @@ Also thanks to: - movermeyer for updating docs - maxrothman for search in inherited class attributes - maxrothman for search for types/objects +- MartyHub for exclude regex paths +- sreecodeslayer for DeepSearch match_string +- Brian Maissy (brianmaissy) for weakref fix, enum tests +- Bartosz Borowik (boba-2) for Exclude types fix when ignoring order +- Brian Maissy (brianmaissy) for fixing classes which inherit from classes with slots didn't have all of their slots compared +- Juan Soler (Soleronline) for adding ignore_type_number +- mthaddon for adding timedelta diffing support diff --git a/README.md b/README.md index 7fb5dae3..b5fda0de 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ -# deepdiff v 3.3.0 +# DeepDiff v 4.0.0 -[![Join the chat at https://gitter.im/deepdiff/Lobby](https://badges.gitter.im/deepdiff/Lobby.svg)](https://gitter.im/deepdiff/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) ![Doc](https://readthedocs.org/projects/deepdiff/badge/?version=latest) @@ -8,25 +7,15 @@ [![Build Status](https://travis-ci.org/seperman/deepdiff.svg?branch=master)](https://travis-ci.org/seperman/deepdiff) [![Coverage Status](https://coveralls.io/repos/github/seperman/deepdiff/badge.svg?branch=master)](https://coveralls.io/github/seperman/deepdiff?branch=master) -Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes. 
-Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6, Pypy, Pypy3 - -## Table of Contents - -- [Installation](#Installation) -- [Parameters](#parameters) -- [Ignore Order](#ignore-order) -- [Report repetitions](#report-repetitions) -- [Exclude types or paths](#exclude-type-or-paths) -- [Significant Digits](#significant-digits) -- [Verbose Level](#verbose-level) -- [Deep Search](#deep-search) -- [Using DeepDiff in unit tests](#using-deepdiff-in-unit-tests) -- [Difference with Json Patch](#difference-with-json-patch) -- [Views](#views) -- [Text View](#text-view) -- [Tree View](#tree-view) -- [Serialization](#serialization) +- DeepDiff: Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes. +- DeepSearch: Search for objects within other objects. +- DeepHash: Hash any object based on its content. + +Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 + +**NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to support Python 2** + + - [Documentation](http://deepdiff.readthedocs.io/en/latest/) @@ -40,24 +29,18 @@ Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6, Pypy, Pypy3 ```python >>> from deepdiff import DeepDiff # For Deep Difference of 2 objects ->>> from deepdiff import DeepSearch # For finding if item exists in an object +>>> from deepdiff import grep, DeepSearch # For finding if item exists in an object +>>> from deepdiff import DeepHash # For hashing objects based on their contents ``` -## Parameters - -In addition to the 2 objects being compared: +# Deep Diff -- [ignore_order](#ignore-order) -- [report_repetition](#report-repetitions) -- [verbose_level](#verbose-level) +DeepDiff gets the difference of 2 objects. -## Supported data types +> - Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst) +> - The full documentation can be found on http://deepdiff.readthedocs.io/en/latest/ -int, string, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects! 
- -## Ignore Order - -Sometimes you don't care about the order of objects when comparing them. In those cases, you can set `ignore_order=True`. However this flag won't report the repetitions to you. You need to additionally enable `report_repetition=True` for getting a report of repetitions. +## Examples ### List difference ignoring order or duplicates @@ -69,7 +52,7 @@ Sometimes you don't care about the order of objects when comparing them. In thos {} ``` -## Report repetitions +### Report repetitions This flag ONLY works when ignoring order is enabled. Note that this feature is experimental. @@ -97,9 +80,8 @@ which will print you: 'new_repeat': 2}}} ``` -## Exclude types or paths - ### Exclude certain types from comparison: + ```python >>> l1 = logging.getLogger("test") >>> l2 = logging.getLogger("test2") @@ -109,7 +91,8 @@ which will print you: {} ``` -### Exclude part of your object tree from comparison: +### Exclude part of your object tree from comparison + ```python >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} @@ -117,7 +100,22 @@ which will print you: {} ``` -## Significant Digits +### Exclude Regex Paths + + +You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude. The items in the list could be raw regex strings or compiled regex objects. + +```python +>>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] +>>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] +>>> print(DeepDiff(t1, t2, exclude_regex_paths={r"root\[\d+\]\['b'\]"})) +{} +>>> exclude_path = re.compile(r"root\[\d+\]\['b'\]") +>>> print(DeepDiff(t1, t2, exclude_regex_paths=[exclude_path])) +{} +``` + +### Significant Digits Digits **after** the decimal point. Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits @@ -130,112 +128,37 @@ Digits **after** the decimal point. 
Internally it uses "{:.Xf}".format(Your Numb {'values_changed': {'root': {'old_value': Decimal('1.52'), 'new_value': Decimal('1.57')}}} ``` -Approximate float comparison: - -```python ->>> t1 = [ 1.1129, 1.3359 ] ->>> t2 = [ 1.113, 1.3362 ] ->>> pprint(DeepDiff(t1, t2, significant_digits=3)) -{} ->>> pprint(DeepDiff(t1, t2)) -{'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, - 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} ->>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) -{'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} -``` - -## Verbose Level - -Verbose level by default is 1. The possible values are 0, 1 and 2. - -- Verbose level 0: won't report values when type changed. [Example](##type-of-an-item-has-changed) -- Verbose level 1: default -- Verbose level 2: will report values when custom objects or dictionaries have items added or removed. [Example](#items-added-or-removed-verbose) - -## Deep Search -(New in v2-1-0) - -Tip: Take a look at [grep](#grep) which gives you a new interface for DeepSearch! - -DeepDiff comes with a utility to find the path to the item you are looking for. -It is called DeepSearch and it has a similar interface to DeepDiff. - -Let's say you have a huge nested object and want to see if any item with the word `somewhere` exists in it. +### Ignore Type Number - List that contains float and integer: ```py -from deepdiff import DeepSearch -obj = {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"} -ds = DeepSearch(obj, "somewhere", verbose_level=2) -print(ds) -``` - -Which will print: - -```py -{'matched_paths': {"root['somewhere']": "around"}, - 'matched_values': {"root['long']": "somewhere"}} -``` - -Tip: An interesting use case is to search inside `locals()` when doing pdb. - -## Grep -(New in v3-2-0) - -Grep is another interface for DeepSearch. -Just grep through your objects as you would in shell! 
- -```py -from deepdiff import grep -obj = {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"} -ds = obj | grep("somewhere") -print(ds) -``` - -Which will print: - -```py -{'matched_paths': {"root['somewhere']"}, - 'matched_values': {"root['long']"}} -``` - -And you can pass all the same kwargs as DeepSearch to grep too: - -```py ->>> obj | grep(item, verbose_level=2) -{'matched_paths': {"root['somewhere']": 'around'}, 'matched_values': {"root['long']": 'somewhere'}} -``` - -## Using DeepDiff in unit tests - -`result` is the output of the function that is being tests. -`expected` is the expected output of the function. - -```python -assertEqual(DeepDiff(result, expected), {}) -``` - -## Difference with Json Patch - -Unlike [Json Patch](https://tools.ietf.org/html/rfc6902) which is designed only for Json objects, DeepDiff is designed specifically for almost all Python types. In addition to that, DeepDiff checks for type changes and attribute value changes that Json Patch does not cover since there are no such things in Json. Last but not least, DeepDiff gives you the exact path of the item(s) that were changed in Python syntax. 
- -Example in Json Patch for replacing: - -`{ "op": "replace", "path": "/a/b/c", "value": 42 }` - -Example in DeepDiff for the same operation: - -```python ->>> item1 = {'a':{'b':{'c':'foo'}}} ->>> item2 = {'a':{'b':{'c':42}}} ->>> DeepDiff(item1, item2) -{'type_changes': {"root['a']['b']['c']": {'old_type': , 'new_value': 42, 'old_value': 'foo', 'new_type': }}} +>>> from deepdiff import DeepDiff +>>> from pprint import pprint +>>> t1 = [1, 2, 3] +>>> t2 = [1.0, 2.0, 3.0] +>>> ddiff = DeepDiff(t1, t2) +>>> pprint(ddiff, indent=2) +{ 'type_changes': { 'root[0]': { 'new_type': , + 'new_value': 1.0, + 'old_type': , + 'old_value': 1}, + 'root[1]': { 'new_type': , + 'new_value': 2.0, + 'old_type': , + 'old_value': 2}, + 'root[2]': { 'new_type': , + 'new_value': 3.0, + 'old_type': , + 'old_value': 3}}} +>>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=True) +>>> pprint(ddiff, indent=2) +{} ``` -# Views +## Views Starting with DeepDiff v 3, there are two different views into your diffed data: text view (original) and tree view (new). -## Text View +### Text View Text view is the original and currently the default view of DeepDiff. @@ -257,530 +180,203 @@ So for example `ddiff['dictionary_item_removed']` is a set if strings thus this The following examples are using the *default text view.* The Tree View is introduced in DeepDiff v3 and provides traversing capabilities through your diffed data and more! - Read more about the Tree View at the bottom of this page. - + Read more about the Tree View at the [tree view section](#tree-view) of this page. 
-### Importing - -```python ->>> from deepdiff import DeepDiff ->>> from pprint import pprint ->>> from __future__ import print_function # In case running on Python 2 -``` -### Same object returns empty +### Tree View -```python ->>> t1 = {1:1, 2:2, 3:3} ->>> t2 = t1 ->>> print(DeepDiff(t1, t2)) -{} -``` - -### Type of an item has changed - -```python ->>> t1 = {1:1, 2:2, 3:3} ->>> t2 = {1:1, 2:"2", 3:3} ->>> pprint(DeepDiff(t1, t2), indent=2) -{ 'type_changes': { 'root[2]': { 'new_type': , - 'new_value': '2', - 'old_type': , - 'old_value': 2}}} -``` - -And if you don't care about the value of items that have changed type, please set verbose level to 0: - -```python ->>> t1 = {1:1, 2:2, 3:3} ->>> t2 = {1:1, 2:"2", 3:3} ->>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) -{ 'type_changes': { 'root[2]': { 'new_type': , - 'old_type': ,}}} -``` +Starting the version v3 You can choose the view into the deepdiff results. +The tree view provides you with tree objects that you can traverse through to find the parents of the objects that are diffed and the actual objects that are being diffed. 
-### Value of an item has changed +#### Value of an item has changed (Tree View) ```python +>>> from deepdiff import DeepDiff +>>> from pprint import pprint >>> t1 = {1:1, 2:2, 3:3} >>> t2 = {1:1, 2:4, 3:3} ->>> pprint(DeepDiff(t1, t2), indent=2) -{'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} -``` - -### Item added or removed - -```python ->>> t1 = {1:1, 3:3, 4:4} ->>> t2 = {1:1, 3:3, 5:5, 6:6} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint(ddiff) -{'dictionary_item_added': {'root[5]', 'root[6]'}, - 'dictionary_item_removed': {'root[4]'}} -``` - -#### Items added or removed verbose - -And if you would like to know the values of items added or removed, please set the verbose_level to 2: - -```python ->>> t1 = {1:1, 3:3, 4:4} ->>> t2 = {1:1, 3:3, 5:5, 6:6} ->>> ddiff = DeepDiff(t1, t2, verbose_level=2) ->>> pprint(ddiff, indent=2) -{ 'dictionary_item_added': {'root[5]': 5, 'root[6]': 6}, - 'dictionary_item_removed': {'root[4]': 4}} +>>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') +>>> ddiff_verbose0 +{'values_changed': {}} +>>> +>>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') +>>> ddiff_verbose1 +{'values_changed': {}} +>>> set_of_values_changed = ddiff_verbose1['values_changed'] +>>> # since set_of_values_changed includes only one item in a set +>>> # in order to get that one item we can: +>>> (changed,) = set_of_values_changed +>>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] + +>>> changed.t1 +2 +>>> changed.t2 +4 +>>> # You can traverse through the tree, get to the parents! 
+>>> changed.up + ``` -### String difference +### Serialization -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} ->>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{ 'values_changed': { 'root[2]': {'new_value': 4, 'old_value': 2}, - "root[4]['b']": { 'new_value': 'world!', - 'old_value': 'world'}}} -``` +In order to convert the DeepDiff object into a normal Python dictionary, use the to_dict() method. +Note that to_dict will use the text view even if you did the diff in tree view. -### String difference 2 +Example: ```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{ 'values_changed': { "root[4]['b']": { 'diff': '--- \n' - '+++ \n' - '@@ -1,5 +1,4 @@\n' - '-world!\n' - '-Goodbye!\n' - '+world\n' - ' 1\n' - ' 2\n' - ' End', - 'new_value': 'world\n1\n2\nEnd', - 'old_value': 'world!\n' - 'Goodbye!\n' - '1\n' - '2\n' - 'End'}}} - ->>> ->>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) ---- -+++ -@@ -1,5 +1,4 @@ --world! --Goodbye! -+world - 1 - 2 - End +>>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} +>>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} +>>> ddiff = DeepDiff(t1, t2, view='tree') +>>> ddiff.to_dict() +{'type_changes': {"root[4]['b']": {'old_type': , 'new_type': , 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} ``` -### List difference +In order to do safe json serialization, use the to_json() method. 
-```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} -``` - -### List difference Example 2 +Example: ```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{ 'iterable_item_added': {"root[4]['b'][3]": 3}, - 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 'old_value': 2}, - "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} +>>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} +>>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} +>>> ddiff = DeepDiff(t1, t2, view='tree') +>>> ddiff.to_json() +'{"type_changes": {"root[4][\'b\']": {"old_type": "list", "new_type": "str", "old_value": [1, 2, 3], "new_value": "world\\n\\n\\nEnd"}}}' ``` -### List that contains dictionary: - -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (ddiff, indent = 2) -{ 'dictionary_item_removed': ["root[4]['b'][2][2]"], - 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} -``` -### Sets: +> - Please take a look at the [DeepDiff docs](deepdiff/diff_doc.rst) +> - The full documentation can be found on -```python ->>> t1 = {1, 2, 8} ->>> t2 = {1, 2, 3, 5} ->>> ddiff = DeepDiff(t1, t2) ->>> pprint (DeepDiff(t1, t2)) -{'set_item_added': ['root[3]', 'root[5]'], 'set_item_removed': ['root[8]']} -``` -### Named Tuples: +# Deep Search -```python ->>> from collections import namedtuple ->>> Point = namedtuple('Point', ['x', 'y']) ->>> t1 = Point(x=11, y=22) ->>> t2 = Point(x=11, y=23) ->>> pprint (DeepDiff(t1, t2)) -{'values_changed': {'root.y': {'new_value': 23, 
'old_value': 22}}} -``` +DeepDiff comes with a utility to find the path to the item you are looking for. +It is called DeepSearch and it has a similar interface to DeepDiff. -### Custom objects: +Let's say you have a huge nested object and want to see if any item with the word `somewhere` exists in it. +Just grep through your objects as you would in shell! -```python ->>> class ClassA(object): -... a = 1 -... def __init__(self, b): -... self.b = b -... ->>> t1 = ClassA(1) ->>> t2 = ClassA(2) ->>> ->>> pprint(DeepDiff(t1, t2)) -{'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} +```py +from deepdiff import grep +obj = {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"} +ds = obj | grep("somewhere") +print(ds) ``` -### Object attribute added: +Which will print: -```python ->>> t2.c = "new attribute" ->>> pprint(DeepDiff(t1, t2)) -{'attribute_added': ['root.c'], - 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} +```py +{'matched_paths': {"root['somewhere']"}, + 'matched_values': {"root['long']"}} ``` -### Exclude certain types from comparison: -```python ->>> l1 = logging.getLogger("test") ->>> l2 = logging.getLogger("test2") ->>> t1 = {"log": l1, 2: 1337} ->>> t2 = {"log": l2, 2: 1337} ->>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) -{} -``` +And you can pass all the same kwargs as DeepSearch to grep too: -### Exclude part of your object tree from comparison: -```python ->>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} ->>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} ->>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) -{} +```py +>>> obj | grep(item, verbose_level=2) +{'matched_paths': {"root['somewhere']": 'around'}, 'matched_values': {"root['long']": 'somewhere'}} ``` +> - Please take a look at the [DeepSearch docs](deepdiff/search_doc.rst) +> - The full documentation can be found on - All the examples for the text view work for 
the tree view too. You just need to set view='tree' to get it in tree form. - - -## Tree View +# Deep Hash +(New in v4-0-0) -Starting the version v3 You can choose the view into the deepdiff results. -The tree view provides you with tree objects that you can traverse through to find the parents of the objects that are diffed and the actual objects that are being diffed. +DeepHash is designed to give you the hash of ANY Python object based on its contents even if the object is not considered hashable! +DeepHash is supposed to be deterministic in order to make sure 2 objects that contain the same data produce the same hash. -This view is very useful when dealing with nested objects. -Note that tree view always returns results in the form of Python sets. +> - Please take a look at the [DeepHash docs](deepdiff/deephash_doc.rst) +> - The full documentation can be found on http://deepdiff.readthedocs.io/en/latest/ -You can traverse through the tree elements! +Let's say you have a dictionary object. - The Tree view is just a different representation of the diffed data. - Behind the scene, DeepDiff creates the tree view first and then converts it to textual representation for the text view. 
- -``` -+---------------------------------------------------------------+ -| | -| parent(t1) parent node parent(t2) | -| + ^ + | -+------|--------------------------|---------------------|-------+ - | | | up | - | Child | | | ChildRelationship - | Relationship | | | - | down | | | -+------|----------------------|-------------------------|-------+ -| v v v | -| child(t1) child node child(t2) | -| | -+---------------------------------------------------------------+ +```py +>>> from deepdiff import DeepHash +>>> +>>> obj = {1: 2, 'a': 'b'} ``` - - up - Move up to the parent node - - down - Move down to the child node - - path() - Get the path to the current node - - t1 - The first item in the current node that is being diffed - - t2 - The second item in the current node that is being diffed - - additional - Additional information about the node i.e. repetition - - repetition - Shortcut to get the repetition report +If you try to hash it: - -The tree view allows you to have more than mere textual representaion of the diffed objects. -It gives you the actual objects (t1, t2) throughout the tree of parents and children. - -## Examples - Tree View - - The Tree View is introduced in DeepDiff v3 - Set view='tree' in order to use this view. 
- -### Value of an item has changed (Tree View) - -```python ->>> from deepdiff import DeepDiff ->>> from pprint import pprint ->>> t1 = {1:1, 2:2, 3:3} ->>> t2 = {1:1, 2:4, 3:3} ->>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') ->>> ddiff_verbose0 -{'values_changed': {}} ->>> ->>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') ->>> ddiff_verbose1 -{'values_changed': {}} ->>> set_of_values_changed = ddiff_verbose1['values_changed'] ->>> # since set_of_values_changed includes only one item in a set ->>> # in order to get that one item we can: ->>> (changed,) = set_of_values_changed ->>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] - ->>> changed.t1 -2 ->>> changed.t2 -4 ->>> # You can traverse through the tree, get to the parents! ->>> changed.up - +```py +>>> hash(obj) +Traceback (most recent call last): + File "", line 1, in +TypeError: unhashable type: 'dict' ``` -### List difference (Tree View) +But with DeepHash: -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> ddiff -{'iterable_item_removed': {, }} ->>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. 
->>> # One way to get one item from the set is to convert it to a list ->>> # And then get the first item of the list: ->>> removed = list(ddiff['iterable_item_removed'])[0] ->>> removed - ->>> ->>> parent = removed.up ->>> parent - ->>> parent.path() -"root[4]['b']" ->>> parent.t1 -[1, 2, 3, 4] ->>> parent.t2 -[1, 2] ->>> parent.up - ->>> parent.up.up - ->>> parent.up.up.t1 -{1: 1, 2: 2, 3: 3, 4: {'a': 'hello', 'b': [1, 2, 3, 4]}} ->>> parent.up.up.t1 == t1 # It is holding the original t1 that we passed to DeepDiff -True +```py +>>> from deepdiff import DeepHash +>>> obj = {1: 2, 'a': 'b'} +>>> DeepHash(obj) +{4355639248: 2468916477072481777512283587789292749, 4355639280: -35787773492556653776377555218122431491, 4358636128: -88390647972316138151822486391929534118, 4358009664: 8833996863197925870419376694314494743, 4357467952: 34150898645750099477987229399128149852} ``` -### List difference 2 (Tree View) +So what is exactly the hash of obj in this case? +DeepHash is calculating the hash of the obj and any other object that obj contains. +The output of DeepHash is a dictionary of object IDs to their hashes. +In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> pprint(ddiff, indent = 2) -{ 'iterable_item_added': {}, - 'values_changed': { , - }} ->>> ->>> # Note that iterable_item_added is a set with one item. 
->>> # So in order to get that one item from it, we can do: ->>> ->>> (added,) = ddiff['iterable_item_added'] ->>> added - ->>> added.up.up - ->>> added.up.up.path() -'root[4]' ->>> added.up.up.down - ->>> ->>> # going up twice and then down twice gives you the same node in the tree: ->>> added.up.up.down.down == added -True +```py +>>> hashes = DeepHash(obj) +>>> hashes[obj] +34150898645750099477987229399128149852 ``` -### List difference ignoring order but reporting repetitions (Tree View) +Which you can write as: -```python ->>> t1 = [1, 3, 1, 4] ->>> t2 = [4, 4, 1] ->>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree') ->>> pprint(ddiff, indent=2) -{ 'iterable_item_removed': {}, - 'repetition_change': { , - }} ->>> ->>> # repetition_change is a set with 2 items. ->>> # in order to get those 2 items, we can do the following. ->>> # or we can convert the set to list and get the list items. ->>> # or we can iterate through the set items ->>> ->>> (repeat1, repeat2) = ddiff['repetition_change'] ->>> repeat1 # the default verbosity is set to 1. - ->>> # The actual data regarding the repetitions can be found in the repetition attribute: ->>> repeat1.repetition -{'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} ->>> ->>> # If you change the verbosity, you will see less: ->>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree', verbose_level=0) ->>> ddiff -{'repetition_change': {, }, 'iterable_item_removed': {}} ->>> (repeat1, repeat2) = ddiff['repetition_change'] ->>> repeat1 - ->>> ->>> # But the verbosity level does not change the actual report object. ->>> # It only changes the textual representaion of the object. 
We get the actual object here: ->>> repeat1.repetition -{'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} ->>> repeat1.t1 -4 ->>> repeat1.t2 -4 ->>> repeat1.up - +```py +>>> hashes = DeepHash(obj)[obj] ``` -### List that contains dictionary (Tree View) +At first it might seem weird to write DeepHash(obj)[obj], but remember that DeepHash(obj) is a dictionary that also holds the hashes of all the other objects that obj contains. -```python ->>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} ->>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> pprint (ddiff, indent = 2) -{ 'dictionary_item_removed': {}, - 'values_changed': {}} -Sets (Tree View): ->>> t1 = {1, 2, 8} ->>> t2 = {1, 2, 3, 5} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> print(ddiff) -{'set_item_removed': {}, 'set_item_added': {, }} ->>> # grabbing one item from set_item_removed set which has one item only ->>> (item,) = ddiff['set_item_removed'] ->>> item.up - ->>> item.up.t1 == t1 -True -``` +> - Please take a look at the [DeepHash docs](deepdiff/deephash_doc.rst) +> - The full documentation can be found on http://deepdiff.readthedocs.io/en/latest/ -### Named Tuples (Tree View): -```python ->>> from collections import namedtuple ->>> Point = namedtuple('Point', ['x', 'y']) ->>> t1 = Point(x=11, y=22) ->>> t2 = Point(x=11, y=23) ->>> print(DeepDiff(t1, t2, view='tree')) -{'values_changed': {}} -``` +# Using DeepDiff in unit tests -### Custom objects (Tree View): +`result` is the output of the function that is being tested. +`expected` is the expected output of the function. ```python ->>> class ClassA(object): -... a = 1 -... def __init__(self, b): -... self.b = b -... 
->>> t1 = ClassA(1) ->>> t2 = ClassA(2) ->>> ->>> print(DeepDiff(t1, t2, view='tree')) -{'values_changed': {}} +self.assertEqual(DeepDiff(expected, result), {}) ``` -### Object attribute added (Tree View): +or if you are using Pytest: -```python ->>> t2.c = "new attribute" ->>> pprint(DeepDiff(t1, t2, view='tree')) -{'attribute_added': {}, - 'values_changed': {}} -``` - -### Approximate decimals comparison (Significant digits after the point) (Tree View): ```python ->>> t1 = Decimal('1.52') ->>> t2 = Decimal('1.57') ->>> DeepDiff(t1, t2, significant_digits=0, view='tree') -{} ->>> ddiff = DeepDiff(t1, t2, significant_digits=1, view='tree') ->>> ddiff -{'values_changed': {}} ->>> (change1,) = ddiff['values_changed'] ->>> change1 - ->>> change1.t1 -Decimal('1.52') ->>> change1.t2 -Decimal('1.57') ->>> change1.path() -'root' +assert not DeepDiff(expected, result) ``` -### Approximate float comparison (Significant digits after the point) (Tree View): +In other words, assert that there is no diff between the expected and the result. -```python ->>> t1 = [ 1.1129, 1.3359 ] ->>> t2 = [ 1.113, 1.3362 ] ->>> ddiff = DeepDiff(t1, t2, significant_digits=3, view='tree') ->>> ddiff -{} ->>> ddiff = DeepDiff(t1, t2, view='tree') ->>> pprint(ddiff, indent=2) -{ 'values_changed': { , - }} ->>> ddiff = DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1, view='tree') ->>> ddiff -{'values_changed': {}} -``` +# Difference with Json Patch - All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. +Unlike [Json Patch](https://tools.ietf.org/html/rfc6902) which is designed only for Json objects, DeepDiff is designed specifically for almost all Python types. In addition to that, DeepDiff checks for type changes and attribute value changes that Json Patch does not cover since there are no such things in Json. Last but not least, DeepDiff gives you the exact path of the item(s) that were changed in Python syntax. 
-## Serialization +Example in Json Patch for replacing: -DeepDiff uses jsonpickle in order to serialize and deserialize its results into json. This works for both tree view and text view. +`{ "op": "replace", "path": "/a/b/c", "value": 42 }` -### Serialize and then deserialize back to deepdiff +Example in DeepDiff for the same operation: ```python ->>> t1 = {1: 1, 2: 2, 3: 3} ->>> t2 = {1: 1, 2: "2", 3: 3} ->>> ddiff = DeepDiff(t1, t2) ->>> jsoned = ddiff.json ->>> jsoned -'{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' ->>> ddiff_new = DeepDiff.from_json(jsoned) ->>> ddiff == ddiff_new -True +>>> item1 = {'a':{'b':{'c':'foo'}}} +>>> item2 = {'a':{'b':{'c':42}}} +>>> DeepDiff(item1, item2) +{'type_changes': {"root['a']['b']['c']": {'old_type': , 'new_value': 42, 'old_value': 'foo', 'new_type': }}} ``` -## Pycon 2016 +# Pycon 2016 I was honored to give a talk about how DeepDiff does what it does at Pycon 2016. Please check out the video and let me know what you think: @@ -788,12 +384,14 @@ I was honored to give a talk about how DeepDiff does what it does at Pycon 2016. And here is more info: -## Documentation +# Documentation -## Change log +# ChangeLog +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. 
+- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) - v3-2-1: Fixing hash of None @@ -824,20 +422,23 @@ And here is more info: - v0-5-6: Adding slots support - v0-5-5: Adding loop detection -## Authors +# Contribute -Seperman (Sep Dehpour) +1. Please make your PR against the dev branch +2. Please make sure that your PR has tests. Since DeepDiff is used in many sensitive data driven projects, we maintain 100% test coverage on the code. There are occasionally exceptions to that rule but that is rare. -- [Github](https://github.com/seperman) -- [Linkedin](http://www.linkedin.com/in/sepehr) -- [ZepWorks](http://www.zepworks.com) +Thank you! -Victor Hahn Castell +# Authors -- [hahncastell.de](http://hahncastell.de) -- [flexoptix.net](http://www.flexoptix.net) +- Seperman (Sep Dehpour) + - [Github](https://github.com/seperman) + - [Linkedin](http://www.linkedin.com/in/sepehr) + - [ZepWorks](http://www.zepworks.com) -Also thanks to: +- Victor Hahn Castell for major contributions + - [hahncastell.de](http://hahncastell.de) + - [flexoptix.net](http://www.flexoptix.net) - nfvs for Travis-CI setup script. - brbsix for initial Py3 porting. 
@@ -851,3 +452,10 @@ Also thanks to: - movermeyer for updating docs - maxrothman for search in inherited class attributes - maxrothman for search for types/objects +- MartyHub for exclude regex paths +- sreecodeslayer for DeepSearch match_string +- Brian Maissy (brianmaissy) for weakref fix, enum tests +- Bartosz Borowik (boba-2) for Exclude types fix when ignoring order +- Brian Maissy (brianmaissy) for fixing classes which inherit from classes with slots didn't have all of their slots compared +- Juan Soler (Soleronline) for adding ignore_type_number +- mthaddon for adding timedelta diffing support diff --git a/README.txt b/README.txt deleted file mode 100644 index 8950af60..00000000 --- a/README.txt +++ /dev/null @@ -1,299 +0,0 @@ -**DeepDiff v 3.3.0** - -Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes. - -Tested on Python 2.7, 3.3, 3.4, 3.5, 3.6, Pypy, Pypy3 - -Note: Checkout the github repo's readme for complete coverage of features: -https://github.com/seperman/deepdiff - -**Parameters** - -In addition to the 2 objects being compared: - -- ignore_order -- report_repetition -- verbose_level - -**Returns** - - A DeepDiff object that has already calculated the difference of the 2 items. - -**Supported data types** - -int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects! 
- -**Examples** - - -Importing - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> from __future__ import print_function # In case running on Python 2 - -Same object returns empty - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = t1 - >>> print(DeepDiff(t1, t2)) - {} - -Type of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:"2", 3:3} - >>> pprint(DeepDiff(t1, t2), indent=2) - { 'type_changes': { 'root[2]': { 'new_type': , - 'new_value': '2', - 'old_type': , - 'old_value': 2}}} - -Value of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> pprint(DeepDiff(t1, t2), indent=2) - {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} - -Item added and/or removed - >>> t1 = {1:1, 2:2, 3:3, 4:4} - >>> t2 = {1:1, 2:4, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff) - {'dictionary_item_added': ['root[5]', 'root[6]'], - 'dictionary_item_removed': ['root[4]'], - 'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} - -String difference - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} - >>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { 'root[2]': {'new_value': 4, 'old_value': 2}, - "root[4]['b']": { 'new_value': 'world!', - 'old_value': 'world'}}} - - -String difference 2 - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { "root[4]['b']": { 'diff': '--- \n' - '+++ \n' - '@@ -1,5 +1,4 @@\n' - '-world!\n' - '-Goodbye!\n' - '+world\n' - ' 1\n' - ' 2\n' - ' End', - 'new_value': 'world\n1\n2\nEnd', - 'old_value': 'world!\n' - 'Goodbye!\n' - '1\n' - '2\n' - 'End'}}} - - >>> - >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) - --- - +++ - @@ -1,5 +1,4 @@ - -world! - -Goodbye! 
- +world - 1 - 2 - End - -Type change - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'type_changes': { "root[4]['b']": { 'new_type': , - 'new_value': 'world\n\n\nEnd', - 'old_type': , - 'old_value': [1, 2, 3]}}} - -List difference - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} - -List difference 2: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'iterable_item_added': {"root[4]['b'][3]": 3}, - 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 'old_value': 2}, - "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} - -List difference ignoring order or duplicates: (with the same dictionaries as above) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2, ignore_order=True) - >>> print (ddiff) - {} - -List that contains dictionary: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': ["root[4]['b'][2][2]"], - 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} - -Sets: - >>> t1 = {1, 2, 8} - >>> t2 = {1, 2, 3, 5} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (DeepDiff(t1, t2)) - {'set_item_added': ['root[3]', 'root[5]'], 'set_item_removed': ['root[8]']} - -Named Tuples: - >>> from collections import namedtuple - >>> Point = namedtuple('Point', ['x', 'y']) - >>> t1 = Point(x=11, y=22) - 
>>> t2 = Point(x=11, y=23) - >>> pprint (DeepDiff(t1, t2)) - {'values_changed': {'root.y': {'new_value': 23, 'old_value': 22}}} - -Custom objects: - >>> class ClassA(object): - ... a = 1 - ... def __init__(self, b): - ... self.b = b - ... - >>> t1 = ClassA(1) - >>> t2 = ClassA(2) - >>> - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - -Object attribute added: - >>> t2.c = "new attribute" - >>> pprint(DeepDiff(t1, t2)) - {'attribute_added': ['root.c'], - 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - -Exclude certain types from comparison: - >>> l1 = logging.getLogger("test") - >>> l2 = logging.getLogger("test2") - >>> t1 = {"log": l1, 2: 1337} - >>> t2 = {"log": l2, 2: 1337} - >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) - {} - -Exclude part of your object tree from comparison: - >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} - >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} - >>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) - {} - - -Using DeepDiff in unit tests -result is the output of the function that is being tests. -expected is the expected output of the function. - >>> assertEqual(DeepDiff(result, expected), {}) - - -**Difference with Json Patch** - -Unlike Json Patch https://tools.ietf.org/html/rfc6902 which is designed only for Json objects, DeepDiff is designed specifically for almost all Python types. In addition to that, DeepDiff checks for type changes and attribute value changes that Json Patch does not cover since there are no such things in Json. Last but not least, DeepDiff gives you the exact path of the item(s) that were changed in Python syntax. 
- -Example in Json Patch for replacing: - { "op": "replace", "path": "/a/b/c", "value": 42 } - -Example in DeepDiff for the same operation: - >>> item1 = {'a':{'b':{'c':'foo'}}} - >>> item2 = {'a':{'b':{'c':42}}} - >>> DeepDiff(item1, item2) - {'type_changes': {"root['a']['b']['c']": {'old_type': , 'new_value': 42, 'old_value': 'foo', 'new_type': >> t1 = {1: 1, 2: 2, 3: 3} - >>> t2 = {1: 1, 2: "2", 3: 3} - >>> ddiff = DeepDiff(t1, t2) - >>> jsoned = ddiff.json - >>> jsoned - '{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' - >>> ddiff_new = DeepDiff.from_json(jsoned) - >>> ddiff == ddiff_new - True - - -**Pycon 2016** - -I was honored to give a talk about how DeepDiff does what it does at Pycon 2016. Please check out the video and let me know what you think: - -Diff It To Dig It Video -https://www.youtube.com/watch?v=J5r99eJIxF4 -And here is more info: -http://zepworks.com/blog/diff-it-to-digg-it/ - - -**Changelog** - -- v3-3-0: Searching for objects and class attributes -- v3-2-2: Adding help(deepdiff) -- v3-2-1: Fixing hash of None -- v3-2-0: Adding grep for search: object | grep(item) -- v3-1-3: Unicode vs. Bytes default fix -- v3-1-2: NotPresent Fix when item is added or removed. -- v3-1-1: Bug fix when item value is None (#58) -- v3-1-0: Serialization to/from json -- v3-0-0: Introducing Tree View -- v2-5-3: Bug fix on logging for content hash. -- v2-5-2: Bug fixes on content hash. -- v2-5-0: Adding ContentHash module to fix ignore_order once and for all. -- v2-1-0: Adding Deep Search. Now you can search for item in an object. -- v2-0-0: Exclusion patterns better coverage. Updating docs. -- v1-8-0: Exclusion patterns. -- v1-7-0: Deep Set comparison. -- v1-6-0: Unifying key names. i.e newvalue is new_value now. For backward compatibility, newvalue still works. 
-- v1-5-0: Fixing ignore order containers with unordered items. Adding significant digits when comparing decimals. Changes property is deprecated. -- v1-1-0: Changing Set, Dictionary and Object Attribute Add/Removal to be reported as Set instead of List. Adding Pypy compatibility. -- v1-0-2: Checking for ImmutableMapping type instead of dict -- v1-0-1: Better ignore order support -- v1-0-0: Restructuring output to make it more useful. This is NOT backward compatible. -- v0-6-1: Fixiing iterables with unhashable when order is ignored -- v0-6-0: Adding unicode support -- v0-5-9: Adding decimal support -- v0-5-8: Adding ignore order of unhashables support -- v0-5-7: Adding ignore order support -- v0-5-6: Adding slots support -- v0-5-5: Adding loop detection - -**Authors** -Sep Dehpour - -Github: https://github.com/seperman -Linkedin: http://www.linkedin.com/in/sepehr -ZepWorks: http://www.zepworks.com -Article about Deepdiff: http://zepworks.com/blog/diff-it-to-digg-it/ - -Victor Hahn Castell - -- [hahncastell.de](http://hahncastell.de) -- [flexoptix.net](http://www.flexoptix.net) - -Also thanks to: - -- nfvs for Travis-CI setup script. -- brbsix for initial Py3 porting. -- WangFenjin for unicode support. -- timoilya for comparing list of sets when ignoring order. -- Bernhard10 for significant digits comparison. -- b-jazz for PEP257 cleanup, Standardize on full names, fixing line endings. -- finnhughes for fixing __slots__ -- moloney for Unicode vs. 
Bytes default -- serv-inc for adding help(deepdiff) -- movermeyer for updating docs -- maxrothman for search in inherited class attributes -- maxrothman for search for types/objects diff --git a/conftest.py b/conftest.py new file mode 100644 index 00000000..03fd3199 --- /dev/null +++ b/conftest.py @@ -0,0 +1,4 @@ +import sys +import os + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'tests'))) diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index 95ad688c..a64b76ab 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,4 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep and DeepHash classes.""" +# flake8: noqa +__version__ = '4.0.0' import logging if __name__ == '__main__': @@ -6,5 +8,4 @@ from .diff import DeepDiff from .search import DeepSearch, grep -from .contenthash import DeepHash -from .helper import py3 +from .deephash import DeepHash diff --git a/deepdiff/base.py b/deepdiff/base.py new file mode 100644 index 00000000..11f1b97f --- /dev/null +++ b/deepdiff/base.py @@ -0,0 +1,37 @@ +from ordered_set import OrderedSet +from deepdiff.helper import strings, numbers + + +DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES = 55 + + +class Base: + numbers = numbers + strings = strings + + def get_significant_digits(self, significant_digits, ignore_numeric_type_changes): + if ignore_numeric_type_changes and not significant_digits: + significant_digits = DEFAULT_SIGNIFICANT_DIGITS_WHEN_IGNORE_NUMERIC_TYPES + if significant_digits is not None and significant_digits < 0: + raise ValueError( + "significant_digits must be None or a non-negative integer") + return significant_digits + + def get_ignore_types_in_groups(self, ignore_type_in_groups, + ignore_string_type_changes, + ignore_numeric_type_changes): + if ignore_type_in_groups: + if isinstance(ignore_type_in_groups[0], type): + ignore_type_in_groups = [OrderedSet(ignore_type_in_groups)] + else: + ignore_type_in_groups = list(map(OrderedSet, 
ignore_type_in_groups)) + else: + ignore_type_in_groups = [] + + if ignore_string_type_changes and self.strings not in ignore_type_in_groups: + ignore_type_in_groups.append(OrderedSet(self.strings)) + + if ignore_numeric_type_changes and self.numbers not in ignore_type_in_groups: + ignore_type_in_groups.append(OrderedSet(self.numbers)) + + return ignore_type_in_groups diff --git a/deepdiff/contenthash.py b/deepdiff/contenthash.py deleted file mode 100644 index 501cb073..00000000 --- a/deepdiff/contenthash.py +++ /dev/null @@ -1,275 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import print_function -import sys -from collections import Iterable -from collections import MutableMapping -from collections import defaultdict -from decimal import Decimal -from hashlib import sha1 -import logging - -from deepdiff.helper import py3, int, strings, numbers, items - -logger = logging.getLogger(__name__) - - -class Skipped(object): - def __repr__(self): - return "Skipped" # pragma: no cover - - def __str__(self): - return "Skipped" # pragma: no cover - - -class Unprocessed(object): - def __repr__(self): - return "Error: Unprocessed" # pragma: no cover - - def __str__(self): - return "Error: Unprocessed" # pragma: no cover - - -class NotHashed(object): - def __repr__(self): - return "Error: NotHashed" # pragma: no cover - - def __str__(self): - return "Error: NotHashed" # pragma: no cover - - -class DeepHash(dict): - r""" - **DeepHash** - """ - - def __init__(self, - obj, - hashes=None, - exclude_types=set(), - hasher=hash, - ignore_repetition=True, - significant_digits=None, - **kwargs): - if kwargs: - raise ValueError( - ("The following parameter(s) are not valid: %s\n" - "The valid parameters are obj, hashes, exclude_types." 
- "hasher and ignore_repetition.") % ', '.join(kwargs.keys())) - self.obj = obj - self.exclude_types = set(exclude_types) - self.exclude_types_tuple = tuple( - exclude_types) # we need tuple for checking isinstance - self.ignore_repetition = ignore_repetition - - self.hasher = hasher - hashes = hashes if hashes else {} - self.update(hashes) - self['unprocessed'] = [] - self.unprocessed = Unprocessed() - self.skipped = Skipped() - self.not_hashed = NotHashed() - self.significant_digits = significant_digits - - self.__hash(obj, parents_ids=frozenset({id(obj)})) - - if self['unprocessed']: - logger.warning("Can not hash the following items: {}.".format(self['unprocessed'])) - else: - del self['unprocessed'] - - @staticmethod - def sha1hex(obj): - """Use Sha1 for more accuracy.""" - if py3: # pragma: no cover - if isinstance(obj, str): - obj = "{}:{}".format(type(obj).__name__, obj) - obj = obj.encode('utf-8') - elif isinstance(obj, bytes): - obj = type(obj).__name__.encode('utf-8') + b":" + obj - else: # pragma: no cover - if isinstance(obj, unicode): - obj = u"{}:{}".format(type(obj).__name__, obj) - obj = obj.encode('utf-8') - elif isinstance(obj, str): - obj = type(obj).__name__ + ":" + obj - return sha1(obj).hexdigest() - - @staticmethod - def __add_to_frozen_set(parents_ids, item_id): - parents_ids = set(parents_ids) - parents_ids.add(item_id) - return frozenset(parents_ids) - - def __get_and_set_str_hash(self, obj): - obj_id = id(obj) - result = self.hasher(obj) - result = "str:{}".format(result) - self[obj_id] = result - return result - - def __hash_obj(self, obj, parents_ids=frozenset({}), is_namedtuple=False): - """Difference of 2 objects""" - try: - if is_namedtuple: - obj = obj._asdict() - else: - obj = obj.__dict__ - except AttributeError: - try: - obj = {i: getattr(obj, i) for i in obj.__slots__} - except AttributeError: - self['unprocessed'].append(obj) - return self.unprocessed - - result = self.__hash_dict(obj, parents_ids) - result = 
"nt{}".format(result) if is_namedtuple else "obj{}".format( - result) - return result - - def __skip_this(self, obj): - skip = False - if isinstance(obj, self.exclude_types_tuple): - skip = True - - return skip - - def __hash_dict(self, obj, parents_ids=frozenset({})): - - result = [] - obj_keys = set(obj.keys()) - - for key in obj_keys: - key_hash = self.__hash(key) - item = obj[key] - item_id = id(item) - if parents_ids and item_id in parents_ids: - continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) - hashed = self.__hash(item, parents_ids_added) - hashed = "{}:{}".format(key_hash, hashed) - result.append(hashed) - - result.sort() - result = ';'.join(result) - result = "dict:{%s}" % result - - return result - - def __hash_set(self, obj): - return "set:{}".format(self.__hash_iterable(obj)) - - def __hash_iterable(self, obj, parents_ids=frozenset({})): - - result = defaultdict(int) - - for i, x in enumerate(obj): - if self.__skip_this(x): - continue - - item_id = id(x) - if parents_ids and item_id in parents_ids: - continue - - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) - hashed = self.__hash(x, parents_ids_added) - result[hashed] += 1 - - if self.ignore_repetition: - result = list(result.keys()) - else: - result = [ - '{}|{}'.format(i[0], i[1]) for i in getattr(result, items)() - ] - - result.sort() - result = ','.join(result) - result = "{}:{}".format(type(obj).__name__, result) - - return result - - def __hash_str(self, obj): - return self.__get_and_set_str_hash(obj) - - def __hash_number(self, obj): - # Based on diff.DeepDiff.__diff_numbers - if self.significant_digits is not None and isinstance(obj, ( - float, complex, Decimal)): - obj_s = ("{:.%sf}" % self.significant_digits).format(obj) - - # Special case for 0: "-0.00" should compare equal to "0.00" - if set(obj_s) <= set("-0."): - obj_s = "0.00" - result = "number:{}".format(obj_s) - obj_id = id(obj) - self[obj_id] = result - else: - result = 
"{}:{}".format(type(obj).__name__, obj) - return result - - def __hash_tuple(self, obj, parents_ids): - # Checking to see if it has _fields. Which probably means it is a named - # tuple. - try: - obj._asdict - # It must be a normal tuple - except AttributeError: - result = self.__hash_iterable(obj, parents_ids) - # We assume it is a namedtuple then - else: - result = self.__hash_obj(obj, parents_ids, is_namedtuple=True) - return result - - def __hash(self, obj, parent="root", parents_ids=frozenset({})): - """The main diff method""" - - obj_id = id(obj) - if obj_id in self: - return self[obj_id] - - result = self.not_hashed - - if self.__skip_this(obj): - result = self.skipped - - elif obj is None: - result = 'NONE' - - elif isinstance(obj, strings): - result = self.__hash_str(obj) - - elif isinstance(obj, numbers): - result = self.__hash_number(obj) - - elif isinstance(obj, MutableMapping): - result = self.__hash_dict(obj, parents_ids) - - elif isinstance(obj, tuple): - result = self.__hash_tuple(obj, parents_ids) - - elif isinstance(obj, (set, frozenset)): - result = self.__hash_set(obj) - - elif isinstance(obj, Iterable): - result = self.__hash_iterable(obj, parents_ids) - - else: - result = self.__hash_obj(obj, parents_ids) - - if result != self.not_hashed and obj_id not in self and not isinstance( - obj, numbers): - self[obj_id] = result - - if result is self.not_hashed: # pragma: no cover - self[obj_id] = self.not_hashed - self['unprocessed'].append(obj) - - return result - - -if __name__ == "__main__": # pragma: no cover - if not py3: - sys.exit("Please run with Python 3 to verify the doc strings.") - import doctest - doctest.testmod() diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py new file mode 100644 index 00000000..684d5ab6 --- /dev/null +++ b/deepdiff/deephash.py @@ -0,0 +1,342 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import os +import mmh3 +import logging +from collections import Iterable +from collections import MutableMapping +from 
collections import defaultdict +from decimal import Decimal +from hashlib import sha1 + +from deepdiff.helper import (strings, numbers, unprocessed, not_hashed, add_to_frozen_set, + convert_item_or_items_into_set_else_none, current_dir, + convert_item_or_items_into_compiled_regexes_else_none, + get_id) +from deepdiff.base import Base + +logger = logging.getLogger(__name__) + +UNPROCESSED = 'unprocessed' +MURMUR_SEED = 1203 + +RESERVED_DICT_KEYS = {UNPROCESSED} +EMPTY_FROZENSET = frozenset({}) + +INDEX_VS_ATTRIBUTE = ('[%s]', '.%s') + +KEY_TO_VAL_STR = "{}:{}" + +ZERO_DECIMAL_CHARACTERS = set("-0.") + + +def prepare_string_for_hashing(obj, ignore_string_type_changes=False): + """ + Clean type conversions + """ + original_type = obj.__class__.__name__ + if isinstance(obj, bytes): + obj = obj.decode('utf-8') + if not ignore_string_type_changes: + obj = KEY_TO_VAL_STR.format(original_type, obj) + return obj + + +with open(os.path.join(current_dir, 'deephash_doc.rst'), 'r') as doc_file: + doc = doc_file.read() + + +class DeepHash(dict, Base): + __doc__ = doc + + def __init__(self, + obj, + *, + hashes=None, + exclude_types=None, + exclude_paths=None, + exclude_regex_paths=None, + hasher=None, + ignore_repetition=True, + significant_digits=None, + apply_hash=True, + ignore_type_in_groups=None, + ignore_string_type_changes=False, + ignore_numeric_type_changes=False, + **kwargs): + if kwargs: + raise ValueError( + ("The following parameter(s) are not valid: %s\n" + "The valid parameters are obj, hashes, exclude_types," + "exclude_paths, exclude_regex_paths, hasher, ignore_repetition," + "significant_digits, apply_hash, ignore_type_in_groups, ignore_string_type_changes," + "ignore_numeric_type_changes") % ', '.join(kwargs.keys())) + self.obj = obj + exclude_types = set() if exclude_types is None else set(exclude_types) + self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance + self.ignore_repetition = ignore_repetition + self.exclude_paths = 
convert_item_or_items_into_set_else_none(exclude_paths) + self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) + + self.hasher = self.murmur3_128bit if hasher is None else hasher + hashes = hashes if hashes else {} + self.update(hashes) + self[UNPROCESSED] = [] + + self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) + self.ignore_type_in_groups = self.get_ignore_types_in_groups( + ignore_type_in_groups, + ignore_string_type_changes, ignore_numeric_type_changes) + self.ignore_string_type_changes = ignore_string_type_changes + self.ignore_numeric_type_changes = ignore_numeric_type_changes + # makes the hash return constant size result if true + # the only time it should be set to False is when + # testing the individual hash functions for different types of objects. + self.apply_hash = apply_hash + + self._hash(obj, parent="root", parents_ids=frozenset({get_id(obj)})) + + if self[UNPROCESSED]: + logger.warning("Can not hash the following items: {}.".format(self[UNPROCESSED])) + else: + del self[UNPROCESSED] + + @staticmethod + def sha1hex(obj): + """Use Sha1 as a cryptographic hash.""" + obj = obj.encode('utf-8') + return sha1(obj).hexdigest() + + @staticmethod + def murmur3_64bit(obj): + """ + Use murmur3_64bit for 64 bit hash by passing this method: + hasher=DeepHash.murmur3_64bit + """ + obj = obj.encode('utf-8') + # This version of murmur3 returns two 64bit integers. + return mmh3.hash64(obj, MURMUR_SEED)[0] + + @staticmethod + def murmur3_128bit(obj): + """ + Use murmur3_128bit for bit hash by passing this method: + hasher=DeepHash.murmur3_128bit + This hasher is the default hasher. 
+ """ + obj = obj.encode('utf-8') + return mmh3.hash128(obj, MURMUR_SEED) + + def __getitem__(self, obj): + # changed_to_id = False + key = obj + result = None + + try: + result = super().__getitem__(key) + except (TypeError, KeyError): + key = get_id(obj) + try: + result = super().__getitem__(key) + except KeyError: + raise KeyError('{} is not one of the hashed items.'.format(obj)) from None + return result + + def __contains__(self, obj): + try: + hash(obj) + except TypeError: + key = get_id(obj) + else: + key = obj + return super().__contains__(key) + + def _prep_obj(self, obj, parent, parents_ids=EMPTY_FROZENSET, is_namedtuple=False): + """Difference of 2 objects""" + original_type = type(obj) + try: + if is_namedtuple: + obj = obj._asdict() + else: + obj = obj.__dict__ + except AttributeError: + try: + obj = {i: getattr(obj, i) for i in obj.__slots__} + except AttributeError: + self[UNPROCESSED].append(obj) + return unprocessed + + result = self._prep_dict(obj, parent=parent, parents_ids=parents_ids, + print_as_attribute=True, original_type=original_type) + result = "nt{}".format(result) if is_namedtuple else "obj{}".format(result) + return result + + def _skip_this(self, obj, parent): + skip = False + if self.exclude_paths and parent in self.exclude_paths: + skip = True + elif self.exclude_regex_paths and any( + [exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]): + skip = True + else: + if self.exclude_types_tuple and isinstance(obj, self.exclude_types_tuple): + skip = True + + return skip + + def _prep_dict(self, obj, parent, parents_ids=EMPTY_FROZENSET, print_as_attribute=False, original_type=None): + + result = [] + + key_text = "%s{}".format(INDEX_VS_ATTRIBUTE[print_as_attribute]) + for key, item in obj.items(): + key_formatted = "'%s'" % key if not print_as_attribute and isinstance(key, strings) else key + key_in_report = key_text % (parent, key_formatted) + + key_hash = self._hash(key, parent=key_in_report, 
parents_ids=parents_ids) + item_id = get_id(item) + if (parents_ids and item_id in parents_ids) or self._skip_this(item, parent=key_in_report): + continue + parents_ids_added = add_to_frozen_set(parents_ids, item_id) + hashed = self._hash(item, parent=key_in_report, parents_ids=parents_ids_added) + hashed = KEY_TO_VAL_STR.format(key_hash, hashed) + result.append(hashed) + + result.sort() + result = ';'.join(result) + if print_as_attribute: + type_ = original_type or type(obj) + type_str = type_.__name__ + for type_group in self.ignore_type_in_groups: + if type_ in type_group: + type_str = ','.join(map(lambda x: x.__name__, type_group)) + break + else: + type_str = 'dict' + return "%s:{%s}" % (type_str, result) + + def _prep_set(self, obj, parent, parents_ids=EMPTY_FROZENSET): + return "set:{}".format(self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids)) + + def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET): + + result = defaultdict(int) + + for i, item in enumerate(obj): + if self._skip_this(item, parent="{}[{}]".format(parent, i)): + continue + + item_id = get_id(item) + if parents_ids and item_id in parents_ids: + continue + + parents_ids_added = add_to_frozen_set(parents_ids, item_id) + hashed = self._hash(item, parent=parent, parents_ids=parents_ids_added) + # counting repetitions + result[hashed] += 1 + + if self.ignore_repetition: + result = list(result.keys()) + else: + result = [ + '{}|{}'.format(i, v) for i, v in result.items() + ] + + result = sorted(map(str, result)) # making sure the result items are string and sorted so join command works. 
+ result = ','.join(result) + result = KEY_TO_VAL_STR.format(type(obj).__name__, result) + + return result + + def _prep_number(self, obj): + if self.significant_digits is not None and ( + self.ignore_numeric_type_changes or isinstance(obj, (float, complex, Decimal))): + obj_s = ("{:.%sf}" % self.significant_digits).format(obj) + + # Special case for 0: "-0.00" should compare equal to "0.00" + if set(obj_s) <= ZERO_DECIMAL_CHARACTERS: + obj_s = "0.00" + result = "number:{}".format(obj_s) + else: + result = KEY_TO_VAL_STR.format(type(obj).__name__, obj) + return result + + def _prep_tuple(self, obj, parent, parents_ids): + # Checking to see if it has _fields. Which probably means it is a named + # tuple. + try: + obj._asdict + # It must be a normal tuple + except AttributeError: + result = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids) + # We assume it is a namedtuple then + else: + result = self._prep_obj(obj, parent, parents_ids=parents_ids, is_namedtuple=True) + return result + + def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET): + """The main diff method""" + + try: + result = self[obj] + except (TypeError, KeyError): + pass + else: + return result + + result = not_hashed + + if self._skip_this(obj, parent): + return + + elif obj is None: + result = 'NONE' + + elif isinstance(obj, strings): + result = prepare_string_for_hashing(obj, ignore_string_type_changes=self.ignore_string_type_changes) + + elif isinstance(obj, numbers): + result = self._prep_number(obj) + + elif isinstance(obj, MutableMapping): + result = self._prep_dict(obj=obj, parent=parent, parents_ids=parents_ids) + + elif isinstance(obj, tuple): + result = self._prep_tuple(obj=obj, parent=parent, parents_ids=parents_ids) + + elif isinstance(obj, (set, frozenset)): + result = self._prep_set(obj=obj, parent=parent, parents_ids=parents_ids) + + elif isinstance(obj, Iterable): + result = self._prep_iterable(obj=obj, parent=parent, parents_ids=parents_ids) + + else: + result 
= self._prep_obj(obj=obj, parent=parent, parents_ids=parents_ids) + + if result is not_hashed: # pragma: no cover + self[UNPROCESSED].append(obj) + + elif result is unprocessed: + pass + + elif self.apply_hash: + if isinstance(obj, strings): + result_cleaned = result + else: + result_cleaned = prepare_string_for_hashing(result, ignore_string_type_changes=self.ignore_string_type_changes) + result = self.hasher(result_cleaned) + + # It is important to keep the hash of all objects. + # The hashes will be later used for comparing the objects. + try: + self[obj] = result + except TypeError: + obj_id = get_id(obj) + self[obj_id] = result + + return result + + +if __name__ == "__main__": # pragma: no cover + import doctest + doctest.testmod() diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst new file mode 100644 index 00000000..8318c161 --- /dev/null +++ b/deepdiff/deephash_doc.rst @@ -0,0 +1,204 @@ +**DeepHash** + +DeepHash calculates the hash of objects based on their contents in a deterministic way. +This way 2 objects with the same content should have the same hash. + +The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. +For example you can use DeepHash to calculate the hash of a set or a dictionary! + +At the core of it, DeepHash is a deterministic serialization of your object into a string so it +can be passed to a hash function. By default it uses Murmur 3 128 bit hash function which is a +fast, non-cryptographic hashing function. You have the option to pass any another hashing function to be used instead. + +**Import** + >>> from deepdiff import DeepHash + +**Parameters** + +obj : any object, The object to be hashed based on its content. + +hashes: dictionary, default = empty dictionary + A dictionary of {object or object id: object hash} to start with. 
+ Any object that is encountered and it is already in the hashes dictionary or its id is in the hashes dictionary, + will re-use the hash that is provided by this dictionary instead of re-calculating + its hash. This is typically used when you have a series of objects to be hashed and there might be repeats of the same object. + +exclude_types: list, default = None + List of object types to exclude from hashing. + +exclude_paths: list, default = None + List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path. + +exclude_regex_paths: list, default = None + List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one regex path. + +hasher: function. default = DeepHash.murmur3_128bit + hasher is the hashing function. The default is DeepHash.murmur3_128bit. + But you can pass another hash function to it if you want. + For example a cryptographic hash function or Python's builtin hash function. + All it needs is a function that takes the input in string format and returns the hash. + + You can use it by passing: hasher=hash for Python's builtin hash. + + The following alternatives are already provided: + + - hasher=DeepHash.murmur3_128bit + - hasher=DeepHash.murmur3_64bit + - hasher=DeepHash.sha1hex + +ignore_repetition: Boolean, default = True + If True, repetitions in an iterable do not change the hash of the iterable. Set it to False if repetitions should cause the hash to be different. + Note that the deepdiff diffing functionality keeps this at the default at all times. + But if you are using DeepHash directly, you can set this parameter. + +significant_digits : int >= 0, default=None + If it is a non negative integer, it compares only that many digits AFTER + the decimal point. + + This only affects floats, decimal.Decimal and complex numbers. + + Take a look at DeepDiff.diff docs for explanation of how this works. 
+ +apply_hash: Boolean, default = True + DeepHash at its core is doing deterministic serialization of objects into strings. + Then it hashes the string. + The only time you want the apply_hash to be False is if you want to know what + the string representation of your object is BEFORE it gets hashed. + +ignore_type_in_groups + Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the time the shortcuts provided to you are enough. + The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts in this page. ignore_type_in_groups gives you more control compared to the shortcuts. + + For example let's say you have specifically str and bytes datatypes to be ignored for type changes. Then you have a couple of options: + + 1. Set ignore_string_type_changes=True (it is False by default). + 2. Set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly the same as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . + + Now what if you want also typeA and typeB to be ignored when comparing against each other? + + 1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] + 2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] + +ignore_string_type_changes: Boolean, default = False + string type conversions should not affect the hash output when this is set to True. + For example "Hello" and b"Hello" should produce the same hash. + +By setting it to True, both the string and bytes of hello return the same hash. 
+ >>> DeepHash(b'hello', ignore_string_type_changes=True) + {b'hello': 221860156526691709602818861774599422448} + >>> DeepHash('hello', ignore_string_type_changes=True) + {'hello': 221860156526691709602818861774599422448} + +ignore_numeric_type_changes +Default: False + +ignore_numeric_type_changes: Boolean, default = False + numeric type conversions should not affect the hash output when this is set to True. + For example 10, 10.0 and Decimal(10) should produce the same hash. + When ignore_numeric_type_changes is set to True, all numbers are converted + to decimals with the precision of significant_digits parameter. + If no significant_digits is passed by the user, a default value of 55 is used. + + For example if significant_digits=5, 1.1 and Decimal(1.1) are both converted to 1.10000 + + That way they both produce the same hash. + + >>> t1 = {1: 1, 2: 2.22} + >>> t2 = {1: 1.0, 2: 2.22} + >>> DeepHash(t1)[1] + 231678797214551245419120414857003063149 + >>> DeepHash(t1)[1.0] + 231678797214551245419120414857003063149 + +You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers . If you want to pass both: + >>> from deepdiff import DeepDiff + >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] + + +ignore_type_in_groups example with custom objects: + >>> class Burrito: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> class Taco: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... 
+ >>> + >>> burrito = Burrito() + >>> taco = Taco() + >>> + >>> burritos = [burrito] + >>> tacos = [taco] + >>> + >>> d1 = DeepHash(burritos, ignore_type_in_groups=[(Taco, Burrito)]) + >>> d2 = DeepHash(tacos, ignore_type_in_groups=[(Taco, Burrito)]) + >>> d1[burrito] == d2[taco] + True + +**Returns** + A dictionary of {item: item hash}. + If your object is nested, it will build hashes of all the objects it contains too. + + +**Examples** + +Let's say you have a dictionary object. + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + +If you try to hash it: + >>> hash(obj) + Traceback (most recent call last): + File "", line 1, in + TypeError: unhashable type: 'dict' + +But with DeepHash: + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + >>> DeepHash(obj) + {1: 234041559348429806012597903916437026784, 2: 148655924348182454950690728321917595655, 'a': 119173504597196970070553896747624927922, 'b': 4994827227437929991738076607196210252, '!>*id4488569408': 32452838416412500686422093274247968754} + +So what is exactly the hash of obj in this case? +DeepHash is calculating the hash of the obj and any other object that obj contains. +The output of DeepHash is a dictionary of object IDs to their hashes. +In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: + >>> hashes = DeepHash(obj) + >>> hashes[obj] + 34150898645750099477987229399128149852 + +Which you can write as: + >>> hashes = DeepHash(obj)[obj] + +At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. + +The result hash is 34150898645750099477987229399128149852 which is generated by +Murmur 3 128bit hashing algorithm. If you prefer to use another hashing algorithm, you can pass it using the hasher parameter. 
Read more about Murmur3 here: https://en.wikipedia.org/wiki/MurmurHash + +If you do a deep copy of obj, it should still give you the same hash: + >>> from copy import deepcopy + >>> obj2 = deepcopy(obj) + >>> DeepHash(obj2)[obj2] + 34150898645750099477987229399128149852 + +Note that by default DeepHash will include string type differences. So if your strings were bytes: + >>> obj3 = {1: 2, b'a': b'b'} + >>> DeepHash(obj3)[obj3] + 64067525765846024488103933101621212760 + +But if you want the same hash if string types are different, set ignore_string_type_changes to True: + >>> DeepHash(obj3, ignore_string_type_changes=True)[obj3] + 34150898645750099477987229399128149852 + +ignore_numeric_type_changes is by default False too. + >>> obj1 = {4:10} + >>> obj2 = {4.0: Decimal(10.0)} + >>> DeepHash(obj1)[4] == DeepHash(obj2)[4.0] + False + +But by setting it to True, we can get the same hash. + >>> DeepHash(obj1, ignore_numeric_type_changes=True)[4] == DeepHash(obj2, ignore_numeric_type_changes=True)[4.0] + True diff --git a/deepdiff/diff.py b/deepdiff/diff.py old mode 100644 new mode 100755 index c14e5b28..ea6d41cc --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -7,606 +7,41 @@ # every time you run the docstrings. # However the docstring expects it in a specific order in order to pass! 
-from __future__ import absolute_import -from __future__ import print_function - import difflib import logging +import json import jsonpickle +import warnings +import os from decimal import Decimal +from itertools import zip_longest +from collections.abc import Mapping, Iterable -from collections import Mapping -from collections import Iterable +from ordered_set import OrderedSet -from deepdiff.helper import py3, strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, IndexedHash, Verbose +from deepdiff.helper import (strings, bytes_type, numbers, ListItemRemovedOrAdded, notpresent, + IndexedHash, Verbose, unprocessed, json_convertor_default, add_to_frozen_set, + convert_item_or_items_into_set_else_none, get_type, + convert_item_or_items_into_compiled_regexes_else_none, current_dir) from deepdiff.model import RemapDict, ResultDict, TextResult, TreeResult, DiffLevel -from deepdiff.model import DictRelationship, AttributeRelationship # , REPORT_KEYS +from deepdiff.model import DictRelationship, AttributeRelationship from deepdiff.model import SubscriptableIterableRelationship, NonSubscriptableIterableRelationship, SetRelationship -from deepdiff.contenthash import DeepHash - -if py3: # pragma: no cover - from itertools import zip_longest -else: # pragma: no cover - from itertools import izip_longest as zip_longest +from deepdiff.deephash import DeepHash +from deepdiff.base import Base logger = logging.getLogger(__name__) +warnings.simplefilter('once', DeprecationWarning) +TREE_VIEW = 'tree' +TEXT_VIEW = 'text' -class DeepDiff(ResultDict): - r""" - **DeepDiff** - - Deep Difference of dictionaries, iterables, strings and almost any other object. - It will recursively look for all the changes. - - DeepDiff 3.0 added the concept of views. - There is a default "text" view and a "tree" view. 
- - **Parameters** - - t1 : A dictionary, list, string or any python object that has __dict__ or __slots__ - This is the first item to be compared to the second item - - t2 : dictionary, list, string or almost any python object that has __dict__ or __slots__ - The second item is to be compared to the first one - - ignore_order : Boolean, defalt=False ignores orders for iterables. - Note that if you have iterables contatining any unhashable, ignoring order can be expensive. - Normally ignore_order does not report duplicates and repetition changes. - In order to report repetitions, set report_repetition=True in addition to ignore_order=True +with open(os.path.join(current_dir, 'diff_doc.rst'), 'r') as doc_file: + doc = doc_file.read() - report_repetition : Boolean, default=False reports repetitions when set True - ONLY when ignore_order is set True too. This works for iterables. - This feature currently is experimental and is not production ready. - significant_digits : int >= 0, default=None. - If it is a non negative integer, it compares only that many digits AFTER - the decimal point. - - This only affects floats, decimal.Decimal and complex. - - Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits - - Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 - - For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) - - verbose_level : int >= 0, default = 1. - Higher verbose level shows you more details. - For example verbose level 1 shows what dictionary item are added or removed. - And verbose level 2 shows the value of the items that are added or removed too. - - exclude_paths: list, default = None. - List of paths to exclude from the report. - - exclude_types: list, default = None. - List of object types to exclude from the report. - - view: string, default = text - Starting the version 3 you can choosethe view into the deepdiff results. 
- The default is the text view which has been the only view up until now. - The new view is called the tree view which allows you to traverse through - the tree of changed items. - - **Returns** - - A DeepDiff object that has already calculated the difference of the 2 items. - - **Supported data types** - - int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects! - - **Text View** - - Text view is the original and currently the default view of DeepDiff. - - It is called text view because the results contain texts that represent the path to the data: - - Example of using the text view. - >>> from deepdiff import DeepDiff - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> print(ddiff) - {'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}} - - So for example ddiff['dictionary_item_removed'] is a set if strings thus this is called the text view. - - .. seealso:: - The following examples are using the *default text view.* - The Tree View is introduced in DeepDiff v3 and provides - traversing capabilitie through your diffed data and more! - Read more about the Tree View at the bottom of this page. 
- - Importing - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - - Same object returns empty - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = t1 - >>> print(DeepDiff(t1, t2)) - {} - - Type of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:"2", 3:3} - >>> pprint(DeepDiff(t1, t2), indent=2) - { 'type_changes': { 'root[2]': { 'new_type': , - 'new_value': '2', - 'old_type': , - 'old_value': 2}}} - - Value of an item has changed - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) - {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} - - Item added and/or removed - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff) - {'dictionary_item_added': {'root[5]', 'root[6]'}, - 'dictionary_item_removed': {'root[4]'}} - - Set verbose level to 2 in order to see the added or removed items with their values - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2, verbose_level=2) - >>> pprint(ddiff, indent=2) - { 'dictionary_item_added': {'root[5]': 5, 'root[6]': 6}, - 'dictionary_item_removed': {'root[4]': 4}} - - String difference - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} - >>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { 'root[2]': {'new_value': 4, 'old_value': 2}, - "root[4]['b']": { 'new_value': 'world!', - 'old_value': 'world'}}} - - - String difference 2 - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'values_changed': { "root[4]['b']": { 'diff': '--- \n' - '+++ \n' - '@@ -1,5 +1,4 @@\n' - '-world!\n' - '-Goodbye!\n' - '+world\n' - ' 1\n' - ' 2\n' - ' End', - 'new_value': 'world\n1\n2\nEnd', - 'old_value': 'world!\n' 
- 'Goodbye!\n' - '1\n' - '2\n' - 'End'}}} - - >>> - >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) - --- - +++ - @@ -1,5 +1,4 @@ - -world! - -Goodbye! - +world - 1 - 2 - End - - - Type change - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'type_changes': { "root[4]['b']": { 'new_type': , - 'new_value': 'world\n\n\nEnd', - 'old_type': , - 'old_value': [1, 2, 3]}}} - - And if you don't care about the value of items that have changed type, please set verbose level to 0 - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:"2", 3:3} - >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) - { 'type_changes': { 'root[2]': { 'new_type': , - 'old_type': }}} - - List difference - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} - - List difference 2: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'iterable_item_added': {"root[4]['b'][3]": 3}, - 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 'old_value': 2}, - "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} - - List difference ignoring order or duplicates: (with the same dictionaries as above) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2, ignore_order=True) - >>> print (ddiff) - {} - - List difference ignoring order but reporting repetitions: - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = [1, 3, 1, 4] - >>> t2 = [4, 4, 1] - >>> ddiff = DeepDiff(t1, t2, ignore_order=True, 
report_repetition=True) - >>> pprint(ddiff, indent=2) - { 'iterable_item_removed': {'root[1]': 3}, - 'repetition_change': { 'root[0]': { 'new_indexes': [2], - 'new_repeat': 1, - 'old_indexes': [0, 2], - 'old_repeat': 2, - 'value': 1}, - 'root[3]': { 'new_indexes': [0, 1], - 'new_repeat': 2, - 'old_indexes': [3], - 'old_repeat': 1, - 'value': 4}}} - - List that contains dictionary: - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': {"root[4]['b'][2][2]"}, - 'values_changed': {"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} - - Sets: - >>> t1 = {1, 2, 8} - >>> t2 = {1, 2, 3, 5} - >>> ddiff = DeepDiff(t1, t2) - >>> pprint(ddiff) - {'set_item_added': {'root[5]', 'root[3]'}, 'set_item_removed': {'root[8]'}} - - Named Tuples: - >>> from collections import namedtuple - >>> Point = namedtuple('Point', ['x', 'y']) - >>> t1 = Point(x=11, y=22) - >>> t2 = Point(x=11, y=23) - >>> pprint (DeepDiff(t1, t2)) - {'values_changed': {'root.y': {'new_value': 23, 'old_value': 22}}} - - Custom objects: - >>> class ClassA(object): - ... a = 1 - ... def __init__(self, b): - ... self.b = b - ... 
- >>> t1 = ClassA(1) - >>> t2 = ClassA(2) - >>> - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - - Object attribute added: - >>> t2.c = "new attribute" - >>> pprint(DeepDiff(t1, t2)) - {'attribute_added': {'root.c'}, - 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} - - Approximate decimals comparison (Significant digits after the point): - >>> t1 = Decimal('1.52') - >>> t2 = Decimal('1.57') - >>> DeepDiff(t1, t2, significant_digits=0) - {} - >>> DeepDiff(t1, t2, significant_digits=1) - {'values_changed': {'root': {'old_value': Decimal('1.52'), 'new_value': Decimal('1.57')}}} - - Approximate float comparison (Significant digits after the point): - >>> t1 = [ 1.1129, 1.3359 ] - >>> t2 = [ 1.113, 1.3362 ] - >>> pprint(DeepDiff(t1, t2, significant_digits=3)) - {} - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, - 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} - >>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) - {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} - - - .. note:: - All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. - - - **Tree View** - - Starting the version 3 You can chooe the view into the deepdiff results. - The tree view provides you with tree objects that you can traverse through to find - the parents of the objects that are diffed and the actual objects that are being diffed. - This view is very useful when dealing with nested objects. - Note that tree view always returns results in the form of Python sets. - - You can traverse through the tree elements! - - .. note:: - The Tree view is just a different representation of the diffed data. - Behind the scene, DeepDiff creates the tree view first and then converts it to textual representation for the text view. - - .. 
code:: text - - +---------------------------------------------------------------+ - | | - | parent(t1) parent node parent(t2) | - | + ^ + | - +------|--------------------------|---------------------|-------+ - | | | up | - | Child | | | ChildRelationship - | Relationship | | | - | down | | | - +------|----------------------|-------------------------|-------+ - | v v v | - | child(t1) child node child(t2) | - | | - +---------------------------------------------------------------+ - - - :up: Move up to the parent node - :down: Move down to the child node - :path(): Get the path to the current node - :t1: The first item in the current node that is being diffed - :t2: The second item in the current node that is being diffed - :additional: Additional information about the node i.e. repetition - :repetition: Shortcut to get the repetition report - - - The tree view allows you to have more than mere textual representaion of the diffed objects. - It gives you the actual objects (t1, t2) throughout the tree of parents and children. - - **Examples Tree View** - - .. note:: - The Tree View is introduced in DeepDiff 3. - Set view='tree' in order to use this view. - - Value of an item has changed (Tree View) - >>> from deepdiff import DeepDiff - >>> from pprint import pprint - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') - >>> ddiff_verbose0 - {'values_changed': {}} - >>> - >>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') - >>> ddiff_verbose1 - {'values_changed': {}} - >>> set_of_values_changed = ddiff_verbose1['values_changed'] - >>> # since set_of_values_changed includes only one item in a set - >>> # in order to get that one item we can: - >>> (changed,) = set_of_values_changed - >>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] - - >>> changed.t1 - 2 - >>> changed.t2 - 4 - >>> # You can traverse through the tree, get to the parents! 
- >>> changed.up - - - List difference (Tree View) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> ddiff - {'iterable_item_removed': {, }} - >>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. - >>> # One way to get one item from the set is to convert it to a list - >>> # And then get the first item of the list: - >>> removed = list(ddiff['iterable_item_removed'])[0] - >>> removed - - >>> - >>> parent = removed.up - >>> parent - - >>> parent.path() - "root[4]['b']" - >>> parent.t1 - [1, 2, 3, 4] - >>> parent.t2 - [1, 2] - >>> parent.up - - >>> parent.up.up - - >>> parent.up.up.t1 - {1: 1, 2: 2, 3: 3, 4: {'a': 'hello', 'b': [1, 2, 3, 4]}} - >>> parent.up.up.t1 == t1 # It is holding the original t1 that we passed to DeepDiff - True - - List difference 2 (Tree View) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> pprint(ddiff, indent = 2) - { 'iterable_item_added': {}, - 'values_changed': { , - }} - >>> - >>> # Note that iterable_item_added is a set with one item. - >>> # So in order to get that one item from it, we can do: - >>> - >>> (added,) = ddiff['iterable_item_added'] - >>> added - - >>> added.up.up - - >>> added.up.up.path() - 'root[4]' - >>> added.up.up.down - - >>> - >>> # going up twice and then down twice gives you the same node in the tree: - >>> added.up.up.down.down == added - True - - List difference ignoring order but reporting repetitions (Tree View) - >>> t1 = [1, 3, 1, 4] - >>> t2 = [4, 4, 1] - >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree') - >>> pprint(ddiff, indent=2) - { 'iterable_item_removed': {}, - 'repetition_change': { , - }} - >>> - >>> # repetition_change is a set with 2 items. 
- >>> # in order to get those 2 items, we can do the following. - >>> # or we can convert the set to list and get the list items. - >>> # or we can iterate through the set items - >>> - >>> (repeat1, repeat2) = ddiff['repetition_change'] - >>> repeat1 # the default verbosity is set to 1. - - >>> # The actual data regarding the repetitions can be found in the repetition attribute: - >>> repeat1.repetition - {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} - >>> - >>> # If you change the verbosity, you will see less: - >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree', verbose_level=0) - >>> ddiff - {'repetition_change': {, }, 'iterable_item_removed': {}} - >>> (repeat1, repeat2) = ddiff['repetition_change'] - >>> repeat1 - - >>> - >>> # But the verbosity level does not change the actual report object. - >>> # It only changes the textual representaion of the object. We get the actual object here: - >>> repeat1.repetition - {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} - >>> repeat1.t1 - 4 - >>> repeat1.t2 - 4 - >>> repeat1.up - - - List that contains dictionary (Tree View) - >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} - >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> pprint (ddiff, indent = 2) - { 'dictionary_item_removed': {}, - 'values_changed': {}} - - Sets (Tree View): - >>> t1 = {1, 2, 8} - >>> t2 = {1, 2, 3, 5} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> print(ddiff) - {'set_item_removed': {}, 'set_item_added': {, }} - >>> # grabbing one item from set_item_removed set which has one item only - >>> (item,) = ddiff['set_item_removed'] - >>> item.up - - >>> item.up.t1 == t1 - True - - Named Tuples (Tree View): - >>> from collections import namedtuple - >>> Point = namedtuple('Point', ['x', 'y']) - >>> t1 = Point(x=11, y=22) - >>> t2 = Point(x=11, y=23) - >>> print(DeepDiff(t1, t2, 
view='tree')) - {'values_changed': {}} - - Custom objects (Tree View): - >>> class ClassA(object): - ... a = 1 - ... def __init__(self, b): - ... self.b = b - ... - >>> t1 = ClassA(1) - >>> t2 = ClassA(2) - >>> - >>> print(DeepDiff(t1, t2, view='tree')) - {'values_changed': {}} - - Object attribute added (Tree View): - >>> t2.c = "new attribute" - >>> pprint(DeepDiff(t1, t2, view='tree')) - {'attribute_added': {}, - 'values_changed': {}} - - Approximate decimals comparison (Significant digits after the point) (Tree View): - >>> t1 = Decimal('1.52') - >>> t2 = Decimal('1.57') - >>> DeepDiff(t1, t2, significant_digits=0, view='tree') - {} - >>> ddiff = DeepDiff(t1, t2, significant_digits=1, view='tree') - >>> ddiff - {'values_changed': {}} - >>> (change1,) = ddiff['values_changed'] - >>> change1 - - >>> change1.t1 - Decimal('1.52') - >>> change1.t2 - Decimal('1.57') - >>> change1.path() - 'root' - - Approximate float comparison (Significant digits after the point) (Tree View): - >>> t1 = [ 1.1129, 1.3359 ] - >>> t2 = [ 1.113, 1.3362 ] - >>> ddiff = DeepDiff(t1, t2, significant_digits=3, view='tree') - >>> ddiff - {} - >>> ddiff = DeepDiff(t1, t2, view='tree') - >>> pprint(ddiff, indent=2) - { 'values_changed': { , - }} - >>> ddiff = DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1, view='tree') - >>> ddiff - {'values_changed': {}} - - - .. note:: - All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. - - **Serialization** - - DeepDiff uses jsonpickle in order to serialize and deserialize its results into json. 
- - Serialize and then deserialize back to deepdiff - >>> t1 = {1: 1, 2: 2, 3: 3} - >>> t2 = {1: 1, 2: "2", 3: 3} - >>> ddiff = DeepDiff(t1, t2) - >>> jsoned = ddiff.json - >>> jsoned - '{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' - >>> ddiff_new = DeepDiff.from_json(jsoned) - >>> ddiff == ddiff_new - True - - **Pycon 2016 Talk** - I gave a talk about how DeepDiff does what it does at Pycon 2016. - `Diff it to Dig it Pycon 2016 video `_ - - And here is more info: http://zepworks.com/blog/diff-it-to-digg-it/ - - - """ +class DeepDiff(ResultDict, Base): + __doc__ = doc def __init__(self, t1, @@ -614,29 +49,39 @@ def __init__(self, ignore_order=False, report_repetition=False, significant_digits=None, - exclude_paths=set(), - exclude_types=set(), + exclude_paths=None, + exclude_regex_paths=None, + exclude_types=None, + ignore_type_in_groups=None, + ignore_string_type_changes=False, + ignore_numeric_type_changes=False, verbose_level=1, - view='text', + view=TEXT_VIEW, + hasher=DeepHash.murmur3_128bit, **kwargs): if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" - "The valid parameters are ignore_order, report_repetition, significant_digits," - "exclude_paths, exclude_types, verbose_level and view.") % ', '.join(kwargs.keys())) + "The valid parameters are ignore_order, report_repetition, significant_digits, " + "exclude_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, " + "ignore_string_type_changes, ignore_numeric_type_changes, verbose_level, view, " + "and hasher.") % ', '.join(kwargs.keys())) self.ignore_order = ignore_order + self.ignore_type_in_groups = self.get_ignore_types_in_groups( + ignore_type_in_groups, + ignore_string_type_changes, ignore_numeric_type_changes) self.report_repetition = report_repetition - self.exclude_paths = set(exclude_paths) - 
self.exclude_types = set(exclude_types) - self.exclude_types_tuple = tuple( - exclude_types) # we need tuple for checking isinstance + self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths) + self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) + self.exclude_types = set(exclude_types) if exclude_types else None + self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance + self.ignore_string_type_changes = ignore_string_type_changes + self.ignore_numeric_type_changes = ignore_numeric_type_changes self.hashes = {} + self.hasher = hasher - if significant_digits is not None and significant_digits < 0: - raise ValueError( - "significant_digits must be None or a non-negative integer") - self.significant_digits = significant_digits + self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes) self.tree = TreeResult() @@ -647,15 +92,20 @@ def __init__(self, self.tree.cleanup() - if view == 'tree': - self.update(self.tree) - del self.tree + self.view = view + view_results = self._get_view_results(view) + self.update(view_results) + + def _get_view_results(self, view): + """ + Get the results based on the view + """ + if view == TREE_VIEW: + result = self.tree else: - result_text = TextResult(tree_results=self.tree) - result_text.cleanup() # clean up text-style result dictionary - self.update( - result_text - ) # be compatible to DeepDiff 2.x if user didn't specify otherwise + result = TextResult(tree_results=self.tree) + result.cleanup() # clean up text-style result dictionary + return result # TODO: adding adding functionality # def __add__(self, other): @@ -692,26 +142,32 @@ def __report_result(self, report_type, level): level.report_type = report_type self.tree[report_type].add(level) - @staticmethod - def __add_to_frozen_set(parents_ids, item_id): - parents_ids = set(parents_ids) - 
parents_ids.add(item_id) - return frozenset(parents_ids) - @staticmethod def __dict_from_slots(object): def unmangle(attribute): - if attribute.startswith('__'): + if attribute.startswith('__') and attribute != '__weakref__': return '_{type}{attribute}'.format( type=type(object).__name__, attribute=attribute ) return attribute - slots = object.__slots__ - if isinstance(slots, strings): - return {slots: getattr(object, unmangle(slots))} - return {i: getattr(object, unmangle(i)) for i in slots} + all_slots = [] + + if isinstance(object, type): + mro = object.__mro__ # pragma: no cover. I have not been able to write a test for this case. But we still check for it. + else: + mro = object.__class__.__mro__ + + for type_in_mro in mro: + slots = getattr(type_in_mro, '__slots__', None) + if slots: + if isinstance(slots, strings): + all_slots.append(slots) + else: + all_slots.extend(slots) + + return {i: getattr(object, unmangle(i)) for i in all_slots} def __diff_obj(self, level, parents_ids=frozenset({}), is_namedtuple=False): @@ -747,13 +203,33 @@ def __skip_this(self, level): skip = False if self.exclude_paths and level.path() in self.exclude_paths: skip = True + elif self.exclude_regex_paths and any( + [exclude_regex_path.search(level.path()) for exclude_regex_path in self.exclude_regex_paths]): + skip = True else: - if isinstance(level.t1, self.exclude_types_tuple) or isinstance( - level.t2, self.exclude_types_tuple): + if self.exclude_types_tuple and (isinstance(level.t1, self.exclude_types_tuple) or + isinstance(level.t2, self.exclude_types_tuple)): skip = True return skip + def __get_clean_to_keys_mapping(self, keys, level): + result = {} + for key in keys: + if self.ignore_string_type_changes and isinstance(key, bytes): + clean_key = key.decode('utf-8') + elif self.ignore_numeric_type_changes and type(key) in numbers: + clean_key = ("{:.%sf}" % self.significant_digits).format(key) + else: + clean_key = key + if clean_key in result: + logger.warning(('{} and {} in 
{} become the same key when ignore_numeric_type_changes' + 'or ignore_numeric_type_changes are set to be true.').format( + key, result[clean_key], level.path())) + else: + result[clean_key] = key + return result + def __diff_dict(self, level, parents_ids=frozenset({}), @@ -782,6 +258,13 @@ def __diff_dict(self, t1_keys = set(t1.keys()) t2_keys = set(t2.keys()) + if self.ignore_string_type_changes or self.ignore_numeric_type_changes: + t1_clean_to_keys = self.__get_clean_to_keys_mapping(keys=t1_keys, level=level) + t2_clean_to_keys = self.__get_clean_to_keys_mapping(keys=t2_keys, level=level) + t1_keys = set(t1_clean_to_keys.keys()) + t2_keys = set(t2_clean_to_keys.keys()) + else: + t1_clean_to_keys = t2_clean_to_keys = None t_keys_intersect = t2_keys.intersection(t1_keys) @@ -789,6 +272,7 @@ def __diff_dict(self, t_keys_removed = t1_keys - t_keys_intersect for key in t_keys_added: + key = t2_clean_to_keys[key] if t2_clean_to_keys else key change_level = level.branch_deeper( notpresent, t2[key], @@ -797,6 +281,7 @@ def __diff_dict(self, self.__report_result(item_added_key, change_level) for key in t_keys_removed: + key = t1_clean_to_keys[key] if t1_clean_to_keys else key change_level = level.branch_deeper( t1[key], notpresent, @@ -805,15 +290,17 @@ def __diff_dict(self, self.__report_result(item_removed_key, change_level) for key in t_keys_intersect: # key present in both dicts - need to compare values - item_id = id(t1[key]) + key1 = t1_clean_to_keys[key] if t1_clean_to_keys else key + key2 = t2_clean_to_keys[key] if t2_clean_to_keys else key + item_id = id(t1[key1]) if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) # Go one level deeper next_level = level.branch_deeper( - t1[key], - t2[key], + t1[key1], + t2[key2], child_relationship_class=rel_class, child_relationship_param=key) self.__diff(next_level, parents_ids_added) @@ -884,8 
+371,7 @@ def __diff_iterable(self, level, parents_ids=frozenset({})): item_id = id(x) if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, - item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) # Go one level deeper next_level = level.branch_deeper( @@ -897,20 +383,29 @@ def __diff_iterable(self, level, parents_ids=frozenset({})): def __diff_str(self, level): """Compare strings""" - if level.t1 == level.t2: + if type(level.t1) == type(level.t2) and level.t1 == level.t2: return # do we add a diff for convenience? do_diff = True + t1_str = level.t1 + t2_str = level.t2 + if isinstance(level.t1, bytes_type): try: t1_str = level.t1.decode('ascii') + except UnicodeDecodeError: + do_diff = False + + if isinstance(level.t2, bytes_type): + try: t2_str = level.t2.decode('ascii') except UnicodeDecodeError: do_diff = False - else: - t1_str = level.t1 - t2_str = level.t2 + + if t1_str == t2_str: + return + if do_diff: if u'\n' in t1_str or u'\n' in t2_str: diff = difflib.unified_diff( @@ -933,36 +428,45 @@ def __diff_tuple(self, level, parents_ids): else: self.__diff_obj(level, parents_ids, is_namedtuple=True) - def __create_hashtable(self, t, level): - """Create hashtable of {item_hash: item}""" + def _add_hash(self, hashes, item_hash, item, i): + if item_hash in hashes: + hashes[item_hash].indexes.append(i) + else: + hashes[item_hash] = IndexedHash(indexes=[i], item=item) - def add_hash(hashes, item_hash, item, i): - if item_hash in hashes: - hashes[item_hash].indexes.append(i) - else: - hashes[item_hash] = IndexedHash([i], item) + def __create_hashtable(self, t, level): + """Create hashtable of {item_hash: (indexes, item)}""" hashes = {} for (i, item) in enumerate(t): try: hashes_all = DeepHash(item, hashes=self.hashes, - significant_digits=self.significant_digits) - item_hash = hashes_all.get(id(item), item) + exclude_types=self.exclude_types, + exclude_paths=self.exclude_paths, + 
exclude_regex_paths=self.exclude_regex_paths, + hasher=self.hasher, + ignore_repetition=not self.report_repetition, + significant_digits=self.significant_digits, + ignore_string_type_changes=self.ignore_string_type_changes, + ignore_numeric_type_changes=self.ignore_numeric_type_changes, + ignore_type_in_groups=self.ignore_type_in_groups, + ) + item_hash = hashes_all[item] except Exception as e: # pragma: no cover logger.warning("Can not produce a hash for %s." "Not counting this object.\n %s" % (level.path(), e)) else: - if item_hash is hashes_all.unprocessed: # pragma: no cover + if item_hash is unprocessed: # pragma: no cover logger.warning("Item %s was not processed while hashing " "thus not counting this object." % level.path()) else: - add_hash(hashes, item_hash, item, i) + self._add_hash(hashes=hashes, item_hash=item_hash, item=item, i=i) return hashes - def __diff_iterable_with_contenthash(self, level): + def __diff_iterable_with_deephash(self, level): """Diff of unhashable iterables. 
Only used when ignoring the order.""" t1_hashtable = self.__create_hashtable(level.t1, level) t2_hashtable = self.__create_hashtable(level.t2, level) @@ -1073,10 +577,17 @@ def __diff(self, level, parents_ids=frozenset({})): if self.__skip_this(level): return - if type(level.t1) != type(level.t2): - self.__diff_types(level) + if get_type(level.t1) != get_type(level.t2): + report_type_change = True + for type_group in self.ignore_type_in_groups: + if get_type(level.t1) in type_group and get_type(level.t2) in type_group: + report_type_change = False + break + if report_type_change: + self.__diff_types(level) + return - elif isinstance(level.t1, strings): + if isinstance(level.t1, strings): self.__diff_str(level) elif isinstance(level.t1, numbers): @@ -1088,22 +599,24 @@ def __diff(self, level, parents_ids=frozenset({})): elif isinstance(level.t1, tuple): self.__diff_tuple(level, parents_ids) - elif isinstance(level.t1, (set, frozenset)): + elif isinstance(level.t1, (set, frozenset, OrderedSet)): self.__diff_set(level) elif isinstance(level.t1, Iterable): if self.ignore_order: - self.__diff_iterable_with_contenthash(level) + self.__diff_iterable_with_deephash(level) else: self.__diff_iterable(level, parents_ids) else: self.__diff_obj(level, parents_ids) - return - @property def json(self): + warnings.warn( + "json property will be deprecated. Instead use: to_json_pickle() to get the json pickle or to_json() for bare-bone json.", + DeprecationWarning + ) if not hasattr(self, '_json'): # copy of self removes all the extra attributes since it assumes # we have only a simple dictionary. @@ -1111,20 +624,76 @@ def json(self): self._json = jsonpickle.encode(copied) return self._json + def to_json_pickle(self): + """ + Get the json pickle of the diff object. Unless you need all the attributes and functionality of DeepDiff, running to_json() is the safer option that json pickle. 
+ """ + copied = self.copy() + return jsonpickle.encode(copied) + @json.deleter def json(self): del self._json @classmethod - def from_json(self, value): + def from_json(cls, value): + warnings.warn( + "from_json is renamed to from_json_pickle", + DeprecationWarning + ) + return cls.from_json_pickle(value) + + @classmethod + def from_json_pickle(cls, value): + """ + Load DeepDiff object with all the bells and whistles from the json pickle dump. + Note that json pickle dump comes from to_json_pickle + """ return jsonpickle.decode(value) + def to_json(self, default_mapping=None): + """ + Dump json of the text view. + **Parameters** + + default_mapping : default_mapping, dictionary(optional), a dictionary of mapping of different types to json types. + + by default DeepDiff converts certain data types. For example Decimals into floats so they can be exported into json. + If you have a certain object type that the json serializer can not serialize it, please pass the appropriate type + conversion through this dictionary. + + **Example** + + Serialize custom objects + >>> class A: + ... pass + ... + >>> class B: + ... pass + ... + >>> t1 = A() + >>> t2 = B() + >>> ddiff = DeepDiff(t1, t2) + >>> ddiff.to_json() + TypeError: We do not know how to convert <__main__.A object at 0x10648> of type for json serialization. Please pass the default_mapping parameter with proper mapping of the object to a basic python type. + + >>> default_mapping = {A: lambda x: 'obj A', B: lambda x: 'obj B'} + >>> ddiff.to_json(default_mapping=default_mapping) + '{"type_changes": {"root": {"old_type": "A", "new_type": "B", "old_value": "obj A", "new_value": "obj B"}}}' + """ + return json.dumps(self.to_dict(), default=json_convertor_default(default_mapping=default_mapping)) + + def to_dict(self): + """ + Dump dictionary of the text view. It does not matter which view you are currently in. It will give you the dictionary of the text view. 
+ """ + if self.view == TREE_VIEW: + result = dict(self._get_view_results(view=TEXT_VIEW)) + else: + result = dict(self) + return result + if __name__ == "__main__": # pragma: no cover - if not py3: - import sys - sys.exit( - "Please run with Python 3 to verify the doc strings: python3 -m deepdiff.diff" - ) import doctest doctest.testmod() diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst new file mode 100644 index 00000000..64f10929 --- /dev/null +++ b/deepdiff/diff_doc.rst @@ -0,0 +1,742 @@ +**DeepDiff** + +Deep Difference of dictionaries, iterables, strings and almost any other object. +It will recursively look for all the changes. + +DeepDiff 3.0 added the concept of views. +There is a default "text" view and a "tree" view. + +**Parameters** + +t1 : A dictionary, list, string or any python object that has __dict__ or __slots__ + This is the first item to be compared to the second item + +t2 : dictionary, list, string or almost any python object that has __dict__ or __slots__ + The second item is to be compared to the first one + +ignore_order : Boolean, defalt=False ignores orders for iterables. + Note that if you have iterables contatining any unhashable, ignoring order can be expensive. + Normally ignore_order does not report duplicates and repetition changes. + In order to report repetitions, set report_repetition=True in addition to ignore_order=True + +report_repetition : Boolean, default=False reports repetitions when set True + ONLY when ignore_order is set True too. This works for iterables. + This feature currently is experimental and is not production ready. + +significant_digits : int >= 0, default=None. + If it is a non negative integer, it compares only that many digits AFTER + the decimal point. + + This only affects floats, decimal.Decimal and complex. 
+ + Internally it uses "{:.Xf}".format(Your Number) to compare numbers where X=significant_digits + + Note that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114 + + For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) + +verbose_level : int >= 0, default = 1. + Higher verbose level shows you more details. + For example verbose level 1 shows what dictionary items are added or removed. + And verbose level 2 shows the value of the items that are added or removed too. + +exclude_paths: list, default = None + List of paths to exclude from the report. If only one item, you can pass it as a string. + +exclude_regex_paths: list, default = None + List of string regex paths or compiled regex paths objects to exclude from the report. If only one item, you can pass it as a string or regex compiled object. + + +hasher: default = DeepHash.murmur3_128bit + Hash function to be used. If you don't want Murmur3, you can use Python's built-in hash function + by passing hasher=hash. This is for advanced usage and normally you don't need to modify it. + +view: string, default = text + Starting with version 3 you can choose the view into the deepdiff results. + The default is the text view which has been the only view up until now. + The new view is called the tree view which allows you to traverse through + the tree of changed items. + +exclude_types: list, default = None + List of object types to exclude from the report. + +ignore_string_type_changes: Boolean, default = False + Whether to ignore string type changes or not. For example b"Hello" vs. "Hello" are considered the same if ignore_string_type_changes is set to True. + +ignore_numeric_type_changes: Boolean, default = False + Whether to ignore numeric type changes or not. For example 10 vs. 10.0 are considered the same if ignore_numeric_type_changes is set to True.
+ +ignore_type_in_groups: Tuple or List of Tuples, default = None + ignores types when t1 and t2 are both within the same type group. + +**Returns** + + A DeepDiff object that has already calculated the difference of the 2 items. + +**Supported data types** + +int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects! + +**Text View** + +Text view is the original and currently the default view of DeepDiff. + +It is called text view because the results contain texts that represent the path to the data: + +Example of using the text view. + >>> from deepdiff import DeepDiff + >>> t1 = {1:1, 3:3, 4:4} + >>> t2 = {1:1, 3:3, 5:5, 6:6} + >>> ddiff = DeepDiff(t1, t2) + >>> print(ddiff) + {'dictionary_item_added': [root[5], root[6]], 'dictionary_item_removed': [root[4]]} + +So for example ddiff['dictionary_item_removed'] is a set of strings, thus this is called the text view. + +.. seealso:: + The following examples are using the *default text view.* + The Tree View is introduced in DeepDiff v3 and provides + traversing capabilities through your diffed data and more! + Read more about the Tree View at the bottom of this page.
+ +Importing + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + +Same object returns empty + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = t1 + >>> print(DeepDiff(t1, t2)) + {} + +Type of an item has changed + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:"2", 3:3} + >>> pprint(DeepDiff(t1, t2), indent=2) + { 'type_changes': { 'root[2]': { 'new_type': , + 'new_value': '2', + 'old_type': , + 'old_value': 2}}} + +Value of an item has changed + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:4, 3:3} + >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) + {'values_changed': {'root[2]': {'new_value': 4, 'old_value': 2}}} + +Item added and/or removed + >>> t1 = {1:1, 3:3, 4:4} + >>> t2 = {1:1, 3:3, 5:5, 6:6} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff) + {'dictionary_item_added': [root[5], root[6]], + 'dictionary_item_removed': [root[4]]} + +Set verbose level to 2 in order to see the added or removed items with their values + >>> t1 = {1:1, 3:3, 4:4} + >>> t2 = {1:1, 3:3, 5:5, 6:6} + >>> ddiff = DeepDiff(t1, t2, verbose_level=2) + >>> pprint(ddiff, indent=2) + { 'dictionary_item_added': {'root[5]': 5, 'root[6]': 6}, + 'dictionary_item_removed': {'root[4]': 4}} + +String difference + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world"}} + >>> t2 = {1:1, 2:4, 3:3, 4:{"a":"hello", "b":"world!"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'values_changed': { 'root[2]': {'new_value': 4, 'old_value': 2}, + "root[4]['b']": { 'new_value': 'world!', + 'old_value': 'world'}}} + + +String difference 2 + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world!\nGoodbye!\n1\n2\nEnd"}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n1\n2\nEnd"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'values_changed': { "root[4]['b']": { 'diff': '--- \n' + '+++ \n' + '@@ -1,5 +1,4 @@\n' + '-world!\n' + '-Goodbye!\n' + '+world\n' + ' 1\n' + ' 2\n' + ' End', + 'new_value': 'world\n1\n2\nEnd', + 'old_value': 'world!\n' + 'Goodbye!\n' 
+ '1\n' + '2\n' + 'End'}}} + + >>> + >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) + --- + +++ + @@ -1,5 +1,4 @@ + -world! + -Goodbye! + +world + 1 + 2 + End + +List difference + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} + +List difference 2: + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'iterable_item_added': {"root[4]['b'][3]": 3}, + 'values_changed': { "root[4]['b'][1]": {'new_value': 3, 'old_value': 2}, + "root[4]['b'][2]": {'new_value': 2, 'old_value': 3}}} + +List difference ignoring order or duplicates: (with the same dictionaries as above) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} + >>> ddiff = DeepDiff(t1, t2, ignore_order=True) + >>> print (ddiff) + {} + +List difference ignoring order but reporting repetitions: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [1, 3, 1, 4] + >>> t2 = [4, 4, 1] + >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) + >>> pprint(ddiff, indent=2) + { 'iterable_item_removed': {'root[1]': 3}, + 'repetition_change': { 'root[0]': { 'new_indexes': [2], + 'new_repeat': 1, + 'old_indexes': [0, 2], + 'old_repeat': 2, + 'value': 1}, + 'root[3]': { 'new_indexes': [0, 1], + 'new_repeat': 2, + 'old_indexes': [3], + 'old_repeat': 1, + 'value': 4}}} + +List that contains dictionary: + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'dictionary_item_removed': [root[4]['b'][2][2]], + 'values_changed': 
{"root[4]['b'][2][1]": {'new_value': 3, 'old_value': 1}}} + +Sets: + >>> t1 = {1, 2, 8} + >>> t2 = {1, 2, 3, 5} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff) + {'set_item_added': [root[3], root[5]], 'set_item_removed': [root[8]]} + +Named Tuples: + >>> from collections import namedtuple + >>> Point = namedtuple('Point', ['x', 'y']) + >>> t1 = Point(x=11, y=22) + >>> t2 = Point(x=11, y=23) + >>> pprint (DeepDiff(t1, t2)) + {'values_changed': {'root.y': {'new_value': 23, 'old_value': 22}}} + +Custom objects: + >>> class ClassA(object): + ... a = 1 + ... def __init__(self, b): + ... self.b = b + ... + >>> t1 = ClassA(1) + >>> t2 = ClassA(2) + >>> + >>> pprint(DeepDiff(t1, t2)) + {'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} + +Object attribute added: + >>> t2.c = "new attribute" + >>> pprint(DeepDiff(t1, t2)) + {'attribute_added': [root.c], + 'values_changed': {'root.b': {'new_value': 2, 'old_value': 1}}} + +Approximate decimals comparison (Significant digits after the point): + >>> t1 = Decimal('1.52') + >>> t2 = Decimal('1.57') + >>> DeepDiff(t1, t2, significant_digits=0) + {} + >>> DeepDiff(t1, t2, significant_digits=1) + {'values_changed': {'root': {'new_value': Decimal('1.57'), 'old_value': Decimal('1.52')}}} + +Approximate float comparison (Significant digits after the point): + >>> t1 = [ 1.1129, 1.3359 ] + >>> t2 = [ 1.113, 1.3362 ] + >>> pprint(DeepDiff(t1, t2, significant_digits=3)) + {} + >>> pprint(DeepDiff(t1, t2)) + {'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, + 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} + >>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) + {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} + + +.. note:: + All the examples for the text view work for the tree view too. + You just need to set view='tree' to get it in tree form. 
+ + +**Ignore Type Changes** + +Type change + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":"world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint (ddiff, indent = 2) + { 'type_changes': { "root[4]['b']": { 'new_type': <class 'str'>, + 'new_value': 'world\n\n\nEnd', + 'old_type': <class 'list'>, + 'old_value': [1, 2, 3]}}} + +And if you don't care about the value of items that have changed type, please set verbose level to 0 + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:"2", 3:3} + >>> pprint(DeepDiff(t1, t2, verbose_level=0), indent=2) + { 'type_changes': { 'root[2]': { 'new_type': <class 'str'>, + 'old_type': <class 'int'>}}} + + +Exclude types + +Exclude certain types from comparison: + >>> l1 = logging.getLogger("test") + >>> l2 = logging.getLogger("test2") + >>> t1 = {"log": l1, 2: 1337} + >>> t2 = {"log": l2, 2: 1337} + >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) + {} + +ignore_type_in_groups + +Ignore type changes between members of groups of types. For example if you want to ignore type changes between float and decimals etc. Note that this is a more granular feature. Most of the time the shortcuts provided to you are enough. +The shortcuts are ignore_string_type_changes which by default is False and ignore_numeric_type_changes which is by default False. You can read more about those shortcuts on this page. ignore_type_in_groups gives you more control compared to the shortcuts. + +For example let's say you have specifically str and bytes datatypes to be ignored for type changes. Then you have a couple of options: + +1. Set ignore_string_type_changes=True (it is False by default). +2. Or set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly the same as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings.
+ +Now what if you also want typeA and typeB to be ignored when comparing against each other? + +1. ignore_type_in_groups=[DeepDiff.strings, (typeA, typeB)] +2. or ignore_type_in_groups=[(str, bytes), (typeA, typeB)] + +ignore_string_type_changes +Default: False + >>> DeepDiff(b'hello', 'hello', ignore_string_type_changes=True) + {} + >>> DeepDiff(b'hello', 'hello') + {'type_changes': {'root': {'old_type': <class 'bytes'>, 'new_type': <class 'str'>, 'old_value': b'hello', 'new_value': 'hello'}}} + +ignore_numeric_type_changes +Default: False + +Ignore Type Number - Dictionary that contains float and integer: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = {1: 1, 2: 2.22} + >>> t2 = {1: 1.0, 2: 2.22} + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[1]': { 'new_type': <class 'float'>, + 'new_value': 1.0, + 'old_type': <class 'int'>, + 'old_value': 1}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + >>> pprint(ddiff, indent=2) + {} + +Ignore Type Number - List that contains float and integer: + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = [1, 2, 3] + >>> t2 = [1.0, 2.0, 3.0] + >>> ddiff = DeepDiff(t1, t2) + >>> pprint(ddiff, indent=2) + { 'type_changes': { 'root[0]': { 'new_type': <class 'float'>, + 'new_value': 1.0, + 'old_type': <class 'int'>, + 'old_value': 1}, + 'root[1]': { 'new_type': <class 'float'>, + 'new_value': 2.0, + 'old_type': <class 'int'>, + 'old_value': 2}, + 'root[2]': { 'new_type': <class 'float'>, + 'new_value': 3.0, + 'old_type': <class 'int'>, + 'old_value': 3}}} + >>> ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + >>> pprint(ddiff, indent=2) + {} + +You can pass a list of tuples or list of lists if you have various type groups. When t1 and t2 both fall under one of these type groups, the type change will be ignored. DeepDiff already comes with 2 groups: DeepDiff.strings and DeepDiff.numbers.
If you want to pass both: + >>> ignore_type_in_groups = [DeepDiff.strings, DeepDiff.numbers] + + +ignore_type_in_groups example with custom objects: + >>> class Burrito: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> class Taco: + ... bread = 'flour' + ... def __init__(self): + ... self.spicy = True + ... + >>> + >>> burrito = Burrito() + >>> taco = Taco() + >>> + >>> burritos = [burrito] + >>> tacos = [taco] + >>> + >>> DeepDiff(burritos, tacos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) + {} + + +**Tree View** + +Starting with version 3 you can choose the view into the deepdiff results. +The tree view provides you with tree objects that you can traverse through to find +the parents of the objects that are diffed and the actual objects that are being diffed. +This view is very useful when dealing with nested objects. +Note that tree view always returns results in the form of Python sets. + +You can traverse through the tree elements! + +.. note:: + The Tree view is just a different representation of the diffed data. + Behind the scene, DeepDiff creates the tree view first and then converts it to textual + representation for the text view. + +..
code:: text + + +---------------------------------------------------------------+ + | | + | parent(t1) parent node parent(t2) | + | + ^ + | + +------|--------------------------|---------------------|-------+ + | | | up | + | Child | | | ChildRelationship + | Relationship | | | + | down | | | + +------|----------------------|-------------------------|-------+ + | v v v | + | child(t1) child node child(t2) | + | | + +---------------------------------------------------------------+ + + +:up: Move up to the parent node +:down: Move down to the child node +:path(): Get the path to the current node +:t1: The first item in the current node that is being diffed +:t2: The second item in the current node that is being diffed +:additional: Additional information about the node i.e. repetition +:repetition: Shortcut to get the repetition report + + +The tree view allows you to have more than mere textual representaion of the diffed objects. +It gives you the actual objects (t1, t2) throughout the tree of parents and children. + +**Examples Tree View** + +.. note:: + The Tree View is introduced in DeepDiff 3. + Set view='tree' in order to use this view. + +Value of an item has changed (Tree View) + >>> from deepdiff import DeepDiff + >>> from pprint import pprint + >>> t1 = {1:1, 2:2, 3:3} + >>> t2 = {1:1, 2:4, 3:3} + >>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') + >>> ddiff_verbose0 + {'values_changed': []} + >>> + >>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') + >>> ddiff_verbose1 + {'values_changed': []} + >>> set_of_values_changed = ddiff_verbose1['values_changed'] + >>> # since set_of_values_changed includes only one item in a set + >>> # in order to get that one item we can: + >>> (changed,) = set_of_values_changed + >>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] + + >>> changed.t1 + 2 + >>> changed.t2 + 4 + >>> # You can traverse through the tree, get to the parents! 
+ >>> changed.up + + +List difference (Tree View) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3, 4]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff + {'iterable_item_removed': [, ]} + >>> # Note that the iterable_item_removed is a set. In this case it has 2 items in it. + >>> # One way to get one item from the set is to convert it to a list + >>> # And then get the first item of the list: + >>> removed = list(ddiff['iterable_item_removed'])[0] + >>> removed + + >>> + >>> parent = removed.up + >>> parent + + >>> parent.path() + "root[4]['b']" + >>> parent.t1 + [1, 2, 3, 4] + >>> parent.t2 + [1, 2] + >>> parent.up + + >>> parent.up.up + + >>> parent.up.up.t1 + {1: 1, 2: 2, 3: 3, 4: {'a': 'hello', 'b': [1, 2, 3, 4]}} + >>> parent.up.up.t1 == t1 # It is holding the original t1 that we passed to DeepDiff + True + +List difference 2 (Tree View) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, 3]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 3, 2, 3]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> pprint(ddiff, indent = 2) + { 'iterable_item_added': [], + 'values_changed': [, ]} + >>> + >>> # Note that iterable_item_added is a set with one item. + >>> # So in order to get that one item from it, we can do: + >>> + >>> (added,) = ddiff['iterable_item_added'] + >>> added + + >>> added.up.up + + >>> added.up.up.path() + 'root[4]' + >>> added.up.up.down + + >>> + >>> # going up twice and then down twice gives you the same node in the tree: + >>> added.up.up.down.down == added + True + +List difference ignoring order but reporting repetitions (Tree View) + >>> t1 = [1, 3, 1, 4] + >>> t2 = [4, 4, 1] + >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree') + >>> pprint(ddiff, indent=2) + { 'iterable_item_removed': [], + 'repetition_change': [, ]} + >>> + >>> # repetition_change is a set with 2 items. 
+ >>> # in order to get those 2 items, we can do the following. + >>> # or we can convert the set to list and get the list items. + >>> # or we can iterate through the set items + >>> + >>> (repeat1, repeat2) = ddiff['repetition_change'] + >>> repeat1 # the default verbosity is set to 1. + + >>> # The actual data regarding the repetitions can be found in the repetition attribute: + >>> repeat1.repetition + {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} + >>> + >>> # If you change the verbosity, you will see less: + >>> ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True, view='tree', verbose_level=0) + >>> ddiff + {'repetition_change': [, ], 'iterable_item_removed': []} + >>> (repeat1, repeat2) = ddiff['repetition_change'] + >>> repeat1 + + >>> + >>> # But the verbosity level does not change the actual report object. + >>> # It only changes the textual representaion of the object. We get the actual object here: + >>> repeat1.repetition + {'old_repeat': 1, 'new_repeat': 2, 'old_indexes': [3], 'new_indexes': [0, 1]} + >>> repeat1.t1 + 4 + >>> repeat1.t2 + 4 + >>> repeat1.up + + +List that contains dictionary (Tree View) + >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} + >>> t2 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> pprint (ddiff, indent = 2) + { 'dictionary_item_removed': [], + 'values_changed': []} + +Sets (Tree View): + >>> t1 = {1, 2, 8} + >>> t2 = {1, 2, 3, 5} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> print(ddiff) + {'set_item_removed': [], 'set_item_added': [, ]} + >>> # grabbing one item from set_item_removed set which has one item only + >>> (item,) = ddiff['set_item_removed'] + >>> item.up + + >>> item.up.t1 == t1 + True + +Named Tuples (Tree View): + >>> from collections import namedtuple + >>> Point = namedtuple('Point', ['x', 'y']) + >>> t1 = Point(x=11, y=22) + >>> t2 = Point(x=11, y=23) + >>> print(DeepDiff(t1, t2, 
view='tree')) + {'values_changed': []} + +Custom objects (Tree View): + >>> class ClassA(object): + ... a = 1 + ... def __init__(self, b): + ... self.b = b + ... + >>> t1 = ClassA(1) + >>> t2 = ClassA(2) + >>> + >>> print(DeepDiff(t1, t2, view='tree')) + {'values_changed': []} + +Object attribute added (Tree View): + >>> t2.c = "new attribute" + >>> pprint(DeepDiff(t1, t2, view='tree')) + {'attribute_added': [], + 'values_changed': []} + +Approximate decimals comparison (Significant digits after the point) (Tree View): + >>> t1 = Decimal('1.52') + >>> t2 = Decimal('1.57') + >>> DeepDiff(t1, t2, significant_digits=0, view='tree') + {} + >>> ddiff = DeepDiff(t1, t2, significant_digits=1, view='tree') + >>> ddiff + {'values_changed': []} + >>> (change1,) = ddiff['values_changed'] + >>> change1 + + >>> change1.t1 + Decimal('1.52') + >>> change1.t2 + Decimal('1.57') + >>> change1.path() + 'root' + +Approximate float comparison (Significant digits after the point) (Tree View): + >>> t1 = [ 1.1129, 1.3359 ] + >>> t2 = [ 1.113, 1.3362 ] + >>> ddiff = DeepDiff(t1, t2, significant_digits=3, view='tree') + >>> ddiff + {} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> pprint(ddiff, indent=2) + { 'values_changed': [, ]} + >>> ddiff = DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1, view='tree') + >>> ddiff + {'values_changed': []} + +**Exclude paths** + +Exclude part of your object tree from comparison +use `exclude_paths` and pass a set or list of paths to exclude, if only one item is being passed, then just put it there as a string. No need to pass it as a list then. + >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} + >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} + >>> print (DeepDiff(t1, t2, exclude_paths="root['ingredients']")) # one item pass it as a string + {} + >>> print (DeepDiff(t1, t2, exclude_paths=["root['ingredients']", "root['ingredients2']"])) # multiple items pass as a list or a set. 
+ {} + +You can also exclude using regular expressions by using `exclude_regex_paths` and pass a set or list of path regexes to exclude. The items in the list could be raw regex strings or compiled regex objects. + >>> import re + >>> t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] + >>> t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] + >>> print(DeepDiff(t1, t2, exclude_regex_paths=r"root\[\d+\]\['b'\]")) + {} + >>> exclude_path = re.compile(r"root\[\d+\]\['b'\]") + >>> print(DeepDiff(t1, t2, exclude_regex_paths=[exclude_path])) + {} + +example 2: + >>> t1 = {'a': [1, 2, [3, {'foo1': 'bar'}]]} + >>> t2 = {'a': [1, 2, [3, {'foo2': 'bar'}]]} + >>> DeepDiff(t1, t2, exclude_regex_paths="\['foo.'\]") # since it is one item in exclude_regex_paths, you don't have to put it in a list or a set. + {} + +Tip: DeepDiff is using re.search on the path. So if you want to force it to match from the beginning of the path, add `^` to the beginning of regex. + + + +.. note:: + All the examples for the text view work for the tree view too. You just need to set view='tree' to get it in tree form. + +**Serialization** + +In order to convert the DeepDiff object into a normal Python dictionary, use the to_dict() method. +Note that to_dict will use the text view even if you did the diff in tree view. + +Example: + >>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} + >>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff.to_dict() + {'type_changes': {"root[4]['b']": {'old_type': , 'new_type': , 'old_value': [1, 2, 3], 'new_value': 'world\n\n\nEnd'}}} + + +In order to do safe json serialization, use the to_json() method. 
+ +Example: + >>> t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} + >>> t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} + >>> ddiff = DeepDiff(t1, t2, view='tree') + >>> ddiff.to_json() + '{"type_changes": {"root[4][\'b\']": {"old_type": "list", "new_type": "str", "old_value": [1, 2, 3], "new_value": "world\\n\\n\\nEnd"}}}' + +.. seealso:: + Take a look at to_json() documentation in this page for more details. + +If you want the original DeepDiff object to be serialized with all the bells and whistles, you can use the to_json_pickle() and from_json_pickle() in order to serialize and deserialize its results into json. Note that json_pickle is unsafe and json pickle dumps from untrusted sources should never be loaded. + +Serialize and then deserialize back to deepdiff + >>> t1 = {1: 1, 2: 2, 3: 3} + >>> t2 = {1: 1, 2: "2", 3: 3} + >>> ddiff = DeepDiff(t1, t2) + >>> jsoned = ddiff.to_json_pickle() + >>> jsoned + '{"type_changes": {"root[2]": {"new_type": {"py/type": "builtins.str"}, "new_value": "2", "old_type": {"py/type": "builtins.int"}, "old_value": 2}}}' + >>> ddiff_new = DeepDiff.from_json_pickle(jsoned) + >>> ddiff == ddiff_new + True + +**Pycon 2016 Talk** +I gave a talk about how DeepDiff does what it does at Pycon 2016. 
+`Diff it to Dig it Pycon 2016 video `_ + +And here is more info: http://zepworks.com/blog/diff-it-to-digg-it/ + + diff --git a/deepdiff/helper.py b/deepdiff/helper.py index 7fea2a43..471604a8 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -1,52 +1,42 @@ # -*- coding: utf-8 -*- import sys import datetime +import re +import os +import logging from decimal import Decimal from collections import namedtuple -import logging +from ordered_set import OrderedSet logger = logging.getLogger(__name__) py_major_version = sys.version[0] py_minor_version = sys.version[2] +py2 = py_major_version == '2' py3 = py_major_version == '3' +py4 = py_major_version == '4' + +if py4: + logger.warning('Python 4 is not supported yet. Switching logic to Python 3.') # pragma: no cover + py3 = True # pragma: no cover -if (py_major_version, py_minor_version) == (2.6): # pragma: no cover - sys.exit('Python 2.6 is not supported.') +if py2: # pragma: no cover + sys.exit('Python 2 is not supported anymore. The last version of DeepDiff that supported Py2 was 3.3.0') pypy3 = py3 and hasattr(sys, "pypy_translation_info") -if py3: # pragma: no cover - from builtins import int - strings = (str, bytes) # which are both basestring - unicode_type = str - bytes_type = bytes - numbers = (int, float, complex, datetime.datetime, datetime.date, Decimal) - items = 'items' -else: # pragma: no cover - int = int - strings = (str, unicode) - unicode_type = unicode - bytes_type = str - numbers = (int, float, long, complex, datetime.datetime, datetime.date, - Decimal) - - items = 'iteritems' +# from builtins import int +strings = (str, bytes) # which are both basestring +unicode_type = str +bytes_type = bytes +numbers = (int, float, complex, datetime.datetime, datetime.date, datetime.timedelta, Decimal) IndexedHash = namedtuple('IndexedHash', 'indexes item') -EXPANDED_KEY_MAP = { # pragma: no cover - 'dic_item_added': 'dictionary_item_added', - 'dic_item_removed': 'dictionary_item_removed', - 'newindexes': 
'new_indexes', - 'newrepeat': 'new_repeat', - 'newtype': 'new_type', - 'newvalue': 'new_value', - 'oldindexes': 'old_indexes', - 'oldrepeat': 'old_repeat', - 'oldtype': 'old_type', - 'oldvalue': 'old_value'} +current_dir = os.path.dirname(os.path.abspath(__file__)) + +ID_PREFIX = '!>*id' def short_repr(item, max_length=15): @@ -57,59 +47,151 @@ def short_repr(item, max_length=15): return item -class ListItemRemovedOrAdded(object): # pragma: no cover +class ListItemRemovedOrAdded: # pragma: no cover """Class of conditions to be checked""" pass -class NotPresent(object): # pragma: no cover +class OtherTypes: + def __repr__(self): + return "Error: {}".format(self.__class__.__name__) # pragma: no cover + + __str__ = __repr__ + + +class Skipped(OtherTypes): + pass + + +class Unprocessed(OtherTypes): + pass + + +class NotHashed(OtherTypes): + pass + + +class NotPresent: # pragma: no cover """ In a change tree, this indicated that a previously existing object has been removed -- or will only be added in the future. We previously used None for this but this caused problem when users actually added and removed None. Srsly guys? :D """ def __repr__(self): - return "Not Present" + return 'not present' # pragma: no cover - def __str__(self): - return self.__repr__() + __str__ = __repr__ +unprocessed = Unprocessed() +skipped = Skipped() +not_hashed = NotHashed() notpresent = NotPresent() -WARNING_NUM = 0 +# Disabling remapping from old to new keys since the mapping is deprecated. +RemapDict = dict -def warn(*args, **kwargs): - global WARNING_NUM - if WARNING_NUM < 10: - WARNING_NUM += 1 - logger.warning(*args, **kwargs) +# class RemapDict(dict): +# """ +# DISABLED +# Remap Dictionary. + +# For keys that have a new, longer name, remap the old key to the new key. +# Other keys that don't have a new name are handled as before. 
+# """ + +# def __getitem__(self, old_key): +# new_key = EXPANDED_KEY_MAP.get(old_key, old_key) +# if new_key != old_key: +# logger.warning( +# "DeepDiff Deprecation: %s is renamed to %s. Please start using " +# "the new unified naming convention.", old_key, new_key) +# if new_key in self: +# return self.get(new_key) +# else: # pragma: no cover +# raise KeyError(new_key) + + +class Verbose: + """ + Global verbose level + """ + level = 1 -class RemapDict(dict): +class indexed_set(set): """ - Remap Dictionary. + A set class that lets you get an item by index - For keys that have a new, longer name, remap the old key to the new key. - Other keys that don't have a new name are handled as before. + >>> a = indexed_set() + >>> a.add(10) + >>> a.add(20) + >>> a[0] + 10 """ - def __getitem__(self, old_key): - new_key = EXPANDED_KEY_MAP.get(old_key, old_key) - if new_key != old_key: - warn( - "DeepDiff Deprecation: %s is renamed to %s. Please start using " - "the new unified naming convention.", old_key, new_key) - if new_key in self: - return self.get(new_key) - else: # pragma: no cover - raise KeyError(new_key) +JSON_CONVERTOR = { + Decimal: float, + OrderedSet: list, + type: lambda x: x.__name__, + bytes: lambda x: x.decode('utf-8') +} + + +def json_convertor_default(default_mapping=None): + _convertor_mapping = JSON_CONVERTOR.copy() + if default_mapping: + _convertor_mapping.update(default_mapping) + + def _convertor(obj): + for original_type, convert_to in _convertor_mapping.items(): + if isinstance(obj, original_type): + return convert_to(obj) + raise TypeError('We do not know how to convert {} of type {} for json serialization. 
Please pass the default_mapping parameter with proper mapping of the object to a basic python type.'.format(obj, type(obj))) + + return _convertor + + +def add_to_frozen_set(parents_ids, item_id): + return parents_ids | {item_id} + + +def convert_item_or_items_into_set_else_none(items): + if items: + if isinstance(items, strings): + items = set([items]) + else: + items = set(items) + else: + items = None + return items -class Verbose(object): + +RE_COMPILED_TYPE = type(re.compile('')) + + +def convert_item_or_items_into_compiled_regexes_else_none(items): + if items: + if isinstance(items, (strings, RE_COMPILED_TYPE)): + items = [items] + items = [i if isinstance(i, RE_COMPILED_TYPE) else re.compile(i) for i in items] + else: + items = None + return items + + +def get_id(obj): """ - Global verbose level + Adding some characters to id so they are not just integers to reduce the risk of collision. """ - level = 1 + return "{}{}".format(ID_PREFIX, id(obj)) + + +def get_type(obj): + """ + Get the type of object or if it is a class, return the class itself. + """ + return obj if type(obj) is type else type(obj) diff --git a/deepdiff/model.py b/deepdiff/model.py index 3c549a11..7ad06dec 100644 --- a/deepdiff/model.py +++ b/deepdiff/model.py @@ -1,8 +1,9 @@ # -*- coding: utf-8 -*- -from deepdiff.helper import items, RemapDict, strings, short_repr, Verbose, notpresent +from deepdiff.helper import RemapDict, strings, short_repr, Verbose, notpresent from ast import literal_eval from copy import copy +from ordered_set import OrderedSet FORCE_DEFAULT = 'fake' UP_DOWN = {'up': 'down', 'down': 'up'} @@ -33,16 +34,25 @@ def cleanup(self): Remove empty keys from this object. Should always be called after the result is final. 
:return: """ - empty_keys = [k for k, v in getattr(self, items)() if not v] + empty_keys = [k for k, v in self.items() if not v] for k in empty_keys: del self[k] +class PrettyOrderedSet(OrderedSet): + """ + From the perspective of the users of the library, they are dealing with lists. + Behind the scene, we have ordered sets. + """ + def __repr__(self): + return '[{}]'.format(", ".join(map(str, self))) + + class TreeResult(ResultDict): def __init__(self): for key in REPORT_KEYS: - self[key] = set() + self[key] = PrettyOrderedSet() class TextResult(ResultDict): @@ -59,8 +69,8 @@ def __init__(self, tree_results=None): "iterable_item_removed": {}, "attribute_added": self.__set_or_dict(), "attribute_removed": self.__set_or_dict(), - "set_item_removed": set(), - "set_item_added": set(), + "set_item_removed": PrettyOrderedSet(), + "set_item_added": PrettyOrderedSet(), "repetition_change": {} }) @@ -68,7 +78,7 @@ def __init__(self, tree_results=None): self._from_tree_results(tree_results) def __set_or_dict(self): - return {} if Verbose.level >= 2 else set() + return {} if Verbose.level >= 2 else PrettyOrderedSet() def _from_tree_results(self, tree): """ @@ -102,7 +112,7 @@ def _from_tree_default(self, tree, report_type): # do the reporting report = self[report_type] - if isinstance(report, set): + if isinstance(report, PrettyOrderedSet): report.add(change.path(force=FORCE_DEFAULT)) elif isinstance(report, dict): report[change.path(force=FORCE_DEFAULT)] = item @@ -117,13 +127,21 @@ def _from_tree_default(self, tree, report_type): def _from_tree_type_changes(self, tree): if 'type_changes' in tree: for change in tree['type_changes']: + if type(change.t1) is type: + include_values = False + old_type = change.t1 + new_type = change.t2 + else: + include_values = True + old_type = type(change.t1) + new_type = type(change.t2) remap_dict = RemapDict({ - 'old_type': type(change.t1), - 'new_type': type(change.t2) + 'old_type': old_type, + 'new_type': new_type }) 
self['type_changes'][change.path( force=FORCE_DEFAULT)] = remap_dict - if Verbose.level: + if Verbose.level and include_values: remap_dict.update(old_value=change.t1, new_value=change.t2) def _from_tree_value_changed(self, tree): diff --git a/deepdiff/search.py b/deepdiff/search.py index 448929f1..9f32e614 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -2,19 +2,20 @@ # -*- coding: utf-8 -*- # In order to run the docstrings: # python3 -m deepdiff.search - -from __future__ import absolute_import -from __future__ import print_function -import sys -from collections import Iterable -from collections import MutableMapping +import os +import re +from collections.abc import MutableMapping, Iterable import logging -from deepdiff.helper import py3, strings, numbers, items +from deepdiff.helper import strings, numbers, add_to_frozen_set, current_dir logger = logging.getLogger(__name__) +with open(os.path.join(current_dir, 'search_doc.rst'), 'r') as doc_file: + doc = doc_file.read() + + class DeepSearch(dict): r""" **DeepSearch** @@ -37,6 +38,12 @@ class DeepSearch(dict): exclude_types: list, default = None. List of object types to exclude from the report. + case_sensitive: Boolean, default = False + + match_string: Boolean, default = False + If True, the value of the object or its children have to exactly match the item. + If False, the value of the item can be a part of the value of the object or its children + **Returns** A DeepSearch object that has the matched paths and matched values. @@ -75,20 +82,24 @@ def __init__(self, obj, item, exclude_paths=set(), + exclude_regex_paths=set(), exclude_types=set(), verbose_level=1, case_sensitive=False, + match_string=False, **kwargs): if kwargs: raise ValueError(( "The following parameter(s) are not valid: %s\n" - "The valid parameters are obj, item, exclude_paths, exclude_types and verbose_level." 
+ "The valid parameters are obj, item, exclude_paths, exclude_types,\n" + "case_sensitive, match_string and verbose_level." ) % ', '.join(kwargs.keys())) self.obj = obj self.case_sensitive = case_sensitive if isinstance(item, strings) else True item = item if self.case_sensitive else item.lower() self.exclude_paths = set(exclude_paths) + self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in exclude_regex_paths] self.exclude_types = set(exclude_types) self.exclude_types_tuple = tuple( exclude_types) # we need tuple for checking isinstance @@ -98,9 +109,12 @@ def __init__(self, matched_values=self.__set_or_dict(), unprocessed=[]) + # Cases where user wants to match exact string item + self.match_string = match_string + self.__search(obj, item, parents_ids=frozenset({id(obj)})) - empty_keys = [k for k, v in getattr(self, items)() if not v] + empty_keys = [k for k, v in self.items() if not v] for k in empty_keys: del self[k] @@ -114,12 +128,6 @@ def __report(self, report_key, key, value): else: self[report_key].add(key) - @staticmethod - def __add_to_frozen_set(parents_ids, item_id): - parents_ids = set(parents_ids) - parents_ids.add(item_id) - return frozenset(parents_ids) - def __search_obj(self, obj, item, @@ -141,7 +149,7 @@ def __search_obj(self, # Skip magic methods. Slightly hacky, but unless people are defining # new magic methods they want to search, it should work fine. 
obj = {i: getattr(obj, i) for i in dir(obj) - if not (i.startswith('__') and i.endswith('__'))} + if not (i.startswith('__') and i.endswith('__'))} except AttributeError: try: obj = {i: getattr(obj, i) for i in obj.__slots__} @@ -158,6 +166,9 @@ def __skip_this(self, item, parent): skip = False if parent in self.exclude_paths: skip = True + elif self.exclude_regex_paths and any( + [exclude_regex_path.search(parent) for exclude_regex_path in self.exclude_regex_paths]): + skip = True else: if isinstance(item, self.exclude_types_tuple): skip = True @@ -191,12 +202,14 @@ def __search_dict(self, if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) new_parent = parent_text % (parent, item_key_str) new_parent_cased = new_parent if self.case_sensitive else new_parent.lower() - if str(item) in new_parent_cased: + str_item = str(item) + if (self.match_string and str_item == new_parent_cased) or\ + (not self.match_string and str_item in new_parent_cased): self.__report( report_key='matched_paths', key=new_parent, @@ -232,14 +245,15 @@ def __search_iterable(self, item_id = id(thing) if parents_ids and item_id in parents_ids: continue - parents_ids_added = self.__add_to_frozen_set(parents_ids, item_id) + parents_ids_added = add_to_frozen_set(parents_ids, item_id) self.__search(thing, item, "%s[%s]" % (parent, i), parents_ids_added) def __search_str(self, obj, item, parent): """Compare strings""" obj_text = obj if self.case_sensitive else obj.lower() - if item in obj_text: + + if (self.match_string and item == obj_text) or (not self.match_string and item in obj_text): self.__report(report_key='matched_values', key=parent, value=obj) def __search_numbers(self, obj, item, parent): @@ -296,35 +310,8 @@ def __search(self, obj, item, parent="root", parents_ids=frozenset({})): self.__search_obj(obj, item, parent, parents_ids) -class grep(object): - """ - 
**Grep!** - - grep is a new interface for Deep Search. It takes exactly the same arguments. - And it works just like grep in shell! - - **Examples** - - Importing - >>> from deepdiff import grep - >>> from pprint import pprint - - Search in list for string - >>> obj = ["long somewhere", "string", 0, "somewhere great!"] - >>> item = "somewhere" - >>> ds = obj | grep(item) - >>> print(ds) - {'matched_values': {'root[3]', 'root[0]'} - - Search in nested data for string - >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}] - >>> item = "somewhere" - >>> ds = obj | grep(item, verbose_level=2) - >>> pprint(ds, indent=2) - { 'matched_paths': {"root[1]['somewhere']": 'around'}, - 'matched_values': { 'root[0]': 'something somewhere', - "root[1]['long']": 'somewhere'}} - """ +class grep: + __doc__ = doc def __init__(self, item, @@ -337,7 +324,5 @@ def __ror__(self, other): if __name__ == "__main__": # pragma: no cover - if not py3: - sys.exit("Please run with Python 3 to verify the doc strings.") import doctest doctest.testmod() diff --git a/deepdiff/search_doc.rst b/deepdiff/search_doc.rst new file mode 100644 index 00000000..cc40ab52 --- /dev/null +++ b/deepdiff/search_doc.rst @@ -0,0 +1,26 @@ +**Grep** + +grep is a new interface for Deep Search. It takes exactly the same arguments. +And it works just like grep in shell! 
+ +**Examples** + +Importing + >>> from deepdiff import grep + >>> from pprint import pprint + +Search in list for string + >>> obj = ["long somewhere", "string", 0, "somewhere great!"] + >>> item = "somewhere" + >>> ds = obj | grep(item) + >>> print(ds) + {'matched_values': {'root[3]', 'root[0]'}} + +Search in nested data for string + >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}] + >>> item = "somewhere" + >>> ds = obj | grep(item, verbose_level=2) + >>> pprint(ds, indent=2) + { 'matched_paths': {"root[1]['somewhere']": 'around'}, + 'matched_values': { 'root[0]': 'something somewhere', + "root[1]['long']": 'somewhere'}} diff --git a/docs/conf.py b/docs/conf.py index 2a5df8dd..5697e04f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '3.3.0' +version = '4.0.0' # The full version, including alpha/beta/rc tags. -release = '3.3.0' +release = '4.0.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/deephash.rst b/docs/deephash.rst new file mode 100644 index 00000000..55c3041f --- /dev/null +++ b/docs/deephash.rst @@ -0,0 +1,14 @@ +:doc:`/index` + +DeepHash Reference +==================== + +.. toctree:: + :maxdepth: 3 + +.. automodule:: deepdiff.deephash + +.. autoclass:: DeepHash + :members: + +Back to :doc:`/index` diff --git a/docs/dsearch.rst b/docs/dsearch.rst index f3b07b0a..60e9753f 100644 --- a/docs/dsearch.rst +++ b/docs/dsearch.rst @@ -8,6 +8,9 @@ DeepSearch Reference .. automodule:: deepdiff.search +.. autoclass:: grep + :members: + .. autoclass:: DeepSearch :members: diff --git a/docs/index.rst b/docs/index.rst index 7418cc2a..0e168247 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,12 +3,19 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -DeepDiff 3.3.0 documentation! 
+ +DeepDiff 4.0.0 documentation! ============================= -**DeepDiff: Deep Difference of dictionaries, iterables and almost any other object recursively.** +**DeepDiff: Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes.** + +**DeepSearch: Search for objects within other objects.** + +**DeepHash: Hash any object based on their content even if they are not "hashable".** -DeepDiff works with Python 2.7, 3.3, 3.4, 3.5, 3.6, Pypy, Pypy3 +DeepDiff works with Python 3.4, 3.5, 3.6, 3.7, Pypy3 + +NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to support Python 2. ************ Installation @@ -24,23 +31,13 @@ Importing .. code:: python >>> from deepdiff import DeepDiff # For Deep Difference of 2 objects - >>> from deepdiff import DeepSearch # For finding if item exists in an object + >>> from deepdiff import grep, DeepSearch # For finding if item exists in an object + >>> from deepdiff import DeepHash # For hashing objects based on their contents ******** -Features +DeepDiff ******** -Parameters -~~~~~~~~~~ - -- t1 (the first object) -- t2 (the second object) -- `ignore\_order`_ -- `report\_repetition`_ -- `exclude\_types\_or\_paths`_ -- `significant\_digits`_ -- `views`_ - Supported data types ~~~~~~~~~~~~~~~~~~~~ @@ -66,34 +63,6 @@ List difference ignoring order or duplicates >>> print (ddiff) {} -Report repetitions -~~~~~~~~~~~~~~~~~~ - -This flag ONLY works when ignoring order is enabled. - -.. code:: python - - t1 = [1, 3, 1, 4] - t2 = [4, 4, 1] - ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) - print(ddiff) - -which will print you: - -.. 
code:: python - - {'iterable_item_removed': {'root[1]': 3}, - 'repetition_change': {'root[0]': {'old_repeat': 2, - 'old_indexes': [0, 2], - 'new_indexes': [2], - 'value': 1, - 'new_repeat': 1}, - 'root[3]': {'old_repeat': 1, - 'old_indexes': [3], - 'new_indexes': [0, 1], - 'value': 4, - 'new_repeat': 2}}} - Exclude types or paths ~~~~~~~~~~~~~~~~~~~~~~ @@ -109,16 +78,6 @@ Exclude certain types from comparison >>> print(DeepDiff(t1, t2, exclude_types={logging.Logger})) {} -Exclude part of your object tree from comparison ------------------------------------------------- - -.. code:: python - - >>> t1 = {"for life": "vegan", "ingredients": ["no meat", "no eggs", "no dairy"]} - >>> t2 = {"for life": "vegan", "ingredients": ["veggies", "tofu", "soy sauce"]} - >>> print (DeepDiff(t1, t2, exclude_paths={"root['ingredients']"})) - {} - Significant Digits ~~~~~~~~~~~~~~~~~~ @@ -135,147 +94,25 @@ X=significant\_digits >>> DeepDiff(t1, t2, significant_digits=1) {'values_changed': {'root': {'old_value': Decimal('1.52'), 'new_value': Decimal('1.57')}}} -Approximate float comparison: ------------------------------ - -.. code:: python - - >>> t1 = [ 1.1129, 1.3359 ] - >>> t2 = [ 1.113, 1.3362 ] - >>> pprint(DeepDiff(t1, t2, significant_digits=3)) - {} - >>> pprint(DeepDiff(t1, t2)) - {'values_changed': {'root[0]': {'new_value': 1.113, 'old_value': 1.1129}, - 'root[1]': {'new_value': 1.3362, 'old_value': 1.3359}}} - >>> pprint(DeepDiff(1.23*10**20, 1.24*10**20, significant_digits=1)) - {'values_changed': {'root': {'new_value': 1.24e+20, 'old_value': 1.23e+20}}} - - -Views -~~~~~ - -Text View (default) -------------------- - -Text view is the original and currently the default view of DeepDiff. - -It is called text view because the results contain texts that represent the path to the data: - -Example of using the text view. 
- >>> from deepdiff import DeepDiff - >>> t1 = {1:1, 3:3, 4:4} - >>> t2 = {1:1, 3:3, 5:5, 6:6} - >>> ddiff = DeepDiff(t1, t2) - >>> print(ddiff) - {'dictionary_item_added': {'root[5]', 'root[6]'}, 'dictionary_item_removed': {'root[4]'}} - -So for example ddiff['dictionary_item_removed'] is a set if strings thus this is called the text view. - -.. seealso:: - The following examples are using the *default text view.* - The Tree View is introduced in DeepDiff v3 and provides traversing capabilities through your diffed data and more! - Read more about the Tree View at :doc:`/diff` - -Tree View (new) ---------------- - -Starting the version v3 You can choose the view into the deepdiff results. -The tree view provides you with tree objects that you can traverse through to find -the parents of the objects that are diffed and the actual objects that are being diffed. -This view is very useful when dealing with nested objects. -Note that tree view always returns results in the form of Python sets. - -You can traverse through the tree elements! - -.. note:: - The Tree view is just a different representation of the diffed data. - Behind the scene, DeepDiff creates the tree view first and then converts it to textual representation for the text view. - -.. code:: text - - +---------------------------------------------------------------+ - | | - | parent(t1) parent node parent(t2) | - | + ^ + | - +------|--------------------------|---------------------|-------+ - | | | up | - | Child | | | ChildRelationship - | Relationship | | | - | down | | | - +------|----------------------|-------------------------|-------+ - | v v v | - | child(t1) child node child(t2) | - | | - +---------------------------------------------------------------+ - - -The tree view allows you to have more than mere textual representaion of the diffed objects. -It gives you the actual objects (t1, t2) throughout the tree of parents and children. - -:Example: - -.. 
code:: python - - >>> t1 = {1:1, 2:2, 3:3} - >>> t2 = {1:1, 2:4, 3:3} - >>> ddiff_verbose0 = DeepDiff(t1, t2, verbose_level=0, view='tree') - >>> ddiff_verbose0 - {'values_changed': {}} - >>> - >>> ddiff_verbose1 = DeepDiff(t1, t2, verbose_level=1, view='tree') - >>> ddiff_verbose1 - {'values_changed': {}} - >>> set_of_values_changed = ddiff_verbose1['values_changed'] - >>> # since set_of_values_changed includes only one item in a set - >>> # in order to get that one item we can: - >>> (changed,) = set_of_values_changed - >>> changed # Another way to get this is to do: changed=list(set_of_values_changed)[0] - - >>> changed.t1 - 2 - >>> changed.t2 - 4 - >>> # You can traverse through the tree, get to the parents! - >>> changed.up - - -.. seealso:: - Read more about the Tree View at :doc:`/diff` - - -Verbose Level -~~~~~~~~~~~~~ - -Verbose level by default is 1. The possible values are 0, 1 and 2. - -- verbose_level 0: won’t report values when type changed. -- verbose_level 1: default -- verbose_level 2: will report values when custom objects or - dictionaries have items added or removed. - -.. seealso:: - Read more about the verbosity at :doc:`/diff` - Serialization ~~~~~~~~~~~~~ -DeepDiff uses jsonpickle in order to serialize and deserialize its results into json. This works for both tree view and text view. - -:Serialize and then deserialize back to deepdiff: +:Serialize to json: .. code:: python >>> t1 = {1: 1, 2: 2, 3: 3} >>> t2 = {1: 1, 2: "2", 3: 3} >>> ddiff = DeepDiff(t1, t2) - >>> jsoned = ddiff.json + >>> jsoned = ddiff.to_json() >>> jsoned - '{"type_changes": {"root[2]": {"py/object": "deepdiff.helper.RemapDict", "new_type": {"py/type": "__builtin__.str"}, "new_value": "2", "old_type": {"py/type": "__builtin__.int"}, "old_value": 2}}}' - >>> ddiff_new = DeepDiff.from_json(jsoned) - >>> ddiff == ddiff_new - True + '{"type_changes": {"root[2]": {"new_type": "str", "new_value": "2", "old_type": "int", "old_value": 2}}}' + + +And many more features! 
Read more in +:doc:`/diff` *********** Deep Search @@ -291,16 +128,18 @@ Importing .. code:: python - >>> from deepdiff import DeepSearch + >>> from deepdiff import DeepSearch, grep >>> from pprint import pprint +DeepSearch comes with grep function which is easier to remember! + Search in list for string .. code:: python >>> obj = ["long somewhere", "string", 0, "somewhere great!"] >>> item = "somewhere" - >>> ds = DeepSearch(obj, item, verbose_level=2) + >>> ds = obj | grep(item, verbose_level=2) >>> print(ds) {'matched_values': {'root[3]': 'somewhere great!', 'root[0]': 'long somewhere'}} @@ -310,29 +149,82 @@ Search in nested data for string >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}] >>> item = "somewhere" - >>> ds = DeepSearch(obj, item, verbose_level=2) + >>> ds = obj | grep(item, verbose_level=2) >>> pprint(ds, indent=2) { 'matched_paths': {"root[1]['somewhere']": 'around'}, 'matched_values': { 'root[0]': 'something somewhere', "root[1]['long']": 'somewhere'}} -.. _ignore\_order: #ignore-order -.. _report\_repetition: #report-repetitions -.. _verbose\_level: #verbose-level -.. _exclude\_types\_or\_paths: #exclude-types-or-paths -.. _significant\_digits: #significant-digits -.. _views: #views -DeepDiff Reference -================== +Read more in the Deep Search references: -:doc:`/diff` +:doc:`/dsearch` -DeepSearch Reference -==================== +********* +Deep Hash +********* +DeepHash calculates the hash of objects based on their contents in a deterministic way. +This way 2 objects with the same content should have the same hash. -:doc:`/dsearch` +The main usage of DeepHash is to calculate the hash of otherwise unhashable objects. +For example you can use DeepHash to calculate the hash of a set or a dictionary! + +The core of DeepHash is a deterministic serialization of your object into a string so it +can be passed to a hash function. By default it uses Murmur 3 128 bit hash function. 
+but you can pass another hash function to it if you want. + +Let's say you have a dictionary object. + +.. code:: python + + >>> from deepdiff import DeepHash + >>> + >>> obj = {1: 2, 'a': 'b'} + +If you try to hash it: + +.. code:: python + + >>> hash(obj) + Traceback (most recent call last): + File "", line 1, in + TypeError: unhashable type: 'dict' + +But with DeepHash: + +.. code:: python + + >>> from deepdiff import DeepHash + >>> obj = {1: 2, 'a': 'b'} + >>> DeepHash(obj) + {1: 2468916477072481777512283587789292749, 2: -35787773492556653776377555218122431491, ...} + +So what is exactly the hash of obj in this case? +DeepHash is calculating the hash of the obj and any other object that obj contains. +The output of DeepHash is a dictionary of object IDs to their hashes. +In order to get the hash of obj itself, you need to use the object (or the id of object) to get its hash: + +.. code:: python + + >>> hashes = DeepHash(obj) + >>> hashes[obj] + 34150898645750099477987229399128149852 + +Read more in the Deep Hash reference: + +:doc:`/deephash` + + +References +========== + +.. toctree:: + :maxdepth: 2 + + diff + dsearch + deephash Indices and tables @@ -346,6 +238,8 @@ Indices and tables Changelog ========= +- v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. 
+- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes - v3-2-2: Adding help(deepdiff) - v3-2-1: Fixing hash of None @@ -380,25 +274,35 @@ Changelog Authors ======= -Sep Dehpour - -- `Github `_ -- `ZepWorks `_ -- `Linkedin `_ -- `Article about Deepdiff `_ +- Sep Dehpour -Victor Hahn Castell + - `Github `_ + - `ZepWorks `_ + - `Linkedin `_ + - `Article about Deepdiff `_ -- `hahncastell.de `_ -- `flexoptix.net `_ +- Victor Hahn Castell for major contributions + - `hahncastell.de `_ + - `flexoptix.net `_ -ALso thanks to: - -- nfvs for Travis-CI setup script -- brbsix for initial Py3 porting -- WangFenjin for unicode support -- timoilya for comparing list of sets when ignoring order -- Bernhard10 for significant digits comparison +- nfvs for Travis-CI setup script. +- brbsix for initial Py3 porting. +- WangFenjin for unicode support. +- timoilya for comparing list of sets when ignoring order. +- Bernhard10 for significant digits comparison. - b-jazz for PEP257 cleanup, Standardize on full names, fixing line endings. -- Victor Hahn Castell @ Flexoptix for deep set comparison +- finnhughes for fixing __slots__ +- moloney for Unicode vs. 
Bytes default +- serv-inc for adding help(deepdiff) +- movermeyer for updating docs +- maxrothman for search in inherited class attributes +- maxrothman for search for types/objects +- MartyHub for exclude regex paths +- sreecodeslayer for DeepSearch match_string +- Brian Maissy (brianmaissy) for weakref fix, enum tests +- Bartosz Borowik (boba-2) for Exclude types fix when ignoring order +- Brian Maissy (brianmaissy) for fixing classes which inherit from classes with slots didn't have all of their slots compared +- Juan Soler (Soleronline) for adding ignore_type_number +- mthaddon for adding timedelta diffing support + diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..94563262 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +-r requirements.txt +pytest==4.0.1 +pytest-cov==2.6.0 +numpy==1.15.4 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..38a19407 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +mmh3==2.5.1 +jsonpickle==1.0 +ordered-set==3.1 diff --git a/run_tests.py b/run_tests.py deleted file mode 100755 index 3f4217dd..00000000 --- a/run_tests.py +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import unittest -loader = unittest.TestLoader() -tests = loader.discover('.') -testRunner = unittest.runner.TextTestRunner() -testRunner.run(tests) diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 00000000..660146f5 --- /dev/null +++ b/run_tests.sh @@ -0,0 +1 @@ +pytest --cov=deepdiff --cov-report term-missing diff --git a/setup.py b/setup.py index 61404ddd..a8f99c38 100755 --- a/setup.py +++ b/setup.py @@ -1,19 +1,43 @@ import os +import re +import sys from setuptools import setup +if sys.version[0] == '2': # pragma: no cover + sys.exit('Python 2 is not supported anymore. 
The last version of DeepDiff that supported Py2 was 3.3.0') + # if you are not using vagrant, just delete os.link directly, # The hard link only saves a little disk space, so you should not care if os.environ.get('USER', '') == 'vagrant': del os.link -try: - with open('README.txt') as file: - long_description = file.read() -except: - long_description = "Deep Difference and Search of any Python object/data." + +VERSIONFILE = "deepdiff/__init__.py" +with open(VERSIONFILE, "r") as the_file: + verstrline = the_file.read() +VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" +mo = re.search(VSRE, verstrline, re.M) +if mo: + verstr = mo.group(1) +else: + raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE,)) + + +def get_reqs(filename): + with open(filename, "r") as reqs_file: + reqs = reqs_file.readlines() + reqs = list(map(lambda x: x.replace('==', '>='), reqs)) + return reqs + + +reqs = get_reqs("requirements.txt") + +with open('README.md') as file: + long_description = file.read() + setup(name='deepdiff', - version='3.3.0', + version=verstr, description='Deep Difference and Search of any Python object/data.', url='https://github.com/seperman/deepdiff', download_url='https://github.com/seperman/deepdiff/tarball/master', @@ -21,23 +45,21 @@ author_email='sep@zepworks.com', license='MIT', packages=['deepdiff'], - zip_safe=False, + zip_safe=True, test_suite="tests", tests_require=['mock'], # 'numpy==1.11.2' numpy is needed but comes already installed with travis long_description=long_description, - install_requires=[ - 'jsonpickle' - ], + long_description_content_type='text/markdown', + install_requires=reqs, classifiers=[ "Intended Audience :: Developers", "Operating System :: OS Independent", "Topic :: Software Development", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", - "Programming Language 
:: Python :: Implementation :: PyPy", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: Implementation :: PyPy3", "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License" ], diff --git a/tests/__init__.py b/tests/__init__.py index f23f9dc2..a0010f30 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -# To run all the tests: -# python -m unittest discover class CustomClass(object): diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py old mode 100644 new mode 100755 index 8b1a543b..3e60c4a6 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1,52 +1,28 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_diff_text - -Or to run all the tests: - python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - python -m unittest tests.test_diff_text.DeepDiffTextTestCase.test_same_objects - -or using nosetests: - nosetests tests/test_diff_text.py:DeepDiffTestCase.test_diff_when_hash_fails -""" -import unittest import datetime +import pytest +import logging from decimal import Decimal from deepdiff import DeepDiff -from deepdiff.helper import py3 from tests import CustomClass -if py3: - from unittest import mock -else: - import mock - -import logging +from unittest import mock logging.disable(logging.CRITICAL) -class DeepDiffTextTestCase(unittest.TestCase): +class TestDeepDiffText: """DeepDiff Tests.""" def test_same_objects(self): t1 = {1: 1, 2: 2, 3: 3} t2 = t1 - self.assertEqual(DeepDiff(t1, t2), {}) + assert {} == DeepDiff(t1, t2) def test_item_type_change(self): t1 = {1: 1, 2: 2, 3: 3} t2 = {1: 1, 2: "2", 3: 3} ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff, { + assert { 'type_changes': { 
"root[2]": { "old_value": 2, @@ -55,20 +31,57 @@ def test_item_type_change(self): "new_type": str } } - }) + } == ddiff def test_item_type_change_less_verbose(self): t1 = {1: 1, 2: 2, 3: 3} t2 = {1: 1, 2: "2", 3: 3} - self.assertEqual( - DeepDiff( - t1, t2, verbose_level=0), - {'type_changes': { + assert {'type_changes': { "root[2]": { "old_type": int, "new_type": str } - }}) + }} == DeepDiff(t1, t2, verbose_level=0) + + def test_item_type_change_for_strings_ignored_by_default(self): + """ ignore_string_type_changes = True by default """ + + t1 = 'hello' + t2 = b'hello' + ddiff = DeepDiff(t1, t2, ignore_string_type_changes=True) + assert not ddiff + + def test_item_type_change_for_strings_override(self): + t1 = 'hello' + t2 = b'hello' + ddiff = DeepDiff(t1, t2, ignore_string_type_changes=False) + assert { + 'type_changes': { + 'root': { + 'old_type': str, + 'new_type': bytes, + 'old_value': 'hello', + 'new_value': b'hello' + } + } + } == ddiff + + def test_type_change_numeric(self): + t1 = 10 + t2 = 10.0 + ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True) + assert {} == ddiff + + @pytest.mark.parametrize("t1, t2, expected_result", + [ + (10, 10.0, {}), + (10, 10.2, {'values_changed': {'root': {'new_value': 10.2, 'old_value': 10}}}), + (Decimal(10), 10.0, {}), + ({"a": Decimal(10), "b": 12, 11.0: None}, {b"b": 12, "a": 10.0, Decimal(11): None}, {}), + ]) + def test_type_change_numeric_when_ignore_order(self, t1, t2, expected_result): + ddiff = DeepDiff(t1, t2, ignore_order=True, ignore_numeric_type_changes=True, ignore_string_type_changes=True) + assert expected_result == ddiff def test_value_change(self): t1 = {1: 1, 2: 2, 3: 3} @@ -81,7 +94,7 @@ def test_value_change(self): } } } - self.assertEqual(DeepDiff(t1, t2), result) + assert result == DeepDiff(t1, t2) def test_item_added_and_removed(self): t1 = {1: 1, 2: 2, 3: 3, 4: 4} @@ -97,7 +110,7 @@ def test_item_added_and_removed(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def 
test_item_added_and_removed_verbose(self): t1 = {1: 1, 3: 3, 4: 4} @@ -112,7 +125,7 @@ def test_item_added_and_removed_verbose(self): 'root[5]': 5 } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_diffs_dates(self): t1 = datetime.date(2016, 8, 8) @@ -126,7 +139,25 @@ def test_diffs_dates(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff + + def test_diffs_timedeltas(self): + t1 = datetime.timedelta(days=1, seconds=12) + t2 = datetime.timedelta(days=1, seconds=10) + t3 = datetime.timedelta(seconds=(60*60*24) + 12) + ddiff = DeepDiff(t1, t2) + result = { + 'values_changed': { + 'root': { + 'new_value': t2, + 'old_value': t1 + } + } + } + assert result == ddiff + ddiff = DeepDiff(t1, t3) + result = {} + assert result == ddiff def test_string_difference(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world"}} @@ -144,7 +175,7 @@ def test_string_difference(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_diffs_equal_strings_when_not_identical(self): t1 = 'hello' @@ -152,7 +183,7 @@ def test_diffs_equal_strings_when_not_identical(self): t2 += 'lo' assert t1 is not t2 ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_string_difference2(self): t1 = { @@ -176,7 +207,7 @@ def test_string_difference2(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_bytes(self): t1 = { @@ -198,6 +229,7 @@ def test_bytes(self): "c": b'\x81', } } + ddiff = DeepDiff(t1, t2) result = { 'values_changed': { @@ -213,7 +245,7 @@ def test_bytes(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_unicode(self): t1 = { @@ -237,7 +269,7 @@ def test_unicode(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_type_change(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} @@ -253,7 +285,7 @@ def test_type_change(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def 
test_list_difference(self): t1 = { @@ -273,14 +305,14 @@ def test_list_difference(self): "root[4]['b'][3]": 'to_be_removed2' } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference_add(self): t1 = [1, 2] t2 = [1, 2, 3, 5] ddiff = DeepDiff(t1, t2) result = {'iterable_item_added': {'root[2]': 3, 'root[3]': 5}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference2(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3, 10, 12]}} @@ -302,7 +334,7 @@ def test_list_difference2(self): } } ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference3(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 5]}} @@ -323,7 +355,7 @@ def test_list_difference3(self): "root[4]['b'][3]": 5 } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference4(self): # TODO: Look into Levenshtein algorithm @@ -346,31 +378,31 @@ def test_list_difference4(self): 'root[4]': 'e' } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference_ignore_order(self): t1 = {1: 1, 4: {"a": "hello", "b": [1, 2, 3]}} t2 = {1: 1, 4: {"a": "hello", "b": [1, 3, 2, 3]}} ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_dictionary_difference_ignore_order(self): t1 = {"a": [[{"b": 2, "c": 4}, {"b": 2, "c": 3}]]} t2 = {"a": [[{"b": 2, "c": 3}, {"b": 2, "c": 4}]]} ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_nested_list_ignore_order(self): t1 = [1, 2, [3, 4]] t2 = [[4, 3, 3], 2, 1] ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_nested_list_difference_ignore_order(self): t1 = [1, 2, [3, 4]] t2 = [[4, 3], 2, 1] ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_nested_list_with_dictionarry_difference_ignore_order(self): t1 
= [1, 2, [3, 4, {1: 2}]] @@ -379,7 +411,7 @@ def test_nested_list_with_dictionarry_difference_ignore_order(self): ddiff = DeepDiff(t1, t2, ignore_order=True) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_difference_ignore_order_report_repetition(self): t1 = [1, 3, 1, 4] @@ -406,7 +438,7 @@ def test_list_difference_ignore_order_report_repetition(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff # TODO: fix repeition report def test_nested_list_ignore_order_report_repetition_wrong_currently(self): @@ -424,13 +456,13 @@ def test_nested_list_ignore_order_report_repetition_wrong_currently(self): } } } - self.assertNotEqual(ddiff, result) + assert result != ddiff def test_list_of_unhashable_difference_ignore_order(self): t1 = [{"a": 2}, {"b": [3, 4, {1: 1}]}] t2 = [{"b": [3, 4, {1: 1}]}, {"a": 2}] ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_list_of_unhashable_difference_ignore_order2(self): t1 = [1, {"a": 2}, {"b": [3, 4, {1: 1}]}, "B"] @@ -447,7 +479,7 @@ def test_list_of_unhashable_difference_ignore_order2(self): 'root[0]': 1 } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_unhashable_difference_ignore_order3(self): t1 = [1, {"a": 2}, {"a": 2}, {"b": [3, 4, {1: 1}]}, "B"] @@ -467,7 +499,7 @@ def test_list_of_unhashable_difference_ignore_order3(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_unhashable_difference_ignore_order_report_repetition( self): @@ -491,14 +523,14 @@ def test_list_of_unhashable_difference_ignore_order_report_repetition( } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_unhashable_difference_ignore_order4(self): t1 = [{"a": 2}, {"a": 2}] t2 = [{"a": 2}] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def 
test_list_of_unhashable_difference_ignore_order_report_repetition2( self): @@ -518,14 +550,14 @@ def test_list_of_unhashable_difference_ignore_order_report_repetition2( } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_sets_difference_ignore_order(self): t1 = [{1}, {2}, {3}] t2 = [{4}, {1}, {2}, {3}] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {'iterable_item_added': {'root[0]': {4}}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_sets_difference_ignore_order_when_there_is_duplicate( self): @@ -533,7 +565,7 @@ def test_list_of_sets_difference_ignore_order_when_there_is_duplicate( t2 = [{4}, {1}, {2}, {3}, {3}] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {'iterable_item_added': {'root[0]': {4}}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_of_sets_difference_ignore_order_when_there_is_duplicate_and_mix_of_hashable_unhashable( self): @@ -541,14 +573,14 @@ def test_list_of_sets_difference_ignore_order_when_there_is_duplicate_and_mix_of t2 = [{4}, 1, {2}, {3}, {3}, 1, 1] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {'iterable_item_added': {'root[0]': {4}}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_set_of_none(self): """ https://github.com/seperman/deepdiff/issues/64 """ ddiff = DeepDiff(set(), set([None])) - self.assertEqual(ddiff, {'set_item_added': {'root[None]'}}) + assert {'set_item_added': {'root[None]'}} == ddiff def test_list_that_contains_dictionary(self): t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, {1: 1, 2: 2}]}} @@ -563,7 +595,7 @@ def test_list_that_contains_dictionary(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_dictionary_of_list_of_dictionary_ignore_order(self): t1 = { @@ -587,7 +619,7 @@ def test_dictionary_of_list_of_dictionary_ignore_order(self): } ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def 
test_comprehensive_ignore_order(self): @@ -620,7 +652,7 @@ def test_comprehensive_ignore_order(self): } ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_ignore_order_when_objects_similar(self): """ @@ -678,7 +710,7 @@ def test_ignore_order_when_objects_similar(self): } ddiff = DeepDiff(t1, t2, ignore_order=True) - self.assertEqual(ddiff, { + assert { 'iterable_item_removed': { "root['key2'][1]": { 'key5': 'val5', @@ -691,7 +723,7 @@ def test_ignore_order_when_objects_similar(self): 'key6': 'val6' } } - }) + } == ddiff def test_set_ignore_order_report_repetition(self): """ @@ -706,7 +738,7 @@ def test_set_ignore_order_report_repetition(self): 'set_item_added': {'root[3]', 'root[5]'}, 'set_item_removed': {'root[8]'} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_set(self): t1 = {1, 2, 8} @@ -716,14 +748,14 @@ def test_set(self): 'set_item_added': {'root[3]', 'root[5]'}, 'set_item_removed': {'root[8]'} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_set_strings(self): t1 = {"veggies", "tofu"} t2 = {"veggies", "tofu", "seitan"} ddiff = DeepDiff(t1, t2) result = {'set_item_added': {"root['seitan']"}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_frozenset(self): t1 = frozenset([1, 2, 'B']) @@ -733,7 +765,7 @@ def test_frozenset(self): 'set_item_added': {'root[3]', 'root[5]'}, 'set_item_removed': {"root['B']"} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_tuple(self): t1 = (1, 2, 8) @@ -750,7 +782,7 @@ def test_tuple(self): 'root[3]': 5 } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_named_tuples(self): from collections import namedtuple @@ -766,7 +798,33 @@ def test_named_tuples(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff + + def test_enums(self): + from enum import Enum + + class MyEnum(Enum): + A = 1 + B = 2 + + ddiff = DeepDiff(MyEnum.A, MyEnum(1)) + result = {} + assert 
ddiff == result + + ddiff = DeepDiff(MyEnum.A, MyEnum.B) + result = { + 'values_changed': { + 'root._name_': { + 'old_value': 'A', + 'new_value': 'B' + }, + 'root._value_': { + 'old_value': 1, + 'new_value': 2 + } + } + } + assert ddiff == result def test_custom_objects_change(self): t1 = CustomClass(1) @@ -780,11 +838,11 @@ def test_custom_objects_change(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_custom_objects_slot_change(self): - class ClassA(object): - __slots__ = ['x', 'y'] + class ClassA: + __slots__ = ('x', 'y') def __init__(self, x, y): self.x = x @@ -801,7 +859,61 @@ def __init__(self, x, y): } } } - self.assertEqual(ddiff, result) + assert result == ddiff + + def test_custom_class_changes_with_slot_changes(self): + class ClassA: + __slots__ = ['x', 'y'] + + def __init__(self, x, y): + self.x = x + self.y = y + + class ClassB: + __slots__ = ['x'] + + ddiff = DeepDiff(ClassA, ClassB) + result = {'type_changes': {'root': {'old_type': ClassA, 'new_type': ClassB}}} + assert result == ddiff + + def test_custom_class_changes_with_slot_change_when_ignore_type(self): + class ClassA: + __slots__ = ['x', 'y'] + + def __init__(self, x, y): + self.x = x + self.y = y + + class ClassB: + __slots__ = ['x'] + + ddiff = DeepDiff(ClassA, ClassB, ignore_type_in_groups=[(ClassA, ClassB)]) + result = {'iterable_item_removed': {'root.__slots__[1]': 'y'}, 'attribute_removed': {'root.__init__', 'root.y'}} + assert result == ddiff + + def test_custom_objects_slot_in_parent_class_change(self): + class ClassA: + __slots__ = ['x'] + + class ClassB(ClassA): + __slots__ = ['y'] + + def __init__(self, x, y): + self.x = x + self.y = y + + t1 = ClassB(1, 1) + t2 = ClassB(2, 1) + ddiff = DeepDiff(t1, t2) + result = { + 'values_changed': { + 'root.x': { + 'old_value': 1, + 'new_value': 2 + } + } + } + assert result == ddiff def test_custom_objects_with_single_protected_slot(self): class ClassA(object): @@ -815,8 +927,28 @@ def __str__(self): t1 = 
ClassA() t2 = ClassA() + ddiff = DeepDiff(t1, t2) + assert {} == ddiff + + def test_custom_objects_with_weakref_in_slots(self): + class ClassA(object): + __slots__ = ['a', '__weakref__'] + + def __init__(self, a): + self.a = a + + t1 = ClassA(1) + t2 = ClassA(2) diff = DeepDiff(t1, t2) - self.assertEqual(diff, {}) + result = { + 'values_changed': { + 'root.a': { + 'new_value': 2, + 'old_value': 1 + } + }, + } + assert result == diff def get_custom_objects_add_and_remove(self): class ClassA(object): @@ -845,7 +977,7 @@ def test_custom_objects_add_and_remove(self): }, 'attribute_removed': {'root.d'} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_custom_objects_add_and_remove_verbose(self): t1, t2 = self.get_custom_objects_add_and_remove() @@ -864,7 +996,7 @@ def test_custom_objects_add_and_remove_verbose(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def get_custom_object_with_added_removed_methods(self): class ClassA(object): @@ -892,13 +1024,13 @@ def test_custom_objects_add_and_remove_method(self): ddiff = DeepDiff(t1, t2) result = {'attribute_added': {'root.method_a', 'root.method_b'}} - self.assertEqual(ddiff, result) + assert result == ddiff def test_custom_objects_add_and_remove_method_verbose(self): t1, t2 = self.get_custom_object_with_added_removed_methods() ddiff = DeepDiff(t1, t2, verbose_level=2) - self.assertTrue('root.method_a' in ddiff['attribute_added']) - self.assertTrue('root.method_b' in ddiff['attribute_added']) + assert 'root.method_a' in ddiff['attribute_added'] + assert 'root.method_b' in ddiff['attribute_added'] def test_set_of_custom_objects(self): member1 = CustomClass(13, 37) @@ -907,7 +1039,7 @@ def test_set_of_custom_objects(self): t2 = {member2} ddiff = DeepDiff(t1, t2) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_dictionary_of_custom_objects(self): member1 = CustomClass(13, 37) @@ -916,7 +1048,29 @@ def test_dictionary_of_custom_objects(self): t2 = {1: 
member2} ddiff = DeepDiff(t1, t2) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff + + def test_custom_object_type_change_when_ignore_order(self): + + class Burrito: + bread = 'flour' + + def __init__(self): + self.spicy = True + + class Taco: + bread = 'flour' + + def __init__(self): + self.spicy = True + + burrito = Burrito() + taco = Taco() + + burritos = [burrito] + tacos = [taco] + + assert not DeepDiff(burritos, tacos, ignore_type_in_groups=[(Taco, Burrito)], ignore_order=True) def test_loop(self): class LoopTest(object): @@ -936,7 +1090,7 @@ def __init__(self, a): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_loop2(self): class LoopTestA(object): @@ -961,7 +1115,7 @@ def __init__(self, a): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_loop3(self): class LoopTest(object): @@ -981,7 +1135,7 @@ def __init__(self, a): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_loop_in_lists(self): t1 = [1, 2, 3] @@ -999,7 +1153,7 @@ def test_loop_in_lists(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_loop_in_lists2(self): t1 = [1, 2, [3]] @@ -1016,7 +1170,7 @@ def test_loop_in_lists2(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_decimal(self): t1 = {1: Decimal('10.1')} @@ -1030,162 +1184,104 @@ def test_decimal(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff def test_decimal_ignore_order(self): t1 = [{1: Decimal('10.1')}, {2: Decimal('10.2')}] t2 = [{2: Decimal('10.2')}, {1: Decimal('10.1')}] ddiff = DeepDiff(t1, t2, ignore_order=True) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_unicode_string_type_changes(self): + """ + These tests were written when DeepDiff was in Python 2. + Writing b"你好" throws an exception in Python 3 so can't be used for testing. + These tests are currently useless till they are rewritten properly. 
+ """ unicode_string = {"hello": u"你好"} ascii_string = {"hello": "你好"} ddiff = DeepDiff(unicode_string, ascii_string) - if py3: - # In python3, all string is unicode, so diff is empty - result = {} - else: - # In python2, these are 2 different type of strings - result = { - 'type_changes': { - "root['hello']": { - 'old_type': unicode, - 'new_value': '\xe4\xbd\xa0\xe5\xa5\xbd', - 'old_value': u'\u4f60\u597d', - 'new_type': str - } - } - } - self.assertEqual(ddiff, result) + result = {} + assert result == ddiff def test_unicode_string_value_changes(self): unicode_string = {"hello": u"你好"} ascii_string = {"hello": u"你好hello"} ddiff = DeepDiff(unicode_string, ascii_string) - if py3: - result = { - 'values_changed': { - "root['hello']": { - 'old_value': '你好', - 'new_value': '你好hello' - } - } - } - else: - result = { - 'values_changed': { - "root['hello']": { - 'new_value': u'\u4f60\u597dhello', - 'old_value': u'\u4f60\u597d' - } + result = { + 'values_changed': { + "root['hello']": { + 'old_value': '你好', + 'new_value': '你好hello' } } - self.assertEqual(ddiff, result) + } + assert result == ddiff def test_unicode_string_value_and_type_changes(self): unicode_string = {"hello": u"你好"} ascii_string = {"hello": "你好hello"} ddiff = DeepDiff(unicode_string, ascii_string) - if py3: - # In python3, all string is unicode, so these 2 strings only diff - # in values - result = { - 'values_changed': { - "root['hello']": { - 'new_value': '你好hello', - 'old_value': '你好' - } - } - } - else: - # In python2, these are 2 different type of strings - result = { - 'type_changes': { - "root['hello']": { - 'old_type': unicode, - 'new_value': '\xe4\xbd\xa0\xe5\xa5\xbdhello', - 'old_value': u'\u4f60\u597d', - 'new_type': str - } + # In python3, all string is unicode, so these 2 strings only diff + # in values + result = { + 'values_changed': { + "root['hello']": { + 'new_value': '你好hello', + 'old_value': '你好' } } - self.assertEqual(ddiff, result) + } + assert result == ddiff def 
test_int_to_unicode_string(self): t1 = 1 ascii_string = "你好" ddiff = DeepDiff(t1, ascii_string) - if py3: - # In python3, all string is unicode, so these 2 strings only diff - # in values - result = { - 'type_changes': { - 'root': { - 'old_type': int, - 'new_type': str, - 'old_value': 1, - 'new_value': '你好' - } - } - } - else: - # In python2, these are 2 different type of strings - result = { - 'type_changes': { - 'root': { - 'old_type': int, - 'new_value': '\xe4\xbd\xa0\xe5\xa5\xbd', - 'old_value': 1, - 'new_type': str - } + # In python3, all string is unicode, so these 2 strings only diff + # in values + result = { + 'type_changes': { + 'root': { + 'old_type': int, + 'new_type': str, + 'old_value': 1, + 'new_value': '你好' } } - self.assertEqual(ddiff, result) + } + assert result == ddiff def test_int_to_unicode(self): t1 = 1 unicode_string = u"你好" ddiff = DeepDiff(t1, unicode_string) - if py3: - # In python3, all string is unicode, so these 2 strings only diff - # in values - result = { - 'type_changes': { - 'root': { - 'old_type': int, - 'new_type': str, - 'old_value': 1, - 'new_value': '你好' - } - } - } - else: - # In python2, these are 2 different type of strings - result = { - 'type_changes': { - 'root': { - 'old_type': int, - 'new_value': u'\u4f60\u597d', - 'old_value': 1, - 'new_type': unicode - } + # In python3, all string is unicode, so these 2 strings only diff + # in values + result = { + 'type_changes': { + 'root': { + 'old_type': int, + 'new_type': str, + 'old_value': 1, + 'new_value': '你好' } } - self.assertEqual(ddiff, result) + } + assert result == ddiff def test_significant_digits_for_decimals(self): t1 = Decimal('2.5') t2 = Decimal('1.5') ddiff = DeepDiff(t1, t2, significant_digits=0) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_significant_digits_for_complex_imaginary_part(self): t1 = 1.23 + 1.222254j t2 = 1.23 + 1.222256j ddiff = DeepDiff(t1, t2, significant_digits=4) - self.assertEqual(ddiff, {}) + assert {} == ddiff result = { 
'values_changed': { 'root': { @@ -1195,19 +1291,19 @@ def test_significant_digits_for_complex_imaginary_part(self): } } ddiff = DeepDiff(t1, t2, significant_digits=5) - self.assertEqual(ddiff, result) + assert result == ddiff def test_significant_digits_for_complex_real_part(self): t1 = 1.23446879 + 1.22225j t2 = 1.23446764 + 1.22225j ddiff = DeepDiff(t1, t2, significant_digits=5) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_significant_digits_for_list_of_floats(self): t1 = [1.2344, 5.67881, 6.778879] t2 = [1.2343, 5.67882, 6.778878] ddiff = DeepDiff(t1, t2, significant_digits=3) - self.assertEqual(ddiff, {}) + assert {} == ddiff ddiff = DeepDiff(t1, t2, significant_digits=4) result = { 'values_changed': { @@ -1217,7 +1313,7 @@ def test_significant_digits_for_list_of_floats(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff ddiff = DeepDiff(t1, t2, significant_digits=5) result = { 'values_changed': { @@ -1231,15 +1327,63 @@ def test_significant_digits_for_list_of_floats(self): } } } - self.assertEqual(ddiff, result) + assert result == ddiff ddiff = DeepDiff(t1, t2) ddiff2 = DeepDiff(t1, t2, significant_digits=6) - self.assertEqual(ddiff, ddiff2) + assert ddiff2 == ddiff def test_negative_significant_digits(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): DeepDiff(1, 1, significant_digits=-1) + def test_ignore_type_in_groups(self): + t1 = [1, 2, 3] + t2 = [1.0, 2.0, 3.0] + ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + assert not ddiff + + def test_ignore_type_in_groups2(self): + t1 = [1, 2, 3] + t2 = [1.0, 2.0, 3.3] + ddiff = DeepDiff(t1, t2, ignore_type_in_groups=DeepDiff.numbers) + result = {'values_changed': {'root[2]': {'new_value': 3.3, 'old_value': 3}}} + assert result == ddiff + + def test_ignore_type_in_groups_just_numbers(self): + t1 = [1, 2, 3, 'a'] + t2 = [1.0, 2.0, 3.3, b'a'] + ddiff = DeepDiff(t1, t2, ignore_type_in_groups=[DeepDiff.numbers]) + result = 
{'values_changed': {'root[2]': {'new_value': 3.3, 'old_value': 3}}, + 'type_changes': {'root[3]': {'new_type': bytes, + 'new_value': b'a', + 'old_type': str, + 'old_value': 'a'}} + } + assert result == ddiff + + def test_ignore_type_in_groups_numbers_and_strings(self): + t1 = [1, 2, 3, 'a'] + t2 = [1.0, 2.0, 3.3, b'a'] + ddiff = DeepDiff(t1, t2, ignore_type_in_groups=[DeepDiff.numbers, DeepDiff.strings]) + result = {'values_changed': {'root[2]': {'new_value': 3.3, 'old_value': 3}}} + assert result == ddiff + + def test_ignore_type_in_groups_numbers_and_strings_when_ignore_order(self): + t1 = [1, 2, 3, 'a'] + t2 = [1.0, 2.0, 3.3, b'a'] + ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True, ignore_string_type_changes=True, ignore_order=True) + result = {'iterable_item_added': {'root[2]': 3.3}, 'iterable_item_removed': {'root[2]': 3}} + assert result == ddiff + + def test_ignore_string_type_changes_when_dict_keys_merge_is_not_deterministic(self): + t1 = {'a': 10, b'a': 20} + t2 = {'a': 11, b'a': 22} + ddiff = DeepDiff(t1, t2, ignore_numeric_type_changes=True, ignore_string_type_changes=True, ignore_order=True) + result = {'values_changed': {"root['a']": {'new_value': 22, 'old_value': 20}}} + alternative_result = {'values_changed': {"root['a']": {'new_value': 11, 'old_value': 10}}} + assert result == ddiff or alternative_result == ddiff + + @pytest.mark.skip(reason="REMAPPING DISABLED UNTIL KEY NAMES CHANGE AGAIN IN FUTURE") def test_base_level_dictionary_remapping(self): """ Since subclassed dictionaries that override __getitem__ treat newdict.get(key) @@ -1250,36 +1394,28 @@ def test_base_level_dictionary_remapping(self): t1 = {1: 1, 2: 2} t2 = {2: 2, 3: 3} ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff['dic_item_added'], - ddiff['dictionary_item_added']) - self.assertEqual(ddiff['dic_item_removed'], - ddiff['dictionary_item_removed']) + assert ddiff['dic_item_added'] == ddiff['dictionary_item_added'] + assert ddiff['dic_item_removed'] == 
ddiff['dictionary_item_removed'] + @pytest.mark.skip(reason="REMAPPING DISABLED UNTIL KEY NAMES CHANGE AGAIN IN FUTURE") def test_index_and_repeat_dictionary_remapping(self): t1 = [1, 3, 1, 4] t2 = [4, 4, 1] ddiff = DeepDiff(t1, t2, ignore_order=True, report_repetition=True) - self.assertEqual(ddiff['repetition_change']['root[0]']['newindexes'], - ddiff['repetition_change']['root[0]']['new_indexes']) - self.assertEqual(ddiff['repetition_change']['root[0]']['newrepeat'], - ddiff['repetition_change']['root[0]']['new_repeat']) - self.assertEqual(ddiff['repetition_change']['root[0]']['oldindexes'], - ddiff['repetition_change']['root[0]']['old_indexes']) - self.assertEqual(ddiff['repetition_change']['root[0]']['oldrepeat'], - ddiff['repetition_change']['root[0]']['old_repeat']) + assert ddiff['repetition_change']['root[0]']['newindexes'] == ddiff['repetition_change']['root[0]']['new_indexes'] + assert ddiff['repetition_change']['root[0]']['newrepeat'] == ddiff['repetition_change']['root[0]']['new_repeat'] + assert ddiff['repetition_change']['root[0]']['oldindexes'] == ddiff['repetition_change']['root[0]']['old_indexes'] + assert ddiff['repetition_change']['root[0]']['oldrepeat'] == ddiff['repetition_change']['root[0]']['old_repeat'] + @pytest.mark.skip(reason="REMAPPING DISABLED UNTIL KEY NAMES CHANGE AGAIN IN FUTURE") def test_value_and_type_dictionary_remapping(self): t1 = {1: 1, 2: 2} t2 = {1: 1, 2: '2'} ddiff = DeepDiff(t1, t2) - self.assertEqual(ddiff['type_changes']['root[2]']['newtype'], - ddiff['type_changes']['root[2]']['new_type']) - self.assertEqual(ddiff['type_changes']['root[2]']['newvalue'], - ddiff['type_changes']['root[2]']['new_value']) - self.assertEqual(ddiff['type_changes']['root[2]']['oldtype'], - ddiff['type_changes']['root[2]']['old_type']) - self.assertEqual(ddiff['type_changes']['root[2]']['oldvalue'], - ddiff['type_changes']['root[2]']['old_value']) + assert ddiff['type_changes']['root[2]']['newtype'] == 
ddiff['type_changes']['root[2]']['new_type'] + assert ddiff['type_changes']['root[2]']['newvalue'] == ddiff['type_changes']['root[2]']['new_value'] + assert ddiff['type_changes']['root[2]']['oldtype'] == ddiff['type_changes']['root[2]']['old_type'] + assert ddiff['type_changes']['root[2]']['oldvalue'] == ddiff['type_changes']['root[2]']['old_value'] def test_skip_type(self): l1 = logging.getLogger("test") @@ -1287,12 +1423,12 @@ def test_skip_type(self): t1 = {"log": l1, 2: 1337} t2 = {"log": l2, 2: 1337} ddiff = DeepDiff(t1, t2, exclude_types={logging.Logger}) - self.assertEqual(ddiff, {}) + assert {} == ddiff t1 = {"log": "book", 2: 1337} t2 = {"log": l2, 2: 1337} ddiff = DeepDiff(t1, t2, exclude_types={logging.Logger}) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_skip_path1(self): t1 = { @@ -1304,7 +1440,7 @@ def test_skip_path1(self): "ingredients": ["veggies", "tofu", "soy sauce"] } ddiff = DeepDiff(t1, t2, exclude_paths={"root['ingredients']"}) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_skip_path2(self): t1 = { @@ -1313,7 +1449,7 @@ def test_skip_path2(self): } t2 = {"for life": "vegan"} ddiff = DeepDiff(t1, t2, exclude_paths={"root['ingredients']"}) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_skip_path2_reverse(self): t1 = { @@ -1322,7 +1458,7 @@ def test_skip_path2_reverse(self): } t2 = {"for life": "vegan"} ddiff = DeepDiff(t2, t1, exclude_paths={"root['ingredients']"}) - self.assertEqual(ddiff, {}) + assert {} == ddiff def test_skip_path4(self): t1 = { @@ -1331,29 +1467,29 @@ def test_skip_path4(self): } t2 = {"for life": "vegan", "zutaten": ["veggies", "tofu", "soy sauce"]} ddiff = DeepDiff(t1, t2, exclude_paths={"root['ingredients']"}) - self.assertTrue('dictionary_item_added' in ddiff, {}) - self.assertTrue('dictionary_item_removed' not in ddiff, {}) + assert 'dictionary_item_added' in ddiff, {} + assert 'dictionary_item_removed' not in ddiff, {} def test_skip_custom_object_path(self): t1 = CustomClass(1) 
t2 = CustomClass(2) ddiff = DeepDiff(t1, t2, exclude_paths=['root.a']) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_skip_list_path(self): t1 = ['a', 'b'] t2 = ['a'] ddiff = DeepDiff(t1, t2, exclude_paths=['root[1]']) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_skip_dictionary_path(self): t1 = {1: {2: "a"}} t2 = {1: {}} ddiff = DeepDiff(t1, t2, exclude_paths=['root[1][2]']) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff def test_skip_dictionary_path_with_custom_object(self): obj1 = CustomClass(1) @@ -1363,21 +1499,42 @@ def test_skip_dictionary_path_with_custom_object(self): t2 = {1: {2: obj2}} ddiff = DeepDiff(t1, t2, exclude_paths=['root[1][2].a']) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff + + def test_skip_regexp(self): + t1 = [{'a': 1, 'b': 2}, {'c': 4, 'b': 5}] + t2 = [{'a': 1, 'b': 3}, {'c': 4, 'b': 5}] + ddiff = DeepDiff(t1, t2, exclude_regex_paths=[r"root\[\d+\]\['b'\]"]) + result = {} + assert result == ddiff def test_skip_str_type_in_dictionary(self): t1 = {1: {2: "a"}} t2 = {1: {}} ddiff = DeepDiff(t1, t2, exclude_types=[str]) result = {} - self.assertEqual(ddiff, result) + assert result == ddiff + + def test_skip_str_type_in_dict_on_list(self): + t1 = [{1: "a"}] + t2 = [{}] + ddiff = DeepDiff(t1, t2, exclude_types=[str]) + result = {} + assert result == ddiff + + def test_skip_str_type_in_dict_on_list_when_ignored_order(self): + t1 = [{1: "a"}] + t2 = [{}] + ddiff = DeepDiff(t1, t2, exclude_types=[str], ignore_order=True) + result = {} + assert result == ddiff def test_unknown_parameters(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): DeepDiff(1, 1, wrong_param=2) def test_bad_attribute(self): - class Bad(object): + class Bad: __slots__ = ['x', 'y'] def __getattr__(self, key): @@ -1391,7 +1548,7 @@ def __str__(self): ddiff = DeepDiff(t1, t2) result = {'unprocessed': ['root: Bad Object and Bad Object']} 
- self.assertEqual(ddiff, result) + assert result == ddiff def test_dict_none_item_removed(self): t1 = {1: None, 2: 2} @@ -1400,7 +1557,7 @@ def test_dict_none_item_removed(self): result = { 'dictionary_item_removed': {'root[1]'} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_list_none_item_removed(self): t1 = [1, 2, None] @@ -1409,7 +1566,7 @@ def test_list_none_item_removed(self): result = { 'iterable_item_removed': {'root[2]': None} } - self.assertEqual(ddiff, result) + assert result == ddiff def test_non_subscriptable_iterable(self): def gen1(): @@ -1428,7 +1585,7 @@ def gen2(): result = {'iterable_item_removed': {'root[2]': 31337}} # Note: In text-style results, we currently pretend this stuff is subscriptable for readability - self.assertEqual(ddiff, result) + assert result == ddiff @mock.patch('deepdiff.diff.logger') @mock.patch('deepdiff.diff.DeepHash') diff --git a/tests/test_diff_tree.py b/tests/test_diff_tree.py index 4bb4f09f..fa9f299a 100644 --- a/tests/test_diff_tree.py +++ b/tests/test_diff_tree.py @@ -1,22 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_diff_tree - -Or to run all the tests: - python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - python -m unittest tests.test_diff_tree.DeepDiffTreeTestCase.test_same_objects -""" -import unittest +import pytest from deepdiff import DeepDiff from deepdiff.helper import pypy3, notpresent from deepdiff.model import DictRelationship, NonSubscriptableIterableRelationship @@ -25,7 +9,7 @@ logging.disable(logging.CRITICAL) -class DeepDiffTreeTestCase(unittest.TestCase): +class TestDeepDiffTree: """DeepDiff Tests.""" def test_same_objects(self): @@ -33,19 +17,19 @@ def test_same_objects(self): t2 = t1 ddiff = DeepDiff(t1, 
t2) res = ddiff.tree - self.assertEqual(res, {}) + assert res == {} def test_significant_digits_signed_zero(self): t1 = 0.00001 t2 = -0.0001 ddiff = DeepDiff(t1, t2, significant_digits=2) res = ddiff.tree - self.assertEqual(res, {}) + assert res == {} t1 = 1 * 10**-12 t2 = -1 * 10**-12 ddiff = DeepDiff(t1, t2, significant_digits=10) res = ddiff.tree - self.assertEqual(res, {}) + assert res == {} def test_item_added_extensive(self): t1 = {'one': 1, 'two': 2, 'three': 3, 'four': 4} @@ -53,59 +37,57 @@ def test_item_added_extensive(self): ddiff = DeepDiff(t1, t2) res = ddiff.tree (key, ) = res.keys() - self.assertEqual(key, 'dictionary_item_added') - self.assertEqual(len(res['dictionary_item_added']), 1) + assert key == 'dictionary_item_added' + assert len(res['dictionary_item_added']) == 1 (added1, ) = res['dictionary_item_added'] # assert added1 DiffLevel chain is valid at all - self.assertEqual(added1.up.down, added1) - self.assertIsNone(added1.down) - self.assertIsNone(added1.up.up) - self.assertEqual(added1.all_up, added1.up) - self.assertEqual(added1.up.all_down, added1) - self.assertEqual(added1.report_type, 'dictionary_item_added') + assert added1.up.down == added1 + assert added1.down is None + assert added1.up.up is None + assert added1.all_up == added1.up + assert added1.up.all_down == added1 + assert added1.report_type == 'dictionary_item_added' # assert DiffLevel chain points to the objects we entered - self.assertEqual(added1.up.t1, t1) - self.assertEqual(added1.up.t2, t2) + assert added1.up.t1 == t1 + assert added1.up.t2 == t2 - self.assertEqual(added1.t1, notpresent) - self.assertEqual(added1.t2, 1337) + assert added1.t1 is notpresent + assert added1.t2 == 1337 # assert DiffLevel child relationships are correct - self.assertIsNone(added1.up.t1_child_rel) - self.assertIsInstance(added1.up.t2_child_rel, DictRelationship) - self.assertEqual(added1.up.t2_child_rel.parent, added1.up.t2) - self.assertEqual(added1.up.t2_child_rel.child, added1.t2) - 
self.assertEqual(added1.up.t2_child_rel.param, 'new') + assert added1.up.t1_child_rel is None + assert isinstance(added1.up.t2_child_rel, DictRelationship) + assert added1.up.t2_child_rel.parent == added1.up.t2 + assert added1.up.t2_child_rel.child == added1.t2 + assert added1.up.t2_child_rel.param == 'new' - self.assertEqual(added1.up.path(), "root") - self.assertEqual(added1.path(), "root['new']") + assert added1.up.path() == "root" + assert added1.path() == "root['new']" def test_item_added_and_removed(self): t1 = {'one': 1, 'two': 2, 'three': 3, 'four': 4} t2 = {'one': 1, 'two': 4, 'three': 3, 'five': 5, 'six': 6} ddiff = DeepDiff(t1, t2, view='tree') - self.assertEqual( - set(ddiff.keys()), { - 'dictionary_item_added', 'dictionary_item_removed', - 'values_changed' - }) - self.assertEqual(len(ddiff['dictionary_item_added']), 2) - self.assertEqual(len(ddiff['dictionary_item_removed']), 1) + assert set(ddiff.keys()) == { + 'dictionary_item_added', 'dictionary_item_removed', + 'values_changed' + } + assert len(ddiff['dictionary_item_added']) == 2 + assert len(ddiff['dictionary_item_removed']) == 1 def test_item_added_and_removed2(self): t1 = {2: 2, 4: 4} t2 = {2: "b", 5: 5} ddiff = DeepDiff(t1, t2, view='tree') - self.assertEqual( - set(ddiff.keys()), { - 'dictionary_item_added', 'dictionary_item_removed', - 'type_changes' - }) - self.assertEqual(len(ddiff['dictionary_item_added']), 1) - self.assertEqual(len(ddiff['dictionary_item_removed']), 1) + assert set(ddiff.keys()), { + 'dictionary_item_added', 'dictionary_item_removed', + 'type_changes' + } + assert len(ddiff['dictionary_item_added']) == 1 + assert len(ddiff['dictionary_item_removed']) == 1 def test_non_subscriptable_iterable(self): t1 = (i for i in [42, 1337, 31337]) @@ -116,18 +98,18 @@ def test_non_subscriptable_iterable(self): ddiff = DeepDiff(t1, t2, view='tree') (change, ) = ddiff['iterable_item_removed'] - self.assertEqual(set(ddiff.keys()), {'iterable_item_removed'}) - 
self.assertEqual(len(ddiff['iterable_item_removed']), 1) + assert set(ddiff.keys()) == {'iterable_item_removed'} + assert len(ddiff['iterable_item_removed']) == 1 - self.assertEqual(change.up.t1, t1) - self.assertEqual(change.up.t2, t2) - self.assertEqual(change.report_type, 'iterable_item_removed') - self.assertEqual(change.t1, 31337) - self.assertEqual(change.t2, notpresent) + assert change.up.t1 == t1 + assert change.up.t2 == t2 + assert change.report_type == 'iterable_item_removed' + assert change.t1 == 31337 + assert change.t2 is notpresent - self.assertIsInstance(change.up.t1_child_rel, - NonSubscriptableIterableRelationship) - self.assertIsNone(change.up.t2_child_rel) + assert isinstance(change.up.t1_child_rel, + NonSubscriptableIterableRelationship) + assert change.up.t2_child_rel is None def test_non_subscriptable_iterable_path(self): t1 = (i for i in [42, 1337, 31337]) @@ -136,9 +118,9 @@ def test_non_subscriptable_iterable_path(self): (change, ) = ddiff['iterable_item_removed'] # testing path - self.assertEqual(change.path(), None) - self.assertEqual(change.path(force='yes'), 'root(unrepresentable)') - self.assertEqual(change.path(force='fake'), 'root[2]') + assert change.path() is None + assert change.path(force='yes') == 'root(unrepresentable)' + assert change.path(force='fake') == 'root[2]' def test_significant_digits(self): ddiff = DeepDiff( @@ -146,7 +128,7 @@ def test_significant_digits(self): [0.013, 0.99], significant_digits=1, view='tree') - self.assertEqual(ddiff, {}) + assert ddiff == {} def test_significant_digits_with_sets(self): ddiff = DeepDiff( @@ -154,7 +136,7 @@ def test_significant_digits_with_sets(self): {0.013, 0.99}, significant_digits=1, view='tree') - self.assertEqual(ddiff, {}) + assert ddiff == {} def test_significant_digits_with_ignore_order(self): ddiff = DeepDiff( @@ -162,50 +144,44 @@ def test_significant_digits_with_ignore_order(self): significant_digits=1, ignore_order=True, view='tree') - self.assertEqual(ddiff, {}) + 
assert ddiff == {} def test_repr(self): t1 = {1, 2, 8} t2 = {1, 2, 3, 5} ddiff = DeepDiff(t1, t2, view='tree') - try: - str(ddiff) - except Exception as e: - self.fail("Converting ddiff to string raised: {}".format(e)) + str(ddiff) -class DeepDiffTreeWithNumpyTestCase(unittest.TestCase): +class TestDeepDiffTreeWithNumpy: """DeepDiff Tests with Numpy.""" - def setUp(self): - if not pypy3: - import numpy as np - a1 = np.array([1.23, 1.66, 1.98]) - a2 = np.array([1.23, 1.66, 1.98]) - self.d1 = {'np': a1} - self.d2 = {'np': a2} - - @unittest.skipIf(pypy3, "Numpy is not compatible with pypy3") + @pytest.mark.skipif(pypy3, reason="Numpy is not compatible with pypy3") def test_diff_with_numpy(self): - ddiff = DeepDiff(self.d1, self.d2) + import numpy as np + a1 = np.array([1.23, 1.66, 1.98]) + a2 = np.array([1.23, 1.66, 1.98]) + d1 = {'np': a1} + d2 = {'np': a2} + ddiff = DeepDiff(d1, d2) res = ddiff.tree - self.assertEqual(res, {}) + assert res == {} - @unittest.skipIf(pypy3, "Numpy is not compatible with pypy3") + @pytest.mark.skipif(pypy3, reason="Numpy is not compatible with pypy3") def test_diff_with_empty_seq(self): a1 = {"empty": []} a2 = {"empty": []} ddiff = DeepDiff(a1, a2) - self.assertEqual(ddiff, {}) + assert ddiff == {} -class DeepAdditionsTestCase(unittest.TestCase): +class TestDeepAdditions: """Tests for Additions and Subtractions.""" - @unittest.expectedFailure + @pytest.mark.skip(reason="Not currently implemented") def test_adding_list_diff(self): t1 = [1, 2] t2 = [1, 2, 3, 5] ddiff = DeepDiff(t1, t2, view='tree') addition = ddiff + t1 - self.assertEqual(addition, t2) + assert addition == t2 diff --git a/tests/test_hash.py b/tests/test_hash.py old mode 100644 new mode 100755 index 8361faf6..2f10b478 --- a/tests/test_hash.py +++ b/tests/test_hash.py @@ -1,364 +1,569 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_hash - -Or to run all the tests: - python -m unittest discover - -Or to 
run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - On linux: - nosetests ./tests/test_hash.py:DeepHashTestCase.test_bytecode - - On windows: - nosetests .\tests\test_hash.py:DeepHashTestCase.test_string_in_root -""" -import unittest -from deepdiff import DeepHash -from deepdiff.helper import py3, pypy3 -from collections import namedtuple -import logging - -logging.disable(logging.CRITICAL) - - -class CustomClass: - def __init__(self, a, b=None): - self.a = a - self.b = b - - def __str__(self): - return "({}, {})".format(self.a, self.b) - - def __repr__(self): - return self.__str__() - - -def hash_and_format(obj): - return "str:{}".format(hash(obj)) - - -class DeepHashTestCase(unittest.TestCase): - """DeepHash Tests.""" - - def test_hash_str(self): - obj = "a" - expected_result = {id(obj): hash_and_format(obj)} - result = DeepHash(obj) - self.assertEqual(result, expected_result) - - def test_hash_str_fail_if_mutable(self): - """ - This test fails if ContentHash is getting a mutable copy of hashes - which means each init of the ContentHash will have hashes from - the previous init. 
- """ - obj1 = "a" - id_obj1 = id(obj1) - expected_result = {id_obj1: hash_and_format(obj1)} - result = DeepHash(obj1) - self.assertEqual(result, expected_result) - obj2 = "b" - result = DeepHash(obj2) - self.assertTrue(id_obj1 not in result) - - def test_list(self): - string1 = "a" - obj = [string1, 10, 20] - expected_result = { - id(string1): hash_and_format(string1), - id(obj): 'list:int:10,int:20,str:%s' % hash(string1) - } - result = DeepHash(obj) - self.assertEqual(result, expected_result) - - def test_tuple(self): - string1 = "a" - obj = (string1, 10, 20) - expected_result = { - id(string1): hash_and_format(string1), - id(obj): 'tuple:int:10,int:20,str:%s' % hash(string1) - } - result = DeepHash(obj) - self.assertEqual(result, expected_result) - - def test_named_tuples(self): - # checking if pypy3 is running the test - # in that case due to a pypy3 bug or something - # the id of x inside the named tuple changes. - x = "x" - x_id = id(x) - x_hash = hash(x) - Point = namedtuple('Point', [x]) - obj = Point(x=11) - result = DeepHash(obj) - if pypy3: - self.assertEqual(result[id(obj)], 'ntdict:{str:%s:int:11}' % - x_hash) - else: - expected_result = { - x_id: 'str:{}'.format(x_hash), - id(obj): 'ntdict:{str:%s:int:11}' % x_hash - } - self.assertEqual(result, expected_result) - - def test_dict(self): - string1 = "a" - hash_string1 = hash(string1) - key1 = "key1" - hash_key1 = hash(key1) - obj = {key1: string1, 1: 10, 2: 20} - expected_result = { - id(key1): "str:{}".format(hash_key1), - id(string1): "str:{}".format(hash_string1), - id(obj): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % - (hash_key1, hash_string1) - } - result = DeepHash(obj) - self.assertEqual(result, expected_result) - - def test_dict_in_list(self): - string1 = "a" - hash_string1 = hash(string1) - key1 = "key1" - hash_key1 = hash(key1) - dict1 = {key1: string1, 1: 10, 2: 20} - obj = [0, dict1] - expected_result = { - id(key1): "str:{}".format(hash_key1), - id(string1): 
"str:{}".format(hash_string1), - id(dict1): 'dict:{int:1:int:10;int:2:int:20;str:%s:str:%s}' % - (hash_key1, hash_string1), - id(obj): - 'list:dict:{int:1:int:10;int:2:int:20;str:%s:str:%s},int:0' % - (hash_key1, hash_string1) - } - result = DeepHash(obj) - self.assertEqual(result, expected_result) - - def test_nested_lists_same_hash(self): - t1 = [1, 2, [3, 4]] - t2 = [[4, 3], 2, 1] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_nested_lists_same_hash2(self): - t1 = [1, 2, [3, [4, 5]]] - t2 = [[[5, 4], 3], 2, 1] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_nested_lists_same_hash3(self): - t1 = [{1: [2, 3], 4: [5, [6, 7]]}] - t2 = [{4: [[7, 6], 5], 1: [3, 2]}] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_nested_lists_in_dictionary_same_hash(self): - t1 = [{"c": 4}, {"c": 3}] - t2 = [{"c": 3}, {"c": 4}] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_same_sets_same_hash(self): - t1 = {1, 3, 2} - t2 = {2, 3, 1} - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_same_sets_in_lists_same_hash(self): - t1 = ["a", {1, 3, 2}] - t2 = [{2, 3, 1}, "a"] - t1_hash = DeepHash(t1) - t2_hash = DeepHash(t2) - - self.assertEqual(t1_hash[id(t1)], t2_hash[id(t2)]) - - def test_unknown_parameters(self): - with self.assertRaises(ValueError): - DeepHash(1, wrong_param=2) - - def test_bad_attribute(self): - class Bad(object): - __slots__ = ['x', 'y'] - - def __getattr__(self, key): - raise AttributeError("Bad item") - - def __str__(self): - return "Bad Object" - - t1 = Bad() - - result = DeepHash(t1) - expected_result = {id(t1): result.unprocessed, 'unprocessed': [t1]} - self.assertEqual(result, expected_result) - - def 
test_repetition_by_default_does_not_effect(self): - list1 = [3, 4] - list1_id = id(list1) - a = [1, 2, list1] - a_id = id(a) - - list2 = [4, 3, 3] - list2_id = id(list2) - b = [list2, 2, 1] - b_id = id(b) - - hash_a = DeepHash(a) - hash_b = DeepHash(b) - - self.assertEqual(hash_a[list1_id], hash_b[list2_id]) - self.assertEqual(hash_a[a_id], hash_b[b_id]) - - def test_setting_repetition_off_unequal_hash(self): - list1 = [3, 4] - list1_id = id(list1) - a = [1, 2, list1] - a_id = id(a) - - list2 = [4, 3, 3] - list2_id = id(list2) - b = [list2, 2, 1] - b_id = id(b) - - hash_a = DeepHash(a, ignore_repetition=False) - hash_b = DeepHash(b, ignore_repetition=False) - - self.assertNotEqual(hash_a[list1_id], hash_b[list2_id]) - self.assertNotEqual(hash_a[a_id], hash_b[b_id]) - - self.assertEqual(hash_a[list1_id].replace('3|1', '3|2'), - hash_b[list2_id]) - - def test_already_calculated_hash_wont_be_recalculated(self): - hashes = (i for i in range(10)) - - def hasher(obj): - return next(hashes) - - obj = "a" - expected_result = {id(obj): "str:0"} - result = DeepHash(obj, hasher=hasher) - self.assertEqual(result, expected_result) - - # we simply feed the last result to DeepHash - # So it can re-use the results. - result2 = DeepHash(obj, hasher=hasher, hashes=result) - # if hashes are not cached and re-used, - # then the next time hasher runs, it returns - # number 1 instead of 0. 
- self.assertEqual(result2, expected_result) - - result3 = DeepHash(obj, hasher=hasher) - expected_result = {id(obj): "str:{}".format(1)} - self.assertEqual(result3, expected_result) - - def test_skip_type(self): - l1 = logging.getLogger("test") - obj = {"log": l1, 2: 1337} - result = DeepHash(obj, exclude_types={logging.Logger}) - self.assertEqual(result[id(l1)], result.skipped) - - def test_hash_dic_with_loop(self): - obj = {2: 1337} - obj[1] = obj - result = DeepHash(obj) - expected_result = {id(obj): 'dict:{int:2:int:1337}'} - self.assertEqual(result, expected_result) - - def test_hash_iterable_with_loop(self): - obj = [1] - obj.append(obj) - result = DeepHash(obj) - expected_result = {id(obj): 'list:int:1'} - self.assertEqual(result, expected_result) - - def test_hash_iterable_with_excluded_type(self): - l1 = logging.getLogger("test") - obj = [1, l1] - result = DeepHash(obj, exclude_types={logging.Logger}) - self.assertTrue(id(l1) not in result) - - -class DeepHashSHA1TestCase(unittest.TestCase): - """DeepHash with SHA1 Tests.""" - - def test_hash_str(self): - obj = "a" - expected_result = { - id(obj): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - def test_hash_str_fail_if_mutable(self): - """ - This test fails if ContentHash is getting a mutable copy of hashes - which means each init of the ContentHash will have hashes from - the previous init. 
- """ - obj1 = "a" - id_obj1 = id(obj1) - expected_result = { - id_obj1: 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' - } - result = DeepHash(obj1, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - obj2 = "b" - result = DeepHash(obj2, hasher=DeepHash.sha1hex) - self.assertTrue(id_obj1 not in result) - - def test_bytecode(self): - obj = b"a" - if py3: - expected_result = { - id(obj): 'str:066c7cf4158717c47244fa6cf1caafca605d550b' - } - else: - expected_result = { - id(obj): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - def test_list1(self): - string1 = "a" - obj = [string1, 10, 20] - expected_result = { - id(string1): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0', - id(obj): - 'list:int:10,int:20,str:48591f1d794734cabf55f96f5a5a72c084f13ac0' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) - - def test_dict1(self): - string1 = "a" - key1 = "key1" - obj = {key1: string1, 1: 10, 2: 20} - expected_result = { - id(key1): 'str:63216212fdf88fe0c838c36ab65278b9953000d6', - id(string1): 'str:48591f1d794734cabf55f96f5a5a72c084f13ac0', - id(obj): - 'dict:{int:1:int:10;int:2:int:20;str:63216212fdf88fe0c838c36ab65278b9953000d6:str:48591f1d794734cabf55f96f5a5a72c084f13ac0}' - } - result = DeepHash(obj, hasher=DeepHash.sha1hex) - self.assertEqual(result, expected_result) +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import re +import pytest +import logging +from deepdiff import DeepHash +from deepdiff.deephash import prepare_string_for_hashing, unprocessed +from deepdiff.helper import pypy3, get_id +from collections import namedtuple +from functools import partial +from enum import Enum + +logging.disable(logging.CRITICAL) + + +class CustomClass: + def __init__(self, a, b=None): + self.a = a + self.b = b + + def __str__(self): + return "({}, {})".format(self.a, self.b) + + def __repr__(self): + return 
self.__str__() + + +# Only the prep part of DeepHash. We don't need to test the actual hash function. +DeepHashPrep = partial(DeepHash, apply_hash=False) + + +def prep_str(obj, ignore_string_type_changes=True): + return obj if ignore_string_type_changes else 'str:{}'.format(obj) + + +class TestDeepHash: + + def test_dictionary(self): + + obj = {1: 1} + result = DeepHash(obj) + assert set(result.keys()) == {1, get_id(obj)} + + def test_get_hash_by_obj_is_the_same_as_by_obj_get_id(self): + a = "a" + obj = {1: a} + result = DeepHash(obj) + assert result[a] + + def test_get_hash_by_obj_when_does_not_exist(self): + a = "a" + obj = {1: a} + result = DeepHash(obj) + with pytest.raises(KeyError): + result[2] + + def test_list_of_sets(self): + a = {1} + b = {2} + obj = [a, b] + result = DeepHash(obj) + expected_result = {1, 2, get_id(a), get_id(b), get_id(obj)} + assert set(result.keys()) == expected_result + + def test_bad_attribute(self): + class Bad: + __slots__ = ['x', 'y'] + + def __getattr__(self, key): + raise AttributeError("Bad item") + + def __str__(self): + return "Bad Object" + + t1 = Bad() + + result = DeepHash(t1) + expected_result = {t1: unprocessed, 'unprocessed': [t1]} + assert expected_result == result + + def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self): + a = 'hello' + b = b'hello' + a_hash = DeepHash(a, ignore_string_type_changes=True)[a] + b_hash = DeepHash(b, ignore_string_type_changes=True)[b] + assert a_hash == b_hash + + def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self): + a = 'hello' + b = b'hello' + a_hash = DeepHash(a, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)[a] + b_hash = DeepHash(b, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)[b] + assert a_hash == b_hash + + +class TestDeepHashPrep: + """DeepHashPrep Tests covering object serialization.""" + + def test_prep_str(self): + obj = "a" + expected_result = {obj: prep_str(obj)} + result = DeepHashPrep(obj, ignore_string_type_changes=True) + 
assert expected_result == result + expected_result = {obj: prep_str(obj, ignore_string_type_changes=False)} + result = DeepHashPrep(obj, ignore_string_type_changes=False) + assert expected_result == result + + def test_dictionary_key_type_change(self): + obj1 = {"b": 10} + obj2 = {b"b": 10} + + result1 = DeepHashPrep(obj1, ignore_string_type_changes=True) + result2 = DeepHashPrep(obj2, ignore_string_type_changes=True) + assert result1[obj1] == result2[obj2] + assert result1["b"] == result2[b"b"] + + def test_number_type_change(self): + obj1 = 10 + obj2 = 10.0 + + result1 = DeepHashPrep(obj1) + result2 = DeepHashPrep(obj2) + assert result1[obj1] != result2[obj2] + + result1 = DeepHashPrep(obj1, ignore_numeric_type_changes=True) + result2 = DeepHashPrep(obj2, ignore_numeric_type_changes=True) + assert result1[obj1] == result2[obj2] + + def test_prep_str_fail_if_deephash_leaks_results(self): + """ + This test fails if DeepHash is getting a mutable copy of hashes + which means each init of the DeepHash will have hashes from + the previous init. 
+ """ + obj1 = "a" + expected_result = {obj1: prep_str(obj1)} + result = DeepHashPrep(obj1, ignore_string_type_changes=True) + assert expected_result == result + obj2 = "b" + result = DeepHashPrep(obj2, ignore_string_type_changes=True) + assert obj1 not in result + + def test_dict_in_dict(self): + obj2 = {2: 3} + obj = {'a': obj2} + result = DeepHashPrep(obj, ignore_string_type_changes=True) + assert 'a' in result + assert obj2 in result + + def do_list_or_tuple(self, func, func_str): + string1 = "a" + obj = func([string1, 10, 20]) + if func is list: + obj_id = get_id(obj) + else: + obj_id = obj + string1_prepped = prep_str(string1) + expected_result = { + 10: 'int:10', + 20: 'int:20', + string1: string1_prepped, + obj_id: '{}:{},int:10,int:20'.format(func_str, string1_prepped), + } + result = DeepHashPrep(obj, ignore_string_type_changes=True) + assert expected_result == result + + def test_list_and_tuple(self): + for func, func_str in ((list, 'list'), (tuple, 'tuple')): + self.do_list_or_tuple(func, func_str) + + def test_named_tuples(self): + # checking if pypy3 is running the test + # in that case due to a difference of string interning implementation + # the id of x inside the named tuple changes. 
+ x = "x" + x_prep = prep_str(x) + Point = namedtuple('Point', [x]) + obj = Point(x=11) + result = DeepHashPrep(obj, ignore_string_type_changes=True) + if pypy3: + assert result[obj] == "ntPoint:{%s:int:11}" % x + else: + expected_result = { + x: x_prep, + obj: "ntPoint:{%s:int:11}" % x, + 11: 'int:11', + } + assert expected_result == result + + def test_enum(self): + class MyEnum(Enum): + A = 1 + B = 2 + + # checking if pypy3 is running the test + # in that case due to a difference of string interning implementation + # the ids of strings change + if pypy3: + # only compare the hashes for the enum instances themselves + assert DeepHashPrep(MyEnum.A)[MyEnum.A] == r'objMyEnum:{str:__objclass__:EnumMeta:objMyEnum:{str:_name_:str:B;str:_value_:int:2};str:_name_:str:A;str:_value_:int:1}' + assert DeepHashPrep(MyEnum.B)[MyEnum.B] == r'objMyEnum:{str:__objclass__:EnumMeta:objMyEnum:{str:_name_:str:A;str:_value_:int:1};str:_name_:str:B;str:_value_:int:2}' + assert DeepHashPrep(MyEnum(1))[MyEnum.A] == r'objMyEnum:{str:__objclass__:EnumMeta:objMyEnum:{str:_name_:str:B;str:_value_:int:2};str:_name_:str:A;str:_value_:int:1}' + else: + assert DeepHashPrep(MyEnum.A) == DeepHashPrep(MyEnum.A) + assert DeepHashPrep(MyEnum.A) == DeepHashPrep(MyEnum(1)) + assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.A.name) + assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.A.value) + assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.B) + + def test_dict_hash(self): + string1 = "a" + string1_prepped = prep_str(string1) + key1 = "key1" + key1_prepped = prep_str(key1) + obj = {key1: string1, 1: 10, 2: 20} + expected_result = { + 1: 'int:1', + 10: 'int:10', + 2: 'int:2', + 20: 'int:20', + key1: key1_prepped, + string1: string1_prepped, + get_id(obj): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % (key1, string1) + } + result = DeepHashPrep(obj, ignore_string_type_changes=True) + assert expected_result == result + + def test_dict_in_list(self): + string1 = "a" + key1 = "key1" + dict1 = 
{key1: string1, 1: 10, 2: 20} + obj = [0, dict1] + expected_result = { + 0: 'int:0', + 1: 'int:1', + 10: 'int:10', + 2: 'int:2', + 20: 'int:20', + key1: key1, + string1: string1, + get_id(dict1): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' % + (key1, string1), + get_id(obj): + 'list:dict:{int:1:int:10;int:2:int:20;%s:%s},int:0' % + (key1, string1) + } + result = DeepHashPrep(obj, ignore_string_type_changes=True) + assert expected_result == result + + def test_nested_lists_same_hash(self): + t1 = [1, 2, [3, 4]] + t2 = [[4, 3], 2, 1] + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + + def test_nested_lists_same_hash2(self): + t1 = [1, 2, [3, [4, 5]]] + t2 = [[[5, 4], 3], 2, 1] + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + + def test_nested_lists_same_hash3(self): + t1 = [{1: [2, 3], 4: [5, [6, 7]]}] + t2 = [{4: [[7, 6], 5], 1: [3, 2]}] + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + + def test_nested_lists_in_dictionary_same_hash(self): + t1 = [{"c": 4}, {"c": 3}] + t2 = [{"c": 3}, {"c": 4}] + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + + def test_same_sets_same_hash(self): + t1 = {1, 3, 2} + t2 = {2, 3, 1} + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + + def test_similar_sets_with_significant_digits_same_hash(self): + t1 = {0.012, 0.98} + t2 = {0.013, 0.99} + t1_hash = DeepHashPrep(t1, significant_digits=1) + t2_hash = DeepHashPrep(t2, significant_digits=1) + + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + + def test_same_sets_in_lists_same_hash(self): + t1 = ["a", {1, 3, 2}] + t2 = [{2, 3, 1}, "a"] + t1_hash = DeepHashPrep(t1) + t2_hash = DeepHashPrep(t2) + + assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)] + + def 
test_unknown_parameters(self): + with pytest.raises(ValueError): + DeepHashPrep(1, wrong_param=2) + + def test_bad_attribute_prep(self): + class Bad: + __slots__ = ['x', 'y'] + + def __getattr__(self, key): + raise AttributeError("Bad item") + + def __str__(self): + return "Bad Object" + + t1 = Bad() + + result = DeepHashPrep(t1) + expected_result = {t1: unprocessed, 'unprocessed': [t1]} + assert expected_result == result + + class Burrito: + bread = 'flour' + + def __init__(self): + self.spicy = True + + class Taco: + bread = 'flour' + + def __init__(self): + self.spicy = True + + burrito = Burrito() + taco = Taco() + + @pytest.mark.parametrize("t1, t2, ignore_type_in_groups, is_qual", [ + (taco, burrito, [], False), + (taco, burrito, [(Taco, Burrito)], True), + ([taco], [burrito], [(Taco, Burrito)], True), + + ]) + def test_objects_with_same_content(self, t1, t2, ignore_type_in_groups, is_qual): + + t1_result = DeepHashPrep(t1, ignore_type_in_groups=ignore_type_in_groups) + t2_result = DeepHashPrep(t2, ignore_type_in_groups=ignore_type_in_groups) + assert is_qual == (t1_result[t1] == t2_result[t2]) + + def test_repetition_by_default_does_not_effect(self): + list1 = [3, 4] + list1_id = get_id(list1) + a = [1, 2, list1] + a_id = get_id(a) + + list2 = [4, 3, 3] + list2_id = get_id(list2) + b = [list2, 2, 1] + b_id = get_id(b) + + hash_a = DeepHashPrep(a) + hash_b = DeepHashPrep(b) + + assert hash_a[list1_id] == hash_b[list2_id] + assert hash_a[a_id] == hash_b[b_id] + + def test_setting_repetition_off_unequal_hash(self): + list1 = [3, 4] + list1_id = get_id(list1) + a = [1, 2, list1] + a_id = get_id(a) + + list2 = [4, 3, 3] + list2_id = get_id(list2) + b = [list2, 2, 1] + b_id = get_id(b) + + hash_a = DeepHashPrep(a, ignore_repetition=False) + hash_b = DeepHashPrep(b, ignore_repetition=False) + + assert not hash_a[list1_id] == hash_b[list2_id] + assert not hash_a[a_id] == hash_b[b_id] + + assert hash_a[list1_id].replace('3|1', '3|2') == hash_b[list2_id] + + def 
test_already_calculated_hash_wont_be_recalculated(self): + hashes = (i for i in range(10)) + + def hasher(obj): + return str(next(hashes)) + + obj = "a" + expected_result = {obj: '0'} + result = DeepHash(obj, hasher=hasher) + assert expected_result == result + + # we simply feed the last result to DeepHash + # So it can re-use the results. + result2 = DeepHash(obj, hasher=hasher, hashes=result) + # if hashes are not cached and re-used, + # then the next time hasher runs, it returns + # number 1 instead of 0. + assert expected_result == result2 + + result3 = DeepHash(obj, hasher=hasher) + expected_result = {obj: '1'} + assert expected_result == result3 + + def test_skip_type(self): + l1 = logging.getLogger("test") + obj = {"log": l1, 2: 1337} + result = DeepHashPrep(obj, exclude_types={logging.Logger}) + assert get_id(l1) not in result + + def test_skip_type2(self): + l1 = logging.getLogger("test") + result = DeepHashPrep(l1, exclude_types={logging.Logger}) + assert not result + + def test_prep_dic_with_loop(self): + obj = {2: 1337} + obj[1] = obj + result = DeepHashPrep(obj) + expected_result = {get_id(obj): 'dict:{int:2:int:1337}', 1: 'int:1', 2: 'int:2', 1337: 'int:1337'} + assert expected_result == result + + def test_prep_iterable_with_loop(self): + obj = [1] + obj.append(obj) + result = DeepHashPrep(obj) + expected_result = {get_id(obj): 'list:int:1', 1: 'int:1'} + assert expected_result == result + + def test_prep_iterable_with_excluded_type(self): + l1 = logging.getLogger("test") + obj = [1, l1] + result = DeepHashPrep(obj, exclude_types={logging.Logger}) + assert get_id(l1) not in result + + def test_skip_str_type_in_dict_on_list(self): + dic1 = {1: "a"} + t1 = [dic1] + dic2 = {} + t2 = [dic2] + t1_hash = DeepHashPrep(t1, exclude_types=[str]) + t2_hash = DeepHashPrep(t2, exclude_types=[str]) + assert 1 in t1_hash + assert t1_hash[dic1] == t2_hash[dic2] + + def test_skip_path(self): + dic1 = {1: "a"} + t1 = [dic1, 2] + dic2 = {} + t2 = [dic2, 2] + t1_hash = 
DeepHashPrep(t1, exclude_paths=['root[0]']) + t2_hash = DeepHashPrep(t2, exclude_paths='root[0]') + assert 1 not in t1_hash + assert 2 in t1_hash + assert t1_hash[2] == t2_hash[2] + + def test_skip_regex_path(self): + dic1 = {1: "a"} + t1 = [dic1, 2] + exclude_re = re.compile(r'\[0\]') + t1_hash = DeepHashPrep(t1, exclude_regex_paths=r'\[0\]') + t2_hash = DeepHashPrep(t1, exclude_regex_paths=[exclude_re]) + assert 1 not in t1_hash + assert 2 in t1_hash + assert t1_hash[2] == t2_hash[2] + + +class TestDeepHashSHA1: + """DeepHash with SHA1 Tests.""" + + def test_prep_str_sha1(self): + obj = "a" + expected_result = { + obj: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + } + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) + assert expected_result == result + + def test_prep_str_sha1_fail_if_mutable(self): + """ + This test fails if DeepHash is getting a mutable copy of hashes + which means each init of the DeepHash will have hashes from + the previous init. + """ + obj1 = "a" + expected_result = { + obj1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + } + result = DeepHash(obj1, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) + assert expected_result == result + obj2 = "b" + result = DeepHash(obj2, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) + assert obj1 not in result + + def test_bytecode(self): + obj = b"a" + expected_result = { + obj: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8' + } + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) + assert expected_result == result + + def test_list1(self): + string1 = "a" + obj = [string1, 10, 20] + expected_result = { + string1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', + get_id(obj): 'eac61cbd194e5e03c210a3dce67b9bfd6a7b7acb', + 10: DeepHash.sha1hex('int:10'), + 20: DeepHash.sha1hex('int:20'), + } + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) + assert expected_result == result + + def test_dict1(self): + 
string1 = "a" + key1 = "key1" + obj = {key1: string1, 1: 10, 2: 20} + expected_result = { + 1: DeepHash.sha1hex('int:1'), + 10: DeepHash.sha1hex('int:10'), + 2: DeepHash.sha1hex('int:2'), + 20: DeepHash.sha1hex('int:20'), + key1: '1073ab6cda4b991cd29f9e83a307f34004ae9327', + string1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8', + get_id(obj): '11e23f096df81b1ccab0c309cdf8b4ba5a0a6895' + } + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex) + assert expected_result == result + + +class TestCleaningString: + + @pytest.mark.parametrize("text, ignore_string_type_changes, expected_result", [ + (b'hello', True, 'hello'), + (b'hello', False, 'bytes:hello'), + ('hello', True, 'hello'), + ('hello', False, 'str:hello'), + ]) + def test_clean_type(self, text, ignore_string_type_changes, expected_result): + result = prepare_string_for_hashing(text, ignore_string_type_changes=ignore_string_type_changes) + assert expected_result == result + + +class TestDeepHashMurmur3: + """DeepHash with Murmur3 Hash Tests.""" + + def test_prep_str_murmur3_64bit(self): + obj = "a" + expected_result = { + obj: 424475663186367154 + } + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.murmur3_64bit) + assert expected_result == result + + def test_prep_str_murmur3_128bit(self): + obj = "a" + expected_result = { + obj: 119173504597196970070553896747624927922 + } + result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.murmur3_128bit) + assert expected_result == result diff --git a/tests/test_helper.py b/tests/test_helper.py index dffd1a2d..7b1aefe5 100644 --- a/tests/test_helper.py +++ b/tests/test_helper.py @@ -1,21 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - -import unittest from deepdiff.helper import short_repr -class HelperTestCase(unittest.TestCase): +class TestHelper: """Helper Tests.""" def test_short_repr_when_short(self): item = {1: 2} output = short_repr(item) - self.assertEqual(output, '{1: 2}') + assert 
output == '{1: 2}' def test_short_repr_when_long(self): item = {'Eat more': 'burritos'} output = short_repr(item) - self.assertEqual(output, "{'Eat more':...}") + assert output == "{'Eat more':...}" diff --git a/tests/test_model.py b/tests/test_model.py index f193840b..59b7c6a6 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,24 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_diff_ref - -Or to run all the tests: - python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - python -m unittest tests.test_model.DiffLevelTestCase.test_path_when_both_children_empty -""" - -from unittest import TestCase import logging +import pytest from tests import CustomClass, CustomClassMisleadingRepr from deepdiff.model import (DiffLevel, ChildRelationship, DictRelationship, SubscriptableIterableRelationship, @@ -33,20 +16,20 @@ def get_param_from_obj(self, obj): return obj -class DictRelationshipTestCase(TestCase): - def setUp(self): - self.customkey = CustomClass(a=13, b=37) - self.customkey_misleading = CustomClassMisleadingRepr(a=11, b=20) - self.d = { +class TestDictRelationship: + def setup_class(cls): + cls.customkey = CustomClass(a=13, b=37) + cls.customkey_misleading = CustomClassMisleadingRepr(a=11, b=20) + cls.d = { 42: 'answer', 'vegan': 'for life', - self.customkey: 1337, - self.customkey_misleading: 'banana' + cls.customkey: 1337, + cls.customkey_misleading: 'banana' } def test_numkey(self): rel = DictRelationship(parent=self.d, child=self.d[42], param=42) - self.assertEqual(rel.get_param_repr(), "[42]") + assert rel.get_param_repr() == "[42]" def test_strkey(self): rel = ChildRelationship.create( @@ -55,136 +38,136 @@ def test_strkey(self): child=self.d['vegan'], param='vegan') result = 
rel.get_param_repr() - self.assertEqual(result, "['vegan']") + assert result == "['vegan']" def test_objkey(self): rel = DictRelationship( parent=self.d, child=self.d[self.customkey], param=self.customkey) - self.assertIsNone(rel.get_param_repr()) + assert rel.get_param_repr() is None def test_objkey_misleading_repr(self): rel = DictRelationship( parent=self.d, child=self.d[self.customkey_misleading], param=self.customkey_misleading) - self.assertIsNone(rel.get_param_repr()) + assert rel.get_param_repr() is None def test_get_param_from_dict(self): param = 42 rel = DictRelationship(parent=self.d, child=self.d[param], param=param) obj = {10: 10, param: 123} - self.assertEqual(rel.get_param_from_obj(obj), 123) + assert rel.get_param_from_obj(obj) == 123 -class ListRelationshipTestCase(TestCase): - def setUp(self): - self.custom = CustomClass(13, 37) - self.l = [1337, 'vegan', self.custom] +class TestListRelationship: + def setup_class(cls): + cls.custom = CustomClass(13, 37) + cls.l = [1337, 'vegan', cls.custom] def test_min(self): rel = SubscriptableIterableRelationship(self.l, self.l[0], 0) result = rel.get_param_repr() - self.assertEqual(result, "[0]") + assert result == "[0]" def test_max(self): rel = ChildRelationship.create(SubscriptableIterableRelationship, self.l, self.custom, 2) - self.assertEqual(rel.get_param_repr(), "[2]") + assert rel.get_param_repr() == "[2]" def test_get_param_from_obj(self): param = 0 rel = SubscriptableIterableRelationship(parent=self.l, child=self.l[param], param=param) obj = ['a', 'b', 'c'] - self.assertEqual(rel.get_param_from_obj(obj), 'a') + assert rel.get_param_from_obj(obj) == 'a' -class AttributeRelationshipTestCase(TestCase): - def setUp(self): - self.custom = CustomClass(13, 37) +class TestAttributeRelationship: + def setup_class(cls): + cls.custom = CustomClass(13, 37) def test_a(self): rel = AttributeRelationship(self.custom, 13, "a") result = rel.get_param_repr() - self.assertEqual(result, ".a") + assert result == ".a" 
def test_get_param_from_obj(self): rel = AttributeRelationship(self.custom, 13, "a") - self.assertEqual(rel.get_param_from_obj(self.custom), 13) + assert rel.get_param_from_obj(self.custom) == 13 -class DiffLevelTestCase(TestCase): - def setUp(self): +class TestDiffLevel: + def setup_class(cls): # Test data - self.custom1 = CustomClass(a='very long text here', b=37) - self.custom2 = CustomClass(a=313, b=37) - self.t1 = {42: 'answer', 'vegan': 'for life', 1337: self.custom1} - self.t2 = { + cls.custom1 = CustomClass(a='very long text here', b=37) + cls.custom2 = CustomClass(a=313, b=37) + cls.t1 = {42: 'answer', 'vegan': 'for life', 1337: cls.custom1} + cls.t2 = { 42: 'answer', 'vegan': 'for the animals', - 1337: self.custom2 + 1337: cls.custom2 } # Manually build diff, bottom up - self.lowest = DiffLevel( - self.custom1.a, self.custom2.a, report_type='values_changed') + cls.lowest = DiffLevel( + cls.custom1.a, cls.custom2.a, report_type='values_changed') # Test manual child relationship rel_int_low_t1 = AttributeRelationship( - parent=self.custom1, child=self.custom1.a, param="a") + parent=cls.custom1, child=cls.custom1.a, param="a") rel_int_low_t2 = AttributeRelationship( - parent=self.custom2, child=self.custom2.a, param="a") - self.intermediate = DiffLevel( - self.custom1, - self.custom2, - down=self.lowest, + parent=cls.custom2, child=cls.custom2.a, param="a") + cls.intermediate = DiffLevel( + cls.custom1, + cls.custom2, + down=cls.lowest, child_rel1=rel_int_low_t1, child_rel2=rel_int_low_t2) - self.lowest.up = self.intermediate + cls.lowest.up = cls.intermediate # Test automatic child relationship t1_child_rel = ChildRelationship.create( klass=DictRelationship, - parent=self.t1, - child=self.intermediate.t1, + parent=cls.t1, + child=cls.intermediate.t1, param=1337) t2_child_rel = ChildRelationship.create( klass=DictRelationship, - parent=self.t2, - child=self.intermediate.t2, + parent=cls.t2, + child=cls.intermediate.t2, param=1337) - self.highest = DiffLevel( 
- self.t1, - self.t2, - down=self.intermediate, + cls.highest = DiffLevel( + cls.t1, + cls.t2, + down=cls.intermediate, child_rel1=t1_child_rel, child_rel2=t2_child_rel) - self.intermediate.up = self.highest + cls.intermediate.up = cls.highest def test_all_up(self): - self.assertEqual(self.lowest.all_up, self.highest) + assert self.lowest.all_up == self.highest def test_all_down(self): - self.assertEqual(self.highest.all_down, self.lowest) + assert self.highest.all_down == self.lowest def test_automatic_child_rel(self): - self.assertIsInstance(self.highest.t1_child_rel, DictRelationship) - self.assertIsInstance(self.highest.t2_child_rel, DictRelationship) + assert isinstance(self.highest.t1_child_rel, DictRelationship) + assert isinstance(self.highest.t2_child_rel, DictRelationship) - self.assertEqual(self.highest.t1_child_rel.parent, self.highest.t1) - self.assertEqual(self.highest.t2_child_rel.parent, self.highest.t2) - self.assertEqual(self.highest.t1_child_rel.parent, self.highest.t1) - self.assertEqual(self.highest.t2_child_rel.parent, self.highest.t2) + assert self.highest.t1_child_rel.parent == self.highest.t1 + assert self.highest.t2_child_rel.parent == self.highest.t2 + assert self.highest.t1_child_rel.parent == self.highest.t1 + assert self.highest.t2_child_rel.parent == self.highest.t2 # Provides textual relationship from t1 to t1[1337] - self.assertEqual('[1337]', self.highest.t2_child_rel.get_param_repr()) + assert '[1337]' == self.highest.t2_child_rel.get_param_repr() def test_path(self): # Provides textual path all the way through - self.assertEqual(self.lowest.path("self.t1"), "self.t1[1337].a") + assert self.lowest.path("self.t1") == "self.t1[1337].a" def test_change_of_path_root(self): - self.assertEqual(self.lowest.path("root"), "root[1337].a") - self.assertEqual(self.lowest.path(""), "[1337].a") + assert self.lowest.path("root") == "root[1337].a" + assert self.lowest.path("") == "[1337].a" def test_path_when_both_children_empty(self): """ @@ 
-198,22 +181,21 @@ def test_path_when_both_children_empty(self): up = DiffLevel(t1, t2) down = up.down = DiffLevel(child_t1, child_t2) path = down.path() - self.assertEqual(path, 'root') + assert path == 'root' def test_repr_short(self): level = Verbose.level Verbose.level = 0 item_repr = repr(self.lowest) Verbose.level = level - self.assertEqual(item_repr, '') + assert item_repr == '' def test_repr_long(self): level = Verbose.level Verbose.level = 1 item_repr = repr(self.lowest) Verbose.level = level - self.assertEqual(item_repr, - "") + assert item_repr == "" def test_repetition_attribute_and_repr(self): t1 = [1, 1] @@ -221,20 +203,20 @@ def test_repetition_attribute_and_repr(self): some_repetition = 'some repetition' node = DiffLevel(t1, t2) node.additional['repetition'] = some_repetition - self.assertEqual(node.repetition, some_repetition) - self.assertEqual(repr(node), "") + assert node.repetition == some_repetition + assert repr(node) == "" -class ChildRelationshipTestCase(TestCase): +class TestChildRelationship: def test_create_invalid_klass(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): ChildRelationship.create(DiffLevel, "hello", 42) def test_rel_repr_short(self): rel = WorkingChildRelationship(parent="that parent", child="this child", param="some param") rel_repr = repr(rel) expected = "" - self.assertEqual(rel_repr, expected) + assert rel_repr == expected def test_rel_repr_long(self): rel = WorkingChildRelationship( @@ -243,4 +225,4 @@ def test_rel_repr_long(self): param="some param") rel_repr = repr(rel) expected = "" - self.assertEqual(rel_repr, expected) + assert rel_repr == expected diff --git a/tests/test_search.py b/tests/test_search.py index 75e24645..5e6f237b 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -1,19 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.search_tests - -Or to run all the tests with coverage: - coverage run 
--source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - nosetests tests/test_search.py:DeepSearchTestCase.test_case_insensitive_of_str_in_list -""" -import unittest +import pytest from deepdiff import DeepSearch, grep from datetime import datetime import logging @@ -34,44 +21,44 @@ def __repr__(self): return self.__str__() -class DeepSearchTestCase(unittest.TestCase): +class TestDeepSearch: """DeepSearch Tests.""" def test_number_in_list(self): obj = ["a", 10, 20] item = 10 result = {"matched_values": {'root[1]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_string_in_root(self): obj = "long string somewhere" result = {"matched_values": {'root'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_string_in_root_verbose(self): obj = "long string somewhere" result = {"matched_values": {'root': "long string somewhere"}} - self.assertEqual(DeepSearch(obj, item, verbose_level=2), result) + assert DeepSearch(obj, item, verbose_level=2) == result def test_string_in_tuple(self): obj = ("long", "string", 0, "somewhere") result = {"matched_values": {'root[3]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_string_in_list(self): obj = ["long", "string", 0, "somewhere"] result = {"matched_values": {'root[3]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_string_in_list_verbose(self): obj = ["long", "string", 0, "somewhere"] result = {"matched_values": {'root[3]': "somewhere"}} - self.assertEqual(DeepSearch(obj, item, verbose_level=2), result) + assert DeepSearch(obj, item, verbose_level=2) == result def 
test_string_in_list_verbose2(self): obj = ["long", "string", 0, "somewhere great!"] result = {"matched_values": {'root[3]': "somewhere great!"}} - self.assertEqual(DeepSearch(obj, item, verbose_level=2), result) + assert DeepSearch(obj, item, verbose_level=2) == result def test_string_in_list_verbose3(self): obj = ["long somewhere", "string", 0, "somewhere great!"] @@ -81,14 +68,14 @@ def test_string_in_list_verbose3(self): 'root[3]': "somewhere great!" } } - self.assertEqual(DeepSearch(obj, item, verbose_level=2), result) + assert DeepSearch(obj, item, verbose_level=2) == result def test_int_in_dictionary(self): obj = {"long": "somewhere", "num": 2, 0: 0, "somewhere": "around"} item = 2 result = {'matched_values': {"root['num']"}} ds = DeepSearch(obj, item, verbose_level=1) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary(self): obj = {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"} @@ -97,7 +84,7 @@ def test_string_in_dictionary(self): 'matched_values': {"root['long']"} } ds = DeepSearch(obj, item, verbose_level=1) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary_case_insensitive(self): obj = {"long": "Somewhere over there!", "string": 2, 0: 0, "SOMEWHERE": "around"} @@ -106,7 +93,7 @@ def test_string_in_dictionary_case_insensitive(self): 'matched_values': {"root['long']"} } ds = DeepSearch(obj, item, verbose_level=1, case_sensitive=False) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary_key_case_insensitive_partial(self): obj = {"SOMEWHERE here": "around"} @@ -114,7 +101,7 @@ def test_string_in_dictionary_key_case_insensitive_partial(self): 'matched_paths': {"root['SOMEWHERE here']"} } ds = DeepSearch(obj, item, verbose_level=1, case_sensitive=False) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary_verbose(self): obj = {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"} @@ -127,7 +114,7 @@ def 
test_string_in_dictionary_verbose(self): } } ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def test_string_in_dictionary_in_list_verbose(self): obj = [ @@ -148,25 +135,25 @@ def test_string_in_dictionary_in_list_verbose(self): } } ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def test_custom_object(self): obj = CustomClass('here, something', 'somewhere') result = {'matched_values': {'root.b'}} ds = DeepSearch(obj, item, verbose_level=1) - self.assertEqual(ds, result) + assert ds == result def test_custom_object_verbose(self): obj = CustomClass('here, something', 'somewhere out there') result = {'matched_values': {'root.b': 'somewhere out there'}} ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def test_custom_object_in_dictionary_verbose(self): obj = {1: CustomClass('here, something', 'somewhere out there')} result = {'matched_values': {'root[1].b': 'somewhere out there'}} ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def test_named_tuples_verbose(self): from collections import namedtuple @@ -181,13 +168,13 @@ def test_named_tuples_verbose(self): 'root.somewhere_good': 22 } } - self.assertEqual(ds, result) + assert ds == result def test_string_in_set_verbose(self): obj = {"long", "string", 0, "somewhere"} # result = {"matched_values": {'root[3]': "somewhere"}} ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(list(ds["matched_values"].values())[0], item) + assert list(ds["matched_values"].values())[0] == item def test_loop(self): class LoopTest(object): @@ -199,7 +186,7 @@ def __init__(self, a): ds = DeepSearch(obj, item, verbose_level=1) result = {'matched_values': {'root.a'}} - self.assertEqual(ds, result) + assert ds == result def test_loop_in_lists(self): obj = [1, 2, 'somewhere'] @@ -207,7 +194,7 @@ def test_loop_in_lists(self): ds = DeepSearch(obj, 
item, verbose_level=1) result = {'matched_values': {'root[2]'}} - self.assertEqual(ds, result) + assert ds == result def test_skip_path1(self): obj = { @@ -215,34 +202,46 @@ def test_skip_path1(self): "ingredients": ["no meat", "no eggs", "no dairy", "somewhere"] } ds = DeepSearch(obj, item, exclude_paths={"root['ingredients']"}) - self.assertEqual(ds, {}) + assert ds == {} def test_custom_object_skip_path(self): obj = CustomClass('here, something', 'somewhere') result = {} ds = DeepSearch(obj, item, verbose_level=1, exclude_paths=['root.b']) - self.assertEqual(ds, result) + assert ds == result def test_skip_list_path(self): obj = ['a', 'somewhere'] ds = DeepSearch(obj, item, exclude_paths=['root[1]']) result = {} - self.assertEqual(ds, result) + assert ds == result def test_skip_dictionary_path(self): obj = {1: {2: "somewhere"}} ds = DeepSearch(obj, item, exclude_paths=['root[1][2]']) result = {} - self.assertEqual(ds, result) + assert ds == result def test_skip_type_str(self): obj = "long string somewhere" result = {} ds = DeepSearch(obj, item, verbose_level=1, exclude_types=[str]) - self.assertEqual(ds, result) + assert ds == result + + def test_skip_regexp(self): + obj = [{'a': 1, 'b': "somewhere"}, {'c': 4, 'b': "somewhere"}] + ds = DeepSearch(obj, item, exclude_regex_paths=[r"root\[\d+\]"]) + result = {} + assert ds == result + + def test_skip_regexp2(self): + obj = {'a': [1, 2, [3, [item]]]} + ds = DeepSearch(obj, item, exclude_regex_paths=[r"\[\d+\]"]) + result = {} + assert ds == result def test_unknown_parameters(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError): DeepSearch(1, 1, wrong_param=2) def test_bad_attribute(self): @@ -259,44 +258,44 @@ def __str__(self): ds = DeepSearch(obj, item, verbose_level=1) result = {'unprocessed': ['root']} - self.assertEqual(ds, result) + assert ds == result ds = DeepSearch(obj, item, verbose_level=2) - self.assertEqual(ds, result) + assert ds == result def 
test_case_insensitive_of_str_in_list(self): obj = ["a", "bb", "BBC", "aBbB"] item = "BB" result = {"matched_values": {'root[1]', 'root[2]', 'root[3]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1, case_sensitive=False), result) + assert DeepSearch(obj, item, verbose_level=1, case_sensitive=False) == result def test_case_sensitive_of_str_in_list(self): obj = ["a", "bb", "BBC", "aBbB"] item = "BB" result = {"matched_values": {'root[2]'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1, case_sensitive=True), result) + assert DeepSearch(obj, item, verbose_level=1, case_sensitive=True) == result def test_case_sensitive_of_str_in_one_liner(self): obj = "Hello, what's up?" item = "WHAT" result = {} - self.assertEqual(DeepSearch(obj, item, verbose_level=1, case_sensitive=True), result) + assert DeepSearch(obj, item, verbose_level=1, case_sensitive=True) == result def test_case_insensitive_of_str_in_one_liner(self): obj = "Hello, what's up?" item = "WHAT" result = {'matched_values': {'root'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1, case_sensitive=False), result) + assert DeepSearch(obj, item, verbose_level=1, case_sensitive=False) == result def test_none(self): obj = item = None result = {'matched_values': {'root'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_complex_obj(self): obj = datetime(2017, 5, 4, 1, 1, 1) item = datetime(2017, 5, 4, 1, 1, 1) result = {'matched_values': {'root'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result def test_keep_searching_after_obj_match(self): @@ -312,6 +311,7 @@ def __eq__(self, other): obj = AlwaysEqual() item = AlwaysEqual() result = {'matched_values': {'root', 'root.some_attr'}} + assert DeepSearch(obj, item, verbose_level=1) == result def test_search_inherited_attributes(self): class Parent(object): @@ -323,10 +323,10 @@ class 
Child(Parent): obj = Child() item = 1 result = {'matched_values': {'root.a'}} - self.assertEqual(DeepSearch(obj, item, verbose_level=1), result) + assert DeepSearch(obj, item, verbose_level=1) == result -class GrepTestCase(unittest.TestCase): +class TestGrep: def test_grep_dict(self): obj = { @@ -334,4 +334,4 @@ def test_grep_dict(self): "ingredients": ["no meat", "no eggs", "no dairy", "somewhere"] } ds = obj | grep(item) - self.assertEqual(ds, {'matched_values': {"root['ingredients'][3]"}}) + assert ds == {'matched_values': {"root['ingredients'][3]"}} diff --git a/tests/test_serialization.py b/tests/test_serialization.py index a0862215..d8236ebb 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -1,69 +1,78 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" -To run only the search tests: - python -m unittest tests.test_serialization - -Or to run all the tests: - python -m unittest discover - -Or to run all the tests with coverage: - coverage run --source deepdiff setup.py test - -Or using Nose: - nosetests --with-coverage --cover-package=deepdiff - -To run a specific test, run this from the root of repo: - python -m unittest tests.test_serialization.DeepDiffTextTestCase.test_same_objects - -or using nosetests: - nosetests tests/test_serialization.py:DeepDiffTestCase.test_diff_when_hash_fails -""" -import unittest +import json +import pytest from deepdiff import DeepDiff import logging logging.disable(logging.CRITICAL) +t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} +t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} + -class DeepAdditionsTestCase(unittest.TestCase): +class TestDeepAdditions: """Tests for Additions and Subtractions.""" def test_serialization_text(self): - t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2) - self.assertTrue("deepdiff.helper.RemapDict" in ddiff.json) + assert "builtins.list" in 
ddiff.to_json_pickle() + jsoned = ddiff.to_json() + assert "world" in jsoned def test_deserialization(self): - t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2) - jsoned = ddiff.json + jsoned = ddiff.to_json_pickle() ddiff2 = DeepDiff.from_json(jsoned) - self.assertEqual(ddiff, ddiff2) + assert ddiff == ddiff2 def test_serialization_tree(self): - t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2, view='tree') - jsoned = ddiff.json - self.assertTrue("world" in jsoned) + pickle_jsoned = ddiff.to_json_pickle() + assert "world" in pickle_jsoned + jsoned = ddiff.to_json() + assert "world" in jsoned def test_deserialization_tree(self): - t1 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": [1, 2, 3]}} - t2 = {1: 1, 2: 2, 3: 3, 4: {"a": "hello", "b": "world\n\n\nEnd"}} ddiff = DeepDiff(t1, t2, view='tree') - jsoned = ddiff.json + jsoned = ddiff.to_json_pickle() ddiff2 = DeepDiff.from_json(jsoned) - self.assertTrue('type_changes' in ddiff2) + assert 'type_changes' in ddiff2 - def test_deleting_serialization_cache(self): + def test_deleting_serialization_cache_when_using_the_property(self): t1 = {1: 1} t2 = {1: 2} ddiff = DeepDiff(t1, t2) - self.assertFalse(hasattr(ddiff, '_json')) + assert hasattr(ddiff, '_json') is False ddiff.json - self.assertTrue(hasattr(ddiff, '_json')) + assert hasattr(ddiff, '_json') del ddiff.json - self.assertFalse(hasattr(ddiff, '_json')) + assert hasattr(ddiff, '_json') is False + + def test_serialize_custom_objects_throws_error(self): + class A: + pass + + class B: + pass + + t1 = A() + t2 = B() + ddiff = DeepDiff(t1, t2) + with pytest.raises(TypeError): + ddiff.to_json() + + def test_serialize_custom_objects_with_default_mapping(self): + class A: + pass + + class B: + pass + + t1 = A() + t2 = B() + ddiff = DeepDiff(t1, t2) + default_mapping = {A: 
lambda x: 'obj A', B: lambda x: 'obj B'} + result = ddiff.to_json(default_mapping=default_mapping) + expected_result = {"type_changes": {"root": {"old_type": "A", "new_type": "B", "old_value": "obj A", "new_value": "obj B"}}} + assert expected_result == json.loads(result)