diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 27223735..87a63d40 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -14,6 +14,9 @@ jobs: matrix: python-version: [3.7, 3.8, 3.9, "3.10", "3.11"] architecture: ["x64"] + include: + - python-version: "3.10" + numpy-version: "2.0.dev" steps: - uses: actions/checkout@v2 - name: Setup Python ${{ matrix.python-version }} on ${{ matrix.architecture }} @@ -37,6 +40,9 @@ jobs: - name: Install dependencies if: matrix.python-version != 3.7 run: pip install -r requirements-dev.txt + - name: Install Numpy Dev + if: ${{ matrix.numpy-version }} + run: pip install -I --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple "numpy>=0.0.dev0" - name: Lint with flake8 if: matrix.python-version == 3.11 run: | diff --git a/AUTHORS.md b/AUTHORS.md index 34f8f697..85a84db2 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -58,3 +58,4 @@ Authors in order of the timeline of their contributions: - [kor4ik](https://github.com/kor4ik) for the bugfix for `include_paths` for nested dictionaries. - [martin-kokos](https://github.com/martin-kokos) for using tomli and tomli-w for dealing with tomli files. - [Alex Sauer-Budge](https://github.com/amsb) for the bugfix for `datetime.date`. +- [William Jamieson](https://github.com/WilliamJamieson) for [NumPy 2.0 compatibility](https://github.com/seperman/deepdiff/pull/422) diff --git a/README.md b/README.md index 1bb9a1cf..f353004c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 6.5.0 +# DeepDiff v 6.6.0 ![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat) ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) @@ -17,12 +17,17 @@ Tested on Python 3.7+ and PyPy3. -- **[Documentation](https://zepworks.com/deepdiff/6.5.0/)** +- **[Documentation](https://zepworks.com/deepdiff/6.6.0/)** ## What is new? 
Please check the [ChangeLog](CHANGELOG.md) file for the detailed information. +DeepDiff 6-6-0 + +- [Serialize To Flat Dicts](https://zepworks.com/deepdiff/current/serialization.html#delta-to-flat-dicts-label) +- [NumPy 2.0 compatibility](https://github.com/seperman/deepdiff/pull/422) by [William Jamieson](https://github.com/WilliamJamieson) + DeepDiff 6-5-0 - [parse_path](https://zepworks.com/deepdiff/current/faq.html#q-how-do-i-parse-deepdiff-result-paths) @@ -62,6 +67,13 @@ Install optional packages: +### A message from Sep, the creator of DeepDiff + +> 👋 Hi there, +> If you find value in DeepDiff, you might be interested in another tool I've crafted: [Qluster](https://qluster.ai/solution).
+> As an engineer, I understand the frustration of wrestling with **unruly data** in pipelines.
+> I developed **Qluster** to empower product managers and ops teams to control and resolve data issues autonomously and **stop bugging the engineers**! 🛠️ + # ChangeLog Please take a look at the [CHANGELOG](CHANGELOG.md) file. @@ -70,7 +82,6 @@ Please take a look at the [CHANGELOG](CHANGELOG.md) file. :mega: **Please fill out our [fast 5-question survey](https://forms.gle/E6qXexcgjoKnSzjB8)** so that we can learn how & why you use DeepDiff, and what improvements we should make. Thank you! :dancers: - # Contribute 1. Please make your PR against the dev branch @@ -86,11 +97,11 @@ Thank you! How to cite this library (APA style): - Dehpour, S. (2023). DeepDiff (Version 6.5.0) [Software]. Available from https://github.com/seperman/deepdiff. + Dehpour, S. (2023). DeepDiff (Version 6.6.0) [Software]. Available from https://github.com/seperman/deepdiff. How to cite this library (Chicago style): - Dehpour, Sep. 2023. DeepDiff (version 6.5.0). + Dehpour, Sep. 2023. DeepDiff (version 6.6.0). # Authors diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index 17fbb650..d6e97f84 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,6 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep, Delta and DeepHash classes.""" # flake8: noqa -__version__ = '6.5.0' +__version__ = '6.6.0' import logging if __name__ == '__main__': diff --git a/deepdiff/delta.py b/deepdiff/delta.py index c43d1982..0ee1ed84 100644 --- a/deepdiff/delta.py +++ b/deepdiff/delta.py @@ -1,4 +1,5 @@ import logging +from functools import partial from collections.abc import Mapping from copy import deepcopy from ordered_set import OrderedSet @@ -9,7 +10,7 @@ np_ndarray, np_array_factory, numpy_dtypes, get_doc, not_found, numpy_dtype_string_to_type, dict_, ) -from deepdiff.path import _path_to_elements, _get_nested_obj, _get_nested_obj_and_force, GET, GETATTR +from deepdiff.path import _path_to_elements, _get_nested_obj, _get_nested_obj_and_force, GET, GETATTR, parse_path from 
deepdiff.anyset import AnySet @@ -591,6 +592,155 @@ def dumps(self): def to_dict(self): return dict(self.diff) + @staticmethod + def _get_flat_row(action, info, _parse_path, keys_and_funcs): + for path, details in info.items(): + row = {'path': _parse_path(path), 'action': action} + for key, new_key, func in keys_and_funcs: + if key in details: + if func: + row[new_key] = func(details[key]) + else: + row[new_key] = details[key] + yield row + + def to_flat_dicts(self, include_action_in_path=False, report_type_changes=True): + """ + Returns a flat list of actions that is easily machine readable. + + For example: + {'iterable_item_added': {'root[3]': 5, 'root[2]': 3}} + + Becomes: + [ + {'path': [3], 'value': 5, 'action': 'iterable_item_added'}, + {'path': [2], 'value': 3, 'action': 'iterable_item_added'}, + ] + + + **Parameters** + + include_action_in_path : Boolean, default=False + When False, we translate DeepDiff's paths like root[3].attribute1 into a [3, 'attribute1']. + When True, we include the action to retrieve the item in the path: [(3, 'GET'), ('attribute1', 'GETATTR')] + + report_type_changes : Boolean, default=True + If False, we don't report the type change. Instead we report the value change. + + Example: + t1 = {"a": None} + t2 = {"a": 1} + + dump = Delta(DeepDiff(t1, t2)).dumps() + delta = Delta(dump) + assert t2 == delta + t1 + + flat_result = delta.to_flat_dicts() + flat_expected = [{'path': ['a'], 'action': 'type_changes', 'value': 1, 'type': int, 'old_type': type(None)}] + assert flat_expected == flat_result + + flat_result2 = delta.to_flat_dicts(report_type_changes=False) + flat_expected2 = [{'path': ['a'], 'action': 'values_changed', 'value': 1}] + + **List of actions** + + Here is the list of actions that the flat dictionary can return. 
+ iterable_item_added + iterable_item_removed + values_changed + type_changes + set_item_added + set_item_removed + dictionary_item_added + dictionary_item_removed + attribute_added + attribute_removed + """ + result = [] + if include_action_in_path: + _parse_path = partial(parse_path, include_actions=True) + else: + _parse_path = parse_path + if report_type_changes: + keys_and_funcs = [ + ('value', 'value', None), + ('new_value', 'value', None), + ('old_value', 'old_value', None), + ('new_type', 'type', None), + ('old_type', 'old_type', None), + ('new_path', 'new_path', _parse_path), + ] + action_mapping = {} + else: + keys_and_funcs = [ + ('value', 'value', None), + ('new_value', 'value', None), + ('old_value', 'old_value', None), + ('new_path', 'new_path', _parse_path), + ] + action_mapping = {'type_changes': 'values_changed'} + + FLATTENING_NEW_ACTION_MAP = { + 'iterable_items_added_at_indexes': 'iterable_item_added', + 'iterable_items_removed_at_indexes': 'iterable_item_removed', + } + for action, info in self.diff.items(): + if action in FLATTENING_NEW_ACTION_MAP: + new_action = FLATTENING_NEW_ACTION_MAP[action] + for path, index_to_value in info.items(): + path = _parse_path(path) + for index, value in index_to_value.items(): + path2 = path.copy() + if include_action_in_path: + path2.append((index, 'GET')) + else: + path2.append(index) + result.append( + {'path': path2, 'value': value, 'action': new_action} + ) + elif action in {'set_item_added', 'set_item_removed'}: + for path, values in info.items(): + path = _parse_path(path) + for value in values: + result.append( + {'path': path, 'value': value, 'action': action} + ) + elif action == 'dictionary_item_added': + for path, value in info.items(): + path = _parse_path(path) + if isinstance(value, dict) and len(value) == 1: + new_key = next(iter(value)) + path.append(new_key) + value = value[new_key] + elif isinstance(value, (list, tuple)) and len(value) == 1: + value = value[0] + path.append(0) + action = 
'iterable_item_added' + elif isinstance(value, set) and len(value) == 1: + value = value.pop() + action = 'set_item_added' + result.append( + {'path': path, 'value': value, 'action': action} + ) + elif action in { + 'dictionary_item_removed', 'iterable_item_added', + 'iterable_item_removed', 'attribute_removed', 'attribute_added' + }: + for path, value in info.items(): + path = _parse_path(path) + result.append( + {'path': path, 'value': value, 'action': action} + ) + else: + for row in self._get_flat_row( + action=action_mapping.get(action, action), + info=info, + _parse_path=_parse_path, + keys_and_funcs=keys_and_funcs, + ): + result.append(row) + return result + if __name__ == "__main__": # pragma: no cover import doctest diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 346f6271..680afb71 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -181,7 +181,7 @@ def __init__(self, self.custom_operators = custom_operators or [] self.ignore_order = ignore_order - self.ignore_order_func = ignore_order_func or (lambda *_args, **_kwargs: ignore_order) + self.ignore_order_func = ignore_order_func ignore_type_in_groups = ignore_type_in_groups or [] if numbers == ignore_type_in_groups or numbers in ignore_type_in_groups: @@ -649,7 +649,7 @@ def _iterables_subscriptable(t1, t2): def _diff_iterable(self, level, parents_ids=frozenset(), _original_type=None, local_tree=None): """Difference of iterables""" - if self.ignore_order_func(level): + if (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order: self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type, local_tree=local_tree) else: self._diff_iterable_in_order(level, parents_ids, _original_type=_original_type, local_tree=local_tree) @@ -1103,7 +1103,9 @@ def _get_most_in_common_pairs_in_iterables( # And the objects with the same distances are grouped together in an ordered set. 
# It also includes a "max" key that is just the value of the biggest current distance in the # most_in_common_pairs dictionary. - most_in_common_pairs = defaultdict(lambda: defaultdict(OrderedSetPlus)) + def defaultdict_orderedset(): + return defaultdict(OrderedSetPlus) + most_in_common_pairs = defaultdict(defaultdict_orderedset) pairs = dict_() pre_calced_distances = None @@ -1390,7 +1392,7 @@ def _diff_numpy_array(self, level, parents_ids=frozenset(), local_tree=None): # which means numpy module needs to be available. So np can't be None. raise ImportError(CANT_FIND_NUMPY_MSG) # pragma: no cover - if not self.ignore_order_func(level): + if (self.ignore_order_func and not self.ignore_order_func(level)) or not self.ignore_order: # fast checks if self.significant_digits is None: if np.array_equal(level.t1, level.t2, equal_nan=self.ignore_nan_inequality): @@ -1416,7 +1418,7 @@ def _diff_numpy_array(self, level, parents_ids=frozenset(), local_tree=None): dimensions = len(shape) if dimensions == 1: self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree) - elif self.ignore_order_func(level): + elif (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order: # arrays are converted to python lists so that certain features of DeepDiff can apply on them easier. # They will be converted back to Numpy at their final dimension. level.t1 = level.t1.tolist() diff --git a/deepdiff/helper.py b/deepdiff/helper.py index cec24f38..a562af7d 100644 --- a/deepdiff/helper.py +++ b/deepdiff/helper.py @@ -42,11 +42,11 @@ class pydantic_base_model_type: np_uintp = np_type # pragma: no cover. np_float32 = np_type # pragma: no cover. np_float64 = np_type # pragma: no cover. - np_float_ = np_type # pragma: no cover. + np_double = np_type # pragma: no cover. np_floating = np_type # pragma: no cover. np_complex64 = np_type # pragma: no cover. np_complex128 = np_type # pragma: no cover. - np_complex_ = np_type # pragma: no cover. 
+ np_cdouble = np_type # pragma: no cover. np_complexfloating = np_type # pragma: no cover. else: np_array_factory = np.array @@ -64,21 +64,21 @@ class pydantic_base_model_type: np_uintp = np.uintp np_float32 = np.float32 np_float64 = np.float64 - np_float_ = np.float_ + np_double = np.double # np.float_ is an alias for np.double and is being removed by NumPy 2.0 np_floating = np.floating np_complex64 = np.complex64 np_complex128 = np.complex128 - np_complex_ = np.complex_ + np_cdouble = np.cdouble # np.complex_ is an alias for np.cdouble and is being removed by NumPy 2.0 np_complexfloating = np.complexfloating numpy_numbers = ( np_int8, np_int16, np_int32, np_int64, np_uint8, np_uint16, np_uint32, np_uint64, np_intp, np_uintp, - np_float32, np_float64, np_float_, np_floating, np_complex64, - np_complex128, np_complex_,) + np_float32, np_float64, np_double, np_floating, np_complex64, + np_complex128, np_cdouble,) numpy_complex_numbers = ( - np_complexfloating, np_complex64, np_complex128, np_complex_, + np_complexfloating, np_complex64, np_complex128, np_cdouble, ) numpy_dtypes = set(numpy_numbers) @@ -655,7 +655,7 @@ def diff_numpy_array(A, B): By Divakar https://stackoverflow.com/a/52417967/1497443 """ - return A[~np.in1d(A, B)] + return A[~np.isin(A, B)] PYTHON_TYPE_TO_NUMPY_TYPE = { diff --git a/deepdiff/serialization.py b/deepdiff/serialization.py index 6f9ebe90..ef44d5db 100644 --- a/deepdiff/serialization.py +++ b/deepdiff/serialization.py @@ -44,7 +44,9 @@ from copy import deepcopy from functools import partial from collections.abc import Mapping -from deepdiff.helper import (strings, get_type, TEXT_VIEW) +from deepdiff.helper import ( + strings, get_type, TEXT_VIEW, np_float32, np_float64, np_int32, np_int64 +) from deepdiff.model import DeltaResult logger = logging.getLogger(__name__) @@ -539,6 +541,10 @@ def _serialize_decimal(value): bytes: lambda x: x.decode('utf-8'), datetime.datetime: lambda x: x.isoformat(), uuid.UUID: lambda x: str(x), + 
np_float32: float, + np_float64: float, + np_int32: int, + np_int64: int } if PydanticBaseModel: diff --git a/docs/authors.rst b/docs/authors.rst index 1720469b..317998df 100644 --- a/docs/authors.rst +++ b/docs/authors.rst @@ -83,6 +83,7 @@ Authors in order of the timeline of their contributions: and tomli-w for dealing with tomli files. - `Alex Sauer-Budge <https://github.com/amsb>`__ for the bugfix for ``datetime.date``. +- `William Jamieson <https://github.com/WilliamJamieson>`__ for `NumPy 2.0 compatibility <https://github.com/seperman/deepdiff/pull/422>`__ .. _Sep Dehpour (Seperman): http://www.zepworks.com .. _Victor Hahn Castell: http://hahncastell.de diff --git a/docs/conf.py b/docs/conf.py index ff6a46d3..d7c4d9df 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,9 +61,9 @@ # built documents. # # The short X.Y version. -version = '6.5.0' +version = '6.6.0' # The full version, including alpha/beta/rc tags. -release = '6.5.0' +release = '6.6.0' load_dotenv(override=True) DOC_VERSION = os.environ.get('DOC_VERSION', version) @@ -74,7 +74,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: @@ -124,14 +124,17 @@ # further. For a list of options available for each theme, see the # documentation. 
html_theme_options = { - 'description': 'Get the deep difference of any Python objects', + 'description': 'Get the deep difference of any Python objects.', 'show_powered_by': False, 'logo': 'logo_long_B1_black.svg', 'logo_name': 'Zepworks DeepDiff', 'analytics_id': 'UA-59104908-2', 'fixed_sidebar': True, 'extra_nav_links': { - 'Zepworks': 'https://zepworks.com', 'Github': 'https://github.com/seperman/deepdiff'}, + 'Zepworks (Blog)': 'https://zepworks.com', + 'Qluster (Smart ETL)': 'https://qluster.ai', + 'Github': 'https://github.com/seperman/deepdiff' + }, 'show_relbars': True, # 'github_repo': 'deepdiff', 'anchor': '#DDD', @@ -219,7 +222,7 @@ # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' -#html_search_language = 'en' +html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value diff --git a/docs/delta.rst b/docs/delta.rst index f053ceb8..b1b7e433 100644 --- a/docs/delta.rst +++ b/docs/delta.rst @@ -59,6 +59,22 @@ verify_symmetry : Boolean, default=False A delta object that can be added to t1 to recreate t2. + Delta objects can contain the following vocabulary: + + iterable_item_added + iterable_item_moved + iterable_item_removed + set_item_added + set_item_removed + dictionary_item_added + dictionary_item_removed + attribute_added + attribute_removed + type_changes + values_changed + iterable_items_added_at_indexes + iterable_items_removed_at_indexes + .. _delta_diff_label: @@ -192,6 +208,14 @@ Delta Serializer DeepDiff uses pickle to serialize delta objects by default. Please take a look at the :ref:`delta_deserializer_label` for more information. + +.. _to_flat_dicts: + +Delta Serialize To Flat Dictionaries +------------------------------------ + +Read about :ref:`delta_to_flat_dicts_label` + .. 
_delta_dump_safety_label: Delta Dump Safety @@ -456,7 +480,7 @@ Unable to get the item at root['x']['y'][3]: 'x' Unable to get the item at root['q']['t'] {} -# Once we set the force to be True +Once we set the force to be True >>> delta = Delta(diff, force=True) >>> {} + delta diff --git a/docs/diff_doc.rst b/docs/diff_doc.rst index 43775b2b..95acc156 100644 --- a/docs/diff_doc.rst +++ b/docs/diff_doc.rst @@ -193,3 +193,12 @@ view: string, default = text int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple, Numpy, custom objects and more! +.. admonition:: A message from `Sep `__, the creator of DeepDiff + + | 👋 Hi there, + | + | Thank you for using DeepDiff! + | As an engineer, I understand the frustration of wrestling with **unruly data** in pipelines. + | That's why I developed a new tool - `Qluster `__ to empower non-engineers to control and resolve data issues at scale autonomously and **stop bugging the engineers**! 🛠️ + | + | If you are going through this pain now, I would love to give you `early access `__ to Qluster and get your feedback. diff --git a/docs/faq.rst b/docs/faq.rst index 0a63be85..9fbaff7a 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -107,4 +107,17 @@ Use parse_path: >>> parse_path("root['joe'].age", include_actions=True) [{'element': 'joe', 'action': 'GET'}, {'element': 'age', 'action': 'GETATTR'}] + +--------- + +.. admonition:: A message from `Sep `__, the creator of DeepDiff + + | 👋 Hi there, + | + | Thank you for using DeepDiff! + | As an engineer, I understand the frustration of wrestling with **unruly data** in pipelines. + | That's why I developed a new tool - `Qluster `__ to empower non-engineers to control and resolve data issues at scale autonomously and **stop bugging the engineers**! 🛠️ + | + | If you are going through this pain now, I would love to give you `early access `__ to Qluster and get your feedback. 
+ Back to :doc:`/index` diff --git a/docs/index.rst b/docs/index.rst index 3fc5a0ce..5c8ca52f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ contain the root `toctree` directive. -DeepDiff 6.5.0 documentation! +DeepDiff 6.6.0 documentation! ============================= ******* @@ -31,6 +31,12 @@ The DeepDiff library includes the following modules: What Is New *********** +DeepDiff 6-6-0 +-------------- + + - :ref:`delta_to_flat_dicts_label` can be used to serialize delta objects into a flat list of dictionaries. + - `NumPy 2.0 compatibility <https://github.com/seperman/deepdiff/pull/422>`__ by `William Jamieson <https://github.com/WilliamJamieson>`__ + DeepDiff 6-5-0 -------------- @@ -57,6 +63,7 @@ Tutorials ********* Tutorials can be found on `Zepworks blog `_ + ************ Installation @@ -123,7 +130,6 @@ Supported data types int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple, Numpy, custom objects and more! - References ========== diff --git a/docs/serialization.rst b/docs/serialization.rst index 3b409f1c..b3a49a98 100644 --- a/docs/serialization.rst +++ b/docs/serialization.rst @@ -105,4 +105,48 @@ From Json Pickle Load the diff object from the json pickle dump. Take a look at the above :ref:`to_json_pickle_label` for an example. + +.. _delta_to_flat_dicts_label: + +Delta Serialize To Flat Dictionaries +------------------------------------ + +Sometimes, it is desired to serialize a :ref:`delta_label` object to a list of flat dictionaries. For example, to store them in relational databases. In that case, you can use the Delta.to_flat_dicts to achieve the desired outcome. 
+ +For example: + + >>> from pprint import pprint + >>> from deepdiff import DeepDiff, Delta + >>> t1 = {"key1": "value1"} + >>> t2 = {"field2": {"key2": "value2"}} + >>> diff = DeepDiff(t1, t2, verbose_level=2) + >>> pprint(diff, indent=2) + { 'dictionary_item_added': {"root['field2']": {'key2': 'value2'}}, + 'dictionary_item_removed': {"root['key1']": 'value1'}} + >>> + >>> delta = Delta(diff, verify_symmetry=True) + >>> flat_dicts = delta.to_flat_dicts() + >>> pprint(flat_dicts, indent=2) + [ { 'action': 'dictionary_item_added', + 'path': ['field2', 'key2'], + 'value': 'value2'}, + {'action': 'dictionary_item_removed', 'path': ['key1'], 'value': 'value1'}] + + +Example 2: + + >>> t3 = ["A", "B"] + >>> t4 = ["A", "B", "C", "D"] + >>> diff = DeepDiff(t3, t4, verbose_level=2) + >>> pprint(diff, indent=2) + {'iterable_item_added': {'root[2]': 'C', 'root[3]': 'D'}} + >>> + >>> delta = Delta(diff, verify_symmetry=True) + >>> flat_dicts = delta.to_flat_dicts() + >>> pprint(flat_dicts, indent=2) + [ {'action': 'iterable_item_added', 'path': [2], 'value': 'C'}, + {'action': 'iterable_item_added', 'path': [3], 'value': 'D'}] + + + Back to :doc:`/index` diff --git a/docs/support.rst b/docs/support.rst index af91b515..48fec1ec 100644 --- a/docs/support.rst +++ b/docs/support.rst @@ -3,22 +3,23 @@ Support ======= -Hello, +👋 Hi there, -This is Sep, the creator of DeepDiff. Thanks for using DeepDiff! -If you find a bug, please create a ticket on our `github repo`_ +Thank you for using DeepDiff! +If you find a bug, please create a ticket on our `GitHub repo `__ -Contributions to DeepDiff are always very welcome! More than `50 people `__ have contributed code to DeepDiff so far. +I am **available for consulting** if you need immediate help or custom implementations of DeepDiff. You can reach me by emailing hello at this domain. -I love working on DeepDiff and other open-source projects. These projects will stay free and open source forever. 
If my work has been helpful to you, I would appreciate any sponsorship. Also, if you have any issue with my code that needs my immediate attention, I will be grateful for donations. +I love working on DeepDiff and other open-source projects. These projects will stay free and open source forever. If my work has been helpful to you, I would appreciate any sponsorship. -Please `click here `__ to read -more about sponsoring my work. +Please `click here `__ for sponsorship information. -Thank you! +| Lastly, you might be interested in another tool I've crafted: ⚡ `Qluster `__. +| As an engineer, I understand the frustration of wrestling with **unruly data** in pipelines. +| I developed Qluster to empower product managers and ops teams to control and resolve data issues autonomously and **stop bugging the engineers**! 🛠️ +| +| If you are going through this pain now, I would love to give you `early access `__ to Qluster and get your feedback. -Sep -.. _github repo: https://github.com/seperman/deepdiff Back to :doc:`/index` diff --git a/requirements-dev.txt b/requirements-dev.txt index 25a40748..f7dff137 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,17 +5,17 @@ jsonpickle==3.0.2 coverage==6.5.0 ipdb==0.13.13 numpy==1.24.4 -pytest==7.4.0 +pytest==7.4.2 pytest-cov==4.1.0 python-dotenv==0.21.0 watchdog==2.2.0 -Sphinx==5.3.0 -sphinx-sitemap==2.2.1 +Sphinx==6.2.1 # We use the html style that is not supported in Sphinx 7 anymore. 
+sphinx-sitemap==2.5.1 sphinxemoji==0.2.0 -flake8==6.0.0 +flake8==6.1.0 python-dateutil==2.8.2 -orjson==3.8.3 -wheel==0.38.1 -tomli==2.0.0 +orjson==3.9.7 +wheel==0.41.2 +tomli==2.0.1 tomli-w==1.0.0 -pydantic==1.10.8 +pydantic==2.4.2 diff --git a/setup.cfg b/setup.cfg index ac1829e9..57aa59c7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 6.5.0 +current_version = 6.6.0 commit = True tag = True tag_name = {new_version} diff --git a/setup.py b/setup.py index 3c1af24d..7944f651 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ if os.environ.get('USER', '') == 'vagrant': del os.link -version = '6.5.0' +version = '6.6.0' def get_reqs(filename): diff --git a/tests/test_delta.py b/tests/test_delta.py index d56f1231..dcb2bd71 100644 --- a/tests/test_delta.py +++ b/tests/test_delta.py @@ -5,6 +5,7 @@ import sys from decimal import Decimal from unittest import mock +from ordered_set import OrderedSet from deepdiff import Delta, DeepDiff from deepdiff.helper import np, number_to_string, TEXT_VIEW, DELTA_VIEW, CannotCompare from deepdiff.path import GETATTR, GET @@ -63,6 +64,14 @@ def test_list_difference_add_delta(self): assert delta + t1 == t2 assert t1 + delta == t2 + flat_result1 = delta.to_flat_dicts() + flat_expected1 = [ + {'path': [3], 'value': 5, 'action': 'iterable_item_added'}, + {'path': [2], 'value': 3, 'action': 'iterable_item_added'}, + ] + + assert flat_expected1 == flat_result1 + def test_list_difference_dump_delta(self): t1 = [1, 2] t2 = [1, 2, 3, 5] @@ -213,6 +222,11 @@ def test_identical_delta(self): t1 = [1, 3] assert t1 + delta == t1 + flat_result1 = delta.to_flat_dicts() + flat_expected1 = [] + + assert flat_expected1 == flat_result1 + def test_delta_mutate(self): t1 = [1, 2] t2 = [1, 2, 3, 5] @@ -266,6 +280,15 @@ def test_list_difference3_delta(self): assert delta + t1 == t2 assert t1 + delta == t2 + flat_result1 = delta.to_flat_dicts() + flat_expected1 = [ + {'path': [4, 'b', 2], 'action': 'values_changed', 
'value': 2, 'old_value': 5}, + {'path': [4, 'b', 1], 'action': 'values_changed', 'value': 3, 'old_value': 2}, + {'path': [4, 'b', 3], 'value': 5, 'action': 'iterable_item_added'}, + ] + + assert flat_expected1 == flat_result1 + def test_list_difference_delta_raises_error_if_prev_value_does_not_match(self): t1 = [1, 2, 6] t2 = [1, 3, 2, 5] @@ -295,6 +318,15 @@ def test_list_difference_delta_raises_error_if_prev_value_does_not_match(self): delta2 = Delta(diff, verify_symmetry=False) assert delta2 + t1 == t2 + flat_result2 = delta2.to_flat_dicts() + flat_expected2 = [ + {'path': [2], 'action': 'values_changed', 'value': 2, 'old_value': 5}, + {'path': [1], 'action': 'values_changed', 'value': 3, 'old_value': 2}, + {'path': [3], 'value': 5, 'action': 'iterable_item_added'}, + ] + + assert flat_expected2 == flat_result2 + def test_list_difference_delta1(self): t1 = { 1: 1, @@ -317,6 +349,14 @@ def test_list_difference_delta1(self): assert delta + t1 == t2 + flat_result = delta.to_flat_dicts() + flat_expected = [ + {'path': [4, 'b', 2], 'value': 'to_be_removed', 'action': 'iterable_item_removed'}, + {'path': [4, 'b', 3], 'value': 'to_be_removed2', 'action': 'iterable_item_removed'}, + ] + + assert flat_expected == flat_result + @mock.patch('deepdiff.delta.logger.error') def test_list_difference_delta_if_item_is_already_removed(self, mock_logger): t1 = [1, 2, 'to_be_removed'] @@ -1167,6 +1207,28 @@ def test_list_ignore_order_various_deltas2(self): t1_plus_delta2 = t1 + delta2 assert t1_plus_delta2 == (8, 4, 4, 1, 3, 4, 1, 7) + flat_result1 = delta1.to_flat_dicts() + flat_expected1 = [ + {'path': [0], 'value': 7, 'action': 'iterable_item_added'}, + {'path': [6], 'value': 8, 'action': 'iterable_item_added'}, + {'path': [1], 'value': 4, 'action': 'iterable_item_added'}, + {'path': [2], 'value': 4, 'action': 'iterable_item_added'}, + {'path': [5], 'value': 4, 'action': 'iterable_item_added'}, + {'path': [6], 'value': 6, 'action': 'iterable_item_removed'}, + {'path': [0], 
'value': 5, 'action': 'iterable_item_removed'}, + ] + assert flat_expected1 == flat_result1 + + flat_result2 = delta2.to_flat_dicts() + flat_expected2 = [ + {'path': [1], 'value': 4, 'action': 'iterable_item_added'}, + {'path': [2], 'value': 4, 'action': 'iterable_item_added'}, + {'path': [5], 'value': 4, 'action': 'iterable_item_added'}, + {'path': [6], 'action': 'values_changed', 'value': 7}, + {'path': [0], 'action': 'values_changed', 'value': 8}, + ] + assert flat_expected2 == flat_result2 + def test_delta_view_and_to_delta_dict_are_equal_when_parameteres_passed(self): """ This is a test that passes parameters in a dictionary instead of kwargs. @@ -1296,6 +1358,15 @@ def test_apply_delta_to_incompatible_object6_value_change(self): t4 = delta2 + t3 assert [] == t4 + flat_result2 = delta2.to_flat_dicts() + flat_expected2 = [{'path': [1, 2, 0], 'action': 'values_changed', 'value': 5}] + assert flat_expected2 == flat_result2 + + delta3 = Delta(diff, raise_errors=False, verify_symmetry=True) + flat_result3 = delta3.to_flat_dicts() + flat_expected3 = [{'path': [1, 2, 0], 'action': 'values_changed', 'value': 5, 'old_value': 4}] + assert flat_expected3 == flat_result3 + def test_apply_delta_to_incompatible_object7_type_change(self): t1 = ['1'] t2 = [1] @@ -1397,6 +1468,10 @@ def test_delta_to_dict(self): expected = {'iterable_items_removed_at_indexes': {'root': {2: 'B'}}} assert expected == result + flat_result = delta.to_flat_dicts() + flat_expected = [{'action': 'iterable_item_removed', 'path': [2], 'value': 'B'}] + assert flat_expected == flat_result + def test_class_type_change(self): t1 = CustomClass t2 = CustomClass2 @@ -1445,6 +1520,30 @@ def test_none_in_delta_object(self): delta = Delta(dump) assert t2 == delta + t1 + flat_result = delta.to_flat_dicts() + flat_expected = [{'path': ['a'], 'action': 'type_changes', 'value': 1, 'type': int, 'old_type': type(None)}] + assert flat_expected == flat_result + + flat_result2 = 
delta.to_flat_dicts(report_type_changes=False) + flat_expected2 = [{'path': ['a'], 'action': 'values_changed', 'value': 1}] + assert flat_expected2 == flat_result2 + + def test_delta_set_in_objects(self): + t1 = [[1, OrderedSet(['A', 'B'])], {1}] + t2 = [[2, OrderedSet([10, 'C', 'B'])], {1}] + delta = Delta(DeepDiff(t1, t2)) + flat_result = delta.to_flat_dicts() + flat_expected = [ + {'path': [0, 1], 'value': 10, 'action': 'set_item_added'}, + {'path': [0, 0], 'action': 'values_changed', 'value': 2}, + {'path': [0, 1], 'value': 'A', 'action': 'set_item_removed'}, + {'path': [0, 1], 'value': 'C', 'action': 'set_item_added'}, + ] + # Sorting because otherwise the order is not deterministic for sets, + # even though we are using OrderedSet here. It still is converted to set at some point and loses its order. + flat_result.sort(key=lambda x: str(x['value'])) + assert flat_expected == flat_result + def test_delta_with_json_serializer(self): t1 = {"a": 1} t2 = {"a": 2} @@ -1547,6 +1646,16 @@ def test_compare_func_with_duplicates_removed(self): recreated_t2 = t1 + delta assert t2 == recreated_t2 + flat_result = delta.to_flat_dicts() + flat_expected = [ + {'path': [2], 'value': {'id': 1, 'val': 3}, 'action': 'iterable_item_removed'}, + {'path': [0], 'value': {'id': 1, 'val': 3}, 'action': 'iterable_item_removed'}, + {'path': [3], 'value': {'id': 3, 'val': 3}, 'action': 'iterable_item_removed'}, + {'path': [0], 'action': 'iterable_item_moved', 'value': {'id': 1, 'val': 3}, 'new_path': [2]}, + {'path': [3], 'action': 'iterable_item_moved', 'value': {'id': 3, 'val': 3}, 'new_path': [0]}, + ] + assert flat_expected == flat_result + def test_compare_func_with_duplicates_added(self): t1 = [{'id': 3, 'val': 3}, {'id': 2, 'val': 2}, {'id': 1, 'val': 3}] t2 = [{'id': 1, 'val': 1}, {'id': 2, 'val': 2}, {'id': 1, 'val': 3}, {'id': 3, 'val': 3}] @@ -1695,3 +1804,97 @@ def test_delta_force1(self): result = {} + delta expected = {'x': {'y': {3: 4}}, 'q': {'t': 0.5}} assert expected == 
result + + def test_flatten_dict_with_one_key_added(self): + t1 = {"field1": {"joe": "Joe"}} + t2 = {"field1": {"joe": "Joe Nobody"}, "field2": {"jimmy": "Jimmy"}} + diff = DeepDiff(t1, t2) + delta = Delta(diff=diff) + flat_result = delta.to_flat_dicts(report_type_changes=False) + expected_result = [ + {'path': ['field2', 'jimmy'], 'value': 'Jimmy', 'action': 'dictionary_item_added'}, + {'path': ['field1', 'joe'], 'action': 'values_changed', 'value': 'Joe Nobody'}, + ] + assert expected_result == flat_result + + def test_flatten_dict_with_multiple_keys_added(self): + t1 = {"field1": {"joe": "Joe"}} + t2 = {"field1": {"joe": "Joe Nobody"}, "field2": {"jimmy": "Jimmy", "sar": "Sarah"}} + diff = DeepDiff(t1, t2) + delta = Delta(diff=diff) + flat_result = delta.to_flat_dicts(report_type_changes=False) + expected_result = [ + {'path': ['field2'], 'value': {'jimmy': 'Jimmy', 'sar': 'Sarah'}, 'action': 'dictionary_item_added'}, + {'path': ['field1', 'joe'], 'action': 'values_changed', 'value': 'Joe Nobody'}, + ] + assert expected_result == flat_result + + def test_flatten_list_with_one_item_added(self): + t1 = {"field1": {"joe": "Joe"}} + t2 = {"field1": {"joe": "Joe"}, "field2": ["James"]} + t3 = {"field1": {"joe": "Joe"}, "field2": ["James", "Jack"]} + diff = DeepDiff(t1, t2) + delta = Delta(diff=diff) + flat_result = delta.to_flat_dicts(report_type_changes=False) + expected_result = [{'path': ['field2', 0], 'value': 'James', 'action': 'iterable_item_added'}] + assert expected_result == flat_result + + diff = DeepDiff(t2, t3) + delta2 = Delta(diff=diff) + flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + expected_result2 = [{'path': ['field2', 1], 'value': 'Jack', 'action': 'iterable_item_added'}] + assert expected_result2 == flat_result2 + + def test_flatten_set_with_one_item_added(self): + t1 = {"field1": {"joe": "Joe"}} + t2 = {"field1": {"joe": "Joe"}, "field2": {"James"}} + t3 = {"field1": {"joe": "Joe"}, "field2": {"James", "Jack"}} + diff = 
DeepDiff(t1, t2) + delta = Delta(diff=diff) + flat_result = delta.to_flat_dicts(report_type_changes=False) + expected_result = [{'path': ['field2'], 'value': 'James', 'action': 'set_item_added'}] + assert expected_result == flat_result + + diff = DeepDiff(t2, t3) + delta2 = Delta(diff=diff) + flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + expected_result2 = [{'path': ['field2'], 'value': 'Jack', 'action': 'set_item_added'}] + assert expected_result2 == flat_result2 + + def test_flatten_tuple_with_one_item_added(self): + t1 = {"field1": {"joe": "Joe"}} + t2 = {"field1": {"joe": "Joe"}, "field2": ("James", )} + t3 = {"field1": {"joe": "Joe"}, "field2": ("James", "Jack")} + diff = DeepDiff(t1, t2) + delta = Delta(diff=diff) + flat_result = delta.to_flat_dicts(report_type_changes=False) + expected_result = [{'path': ['field2', 0], 'value': 'James', 'action': 'iterable_item_added'}] + assert expected_result == flat_result + + diff = DeepDiff(t2, t3) + delta2 = Delta(diff=diff) + flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + expected_result2 = [{'path': ['field2', 1], 'value': 'Jack', 'action': 'iterable_item_added'}] + assert expected_result2 == flat_result2 + + def test_flatten_list_with_multiple_item_added(self): + t1 = {"field1": {"joe": "Joe"}} + t2 = {"field1": {"joe": "Joe"}, "field2": ["James", "Jack"]} + diff = DeepDiff(t1, t2) + delta = Delta(diff=diff) + flat_result = delta.to_flat_dicts(report_type_changes=False) + expected_result = [{'path': ['field2'], 'value': ['James', 'Jack'], 'action': 'dictionary_item_added'}] + assert expected_result == flat_result + + delta2 = Delta(diff=diff, verify_symmetry=True) + flat_result2 = delta2.to_flat_dicts(report_type_changes=False) + assert expected_result == flat_result2 + + def test_flatten_attribute_added(self): + t1 = picklalbe_obj_without_item + t2 = PicklableClass(10) + diff = DeepDiff(t1, t2) + delta = Delta(diff=diff) + flat_result = 
delta.to_flat_dicts(report_type_changes=False) + expected_result = [{'path': ['item'], 'value': 10, 'action': 'attribute_added'}] + assert expected_result == flat_result diff --git a/tests/test_diff_other.py b/tests/test_diff_other.py index e7bc27df..067ee669 100644 --- a/tests/test_diff_other.py +++ b/tests/test_diff_other.py @@ -2,10 +2,27 @@ import datetime from time import sleep from unittest import mock +from functools import partial +from collections import namedtuple +from deepdiff import DeepHash +from deepdiff.helper import pypy3 from deepdiff.model import DiffLevel from deepdiff.diff import ( DeepDiff, PROGRESS_MSG, INVALID_VIEW_MSG, VERBOSE_LEVEL_RANGE_MSG, PURGE_LEVEL_RANGE_MSG) +from concurrent.futures.process import ProcessPoolExecutor +from concurrent.futures import as_completed + +# Only the prep part of DeepHash. We don't need to test the actual hash function. +DeepHashPrep = partial(DeepHash, apply_hash=False) + + +def prep_str(obj, ignore_string_type_changes=True): + return obj if ignore_string_type_changes else 'str:{}'.format(obj) + + +Point = namedtuple('Point', ["x"]) +point_obj = Point(x=11) class SlowDiffLevel(DiffLevel): @@ -120,3 +137,66 @@ def test_bool_str2(self): def test_get_distance_cache_key(self): result = DeepDiff._get_distance_cache_key(added_hash=5, removed_hash=20) assert b'0x14--0x5dc' == result + + def test_multi_processing1(self): + + t1 = [[1, 2, 3, 9], [1, 2, 4, 10]] + t2 = [[1, 2, 4, 10], [1, 2, 3, 10]] + + futures = [] + expected_result = { + 'values_changed': { + 'root[0][2]': { + 'new_value': 4, + 'old_value': 3 + }, + 'root[0][3]': { + 'new_value': 10, + 'old_value': 9 + }, + 'root[1][2]': { + 'new_value': 3, + 'old_value': 4 + } + } + } + + with ProcessPoolExecutor(max_workers=1) as executor: + futures.append(executor.submit(DeepDiff, t1, t2)) + + for future in as_completed(futures, timeout=10): + assert not future._exception + assert expected_result == future._result + + def 
test_multi_processing2_with_ignore_order(self): + + t1 = [[1, 2, 3, 9], [1, 2, 4, 10]] + t2 = [[1, 2, 4, 10], [1, 2, 3, 10]] + + futures = [] + expected_result = {'values_changed': {'root[0][3]': {'new_value': 10, 'old_value': 9}}} + + with ProcessPoolExecutor(max_workers=1) as executor: + futures.append(executor.submit(DeepDiff, t1, t2, ignore_order=True)) + + for future in as_completed(futures, timeout=10): + assert not future._exception + assert expected_result == future._result + + @pytest.mark.skipif(pypy3, reason="pypy3 expected results are different") + def test_multi_processing3_deephash(self): + x = "x" + x_prep = prep_str(x) + expected_result = { + x: x_prep, + point_obj: "ntPoint:{%s:int:11}" % x, + 11: 'int:11', + } + + futures = [] + with ProcessPoolExecutor(max_workers=1) as executor: + futures.append(executor.submit(DeepHashPrep, point_obj, ignore_string_type_changes=True)) + + for future in as_completed(futures, timeout=10): + assert not future._exception + assert expected_result == future._result