From 8491a39fb43278e4436d5e5ee0284115e768a82f Mon Sep 17 00:00:00 2001 From: xuweibj Date: Thu, 21 Mar 2019 02:11:55 -0400 Subject: [PATCH 1/5] 2 files for diff json/yaml file --- deepdiff/inventorydiff.py | 114 +++++++++++++++++++++++ deepdiff/structurediff.py | 184 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 298 insertions(+) create mode 100644 deepdiff/inventorydiff.py create mode 100644 deepdiff/structurediff.py diff --git a/deepdiff/inventorydiff.py b/deepdiff/inventorydiff.py new file mode 100644 index 00000000..c8289582 --- /dev/null +++ b/deepdiff/inventorydiff.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python + +from __future__ import print_function +from structurediff import StructureDiff +#import manager as mgr +from exceptions import * +from utils import * +import re + +def line_diff(file1, file2, filename=None): + (retcode,out,err)=runCommand("diff -u %s %s"%(file1, file2)) + if out: + if filename: + out=re.sub(r"%s.*"%(file1),filename,out) + out=re.sub(r"%s.*"%(file2),filename,out) + else: + out=re.sub(r"%s.*"%(file1),file1,out) + out=re.sub(r"%s.*"%(file2),file2,out) + if err: + if filename: + err=re.sub(r"%s.*"%(file1),filename,err) + err=re.sub(r"%s.*"%(file2),filename,err) + else: + err=re.sub(r"%s.*"%(file1),file1,err) + err=re.sub(r"%s.*"%(file2),file2,err) + return out, err + + +class InventoryDiff(object): + def __init__(self, args): + self._validate_args(args) + + def _validate_args(self, args): + if args.files and args.source: + raise CommandException("Error: '--files' and '--source' cannot be used together!") + if not args.files and not args.source: + raise CommandException("Error: No valid source type!") + if not args.source and args.all: + raise CommandException("Error: '--all' must be used with '--source'!") + if not args.files and args.filename: + raise CommandException("Error: '--filename' must be used with '--files'!") + + if args.files: + self.objs = args.files + self.objtype = 'f' + self.filename = args.filename + elif args.source: + self.objs = args.source + self.objtype = 'fvso' + self.isall = args.all + + def _get_file_data(self, data_file): + data, self.fmt = loadfile(filename=data_file) + return data + + def show_diff(self, diff, source=None): + print("\n====================BEGIN=====================\n") + if source: + print(source) + print(diff) + print("\n====================END=====================\n") + + def inventory_diff(self): + rc = None + err = None + if self.objtype == 'f': + file1 = self.objs.pop(0) + file2 = self.objs.pop(0) + filename = None + if self.filename: + filename = self.filename[0] + d1=None + d2=None + try: + d1 = self._get_file_data(file1) + d2 = self._get_file_data(file2) + except FileNotExistException as e: + raise FileNotExistException(e.message) + except InvalidFileException as e: + out, err = line_diff(file1, file2, filename) + rc = 1 + + if not d1 or not d2 or type(d1)!=dict or type(d2)!=dict: + out, err = line_diff(file1, file2, filename) + rc = 1 + + if self.filename: + file1 = filename + file2 = filename + self.isall = True + elif self.objtype == 'fvso': + file1 = 'xCAT DB' + file2 = self.objs.pop(0) + try: + d2 = self._get_file_data(file2) + if type(d2) != dict: + raise InvalidValueException('Error: Format of data from file \'%s\' is not correct, please check...' % file2) + except FileNotExistException as e: + raise FileNotExistException(e.message) + except InvalidFileException as e: + raise InvalidFileException('Error: Could not get json or yaml data from file \'%s\', please check or export object to diff files' % file2) + if not rc: + d1 = mgr.export_by_type(None, None, fmt='dict') + + if rc and err: + raise InternalException(err) + + if not rc: + diff_dict = StructureDiff().diff(d1, d2, self.isall) + if diff_dict: + self.show_diff(StructureDiff().rept(diff_dict, self.fmt), "\n--- %s\n+++ %s" % (file1, file2)) + elif out: + self.show_diff(out) + diff --git a/deepdiff/structurediff.py b/deepdiff/structurediff.py new file mode 100644 index 00000000..a1c1b8b4 --- /dev/null +++ b/deepdiff/structurediff.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python + +from __future__ import print_function +import deepdiff +from utils import * +import yaml +import json +import sys +import re + +class format_diff_output(object): + def __init__(self): + self.flag_dict = {'-diff': '-', '+diff': '+'} + + def _get_path_as_list(self, path): + path_list = re.findall(r'(?<=\[\').+?(?=\'\])', path) + return path_list + + def _update_dict(self, old_dict, new_dict): + + if not old_dict and new_dict: + old_dict = new_dict + return old_dict + + if type(new_dict) == list: + old_dict += new_dict + return old_dict + + keys = new_dict.keys() + for key in keys: + if key in old_dict: + self._update_dict(old_dict[key], new_dict[key]) + else: + old_dict.update({key: new_dict[key]}) + return old_dict + + def _format_yaml(self, yamlstr): + yaml_list = yamlstr.split('\n') + new_list = [] + + while len(yaml_list) > 0: + tmp_str = yaml_list.pop(0) + diff_flag = None + flag = None + for key, flag in self.flag_dict.items(): + if key in tmp_str: + diff_flag = key + break + + if diff_flag: + null_num = 0 + if diff_flag + ': ' in tmp_str: + tmp_str = tmp_str.replace('%s: ' % diff_flag, '') + if tmp_str: + tmp_str = flag + tmp_str.replace('\'', '') + new_list.append(tmp_str) + else: + null_num = tmp_str.count(' ') + while len(yaml_list) > 0: + tmp_str = yaml_list.pop(0) + tmp_null_num = tmp_str.count(' ') + if tmp_null_num > null_num: + tmp_str = flag + tmp_str[2:] + new_list.append(tmp_str) + else: + yaml_list.insert(0, tmp_str) + break + else: + new_list.append(tmp_str) + + return ('\n'.join(new_list)) + + def _format_json(self, jsonstr): + json_list = jsonstr.split('\n') + new_list = [] + + while len(json_list) > 0: + tmp_str = json_list.pop(0) + diff_flag = None + flag = None + for key, flag in self.flag_dict.items(): + if key in tmp_str: + diff_flag = key + break + + if diff_flag: + bracket = 0 + if '{' in tmp_str: + bracket += 1 + while (bracket > 0 and len(json_list) > 0): + tmp_str = json_list.pop(0) + if '{' in tmp_str: + bracket += 1 + if '}' in tmp_str: + bracket -= 1 + if not bracket: + break + tmp_str = flag + tmp_str[4:] + new_list.append(tmp_str) + else: + tmp_str = tmp_str.replace('%s: ' % diff_flag, '') + tmp_str = flag + tmp_str + new_list.append(tmp_str) + + else: + new_list.append(tmp_str) + + return ('\n'.join(new_list)) + + def deal_with_diff_dict(self, result_dict): + diff_dict = {} + for key, value in result_dict.items(): + for change in value: + mychange = {} + if 'added' in key: + if 'iterable' in key: + path = self._get_path_as_list(change.up.path()) + extra = path.pop() + mychange = {extra: ['+diff: %s' % change.t2]} + else: + path= self._get_path_as_list(change.path()) + extra = path.pop() + mychange = { '+diff': {extra: change.t2}} + elif 'removed' in key: + if 'iterable' in key: + path = self._get_path_as_list(change.up.path()) + extra = path.pop() + mychange = {extra: ['-diff: %s' % change.t1]} + else: + path = self._get_path_as_list(change.path()) + extra = path.pop() + mychange = {'-diff': {extra: change.t1}} + elif 'changed' in key: + path = self._get_path_as_list(change.path()) + extra = path.pop() + mychange = {'-diff': {extra: change.t1}, '+diff': {extra: change.t2}} + elif 'type_changes' in key: + path = self._get_path_as_list(change.path()) + extra = path.pop() + if change.t1 == None: + change.t1 = '' + if change.t2 == None: + change.t2 = '' + mychange = {'-diff': {extra: change.t1}, '+diff': {extra: change.t2}} + + while len(path) > 0: + key_str = path.pop() + mychange = {key_str: mychange} + + for change_key in mychange: + diff_dict = self._update_dict(diff_dict, {change_key: mychange[change_key]}) + return diff_dict + + def get_diff_string(self, format_type, diff_dict): + if format_type == 'json': + diff_json = json.dumps(diff_dict, indent=4, separators=(',', ': ')) + return (self._format_json(diff_json)) + else: + diff_yaml = yaml.safe_dump(diff_dict, default_flow_style=False,allow_unicode=True) + return (self._format_yaml(diff_yaml)) + + +class StructureDiff(object): + + def __init__(self): + pass + + def _get_deepdiff(self, obj1, obj2): + self.diff = deepdiff.DeepDiff(obj1,obj2,ignore_order=True,report_repetition=False,exclude_paths='',significant_digits=None,view='tree',verbose_level=1) + + def rept(self, diff_dict, fmt): + diff_string = format_diff_output().get_diff_string(fmt, diff_dict) + + if diff_string and diff_string != '{}': + return diff_string + else: + return + + def diff(self, obj1, obj2, isall=False): + if not isall: + obj1 = filter_dict_keys(obj1, obj2) + self._get_deepdiff(obj1, obj2) + diff_dict = format_diff_output().deal_with_diff_dict(self.diff) + return diff_dict From 7f8ed1170ea2cea25498ecb4cf54272a7db3ca94 Mon Sep 17 00:00:00 2001 From: Margaret G Date: Fri, 22 Mar 2019 14:09:28 -0700 Subject: [PATCH 2/5] Fix typo in diff_doc.rst --- deepdiff/diff_doc.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 64f10929..975aef38 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -91,7 +91,7 @@ Example of using the text view. >>> print(ddiff) {'dictionary_item_added': [root[5], root[6]], 'dictionary_item_removed': [root[4]]} -So for example ddiff['dictionary_item_removed'] is a set if strings thus this is called the text view. +So for example ddiff['dictionary_item_added'] is a set of strings thus this is called the text view. .. seealso:: The following examples are using the *default text view.* From 11f71e3abd2b294175da22e25a17c1e74eb111ea Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 23 Mar 2019 18:49:16 -0400 Subject: [PATCH 3/5] Declare dependency on Python version. Fixes #130. --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 1c633b3a..89f642a7 100755 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ def get_reqs(filename): long_description=long_description, long_description_content_type='text/markdown', install_requires=reqs, + python_requires='>=3.4', classifiers=[ "Intended Audience :: Developers", "Operating System :: OS Independent", From d8cade968f45aa24809c48fcf8bde28daa87987b Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 23 Mar 2019 22:34:55 -0700 Subject: [PATCH 4/5] fixing 4.0.0 breaks: No such file or directory: 'requirements.txt' #127 --- MANIFEST.in | 1 + deepdiff/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 28f916f2..18aeaf36 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include *.rst +include *.txt global-exclude __pycache__ global-exclude *.py[co] diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index a64b76ab..cfc2ffea 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,6 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep and DeepHash classes.""" # flake8: noqa -__version__ = '4.0.0' +__version__ = '4.0.1' import logging if __name__ == '__main__': From b3e2b1084b965970717759b9d0ad9092119f482c Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Sat, 23 Mar 2019 23:44:47 -0700 Subject: [PATCH 5/5] fixing installation issues and making murmur3 optional --- README.md | 31 ++++++- deepdiff/deephash.py | 20 +++-- deepdiff/deephash_doc.rst | 9 +- deepdiff/diff.py | 2 +- deepdiff/diff_doc.rst | 2 +- deepdiff/inventorydiff.py | 114 ----------------------- deepdiff/structurediff.py | 184 -------------------------------------- docs/conf.py | 4 +- docs/index.rst | 46 +++++++++- requirements-dev.txt | 1 + requirements.txt | 1 - 11 files changed, 97 insertions(+), 317 deletions(-) delete mode 100644 deepdiff/inventorydiff.py delete mode 100644 deepdiff/structurediff.py diff --git a/README.md b/README.md index b5fda0de..99c09d3a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 4.0.0 +# DeepDiff v 4.0.1 ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) @@ -13,7 +13,7 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 -**NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to supprt Python 2** +**NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to support Python 2** - [Documentation](http://deepdiff.readthedocs.io/en/latest/) @@ -23,7 +23,15 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 ### Install from PyPi: - pip install deepdiff + `pip install deepdiff` + +DeepDiff prefers to use Murmur3 for hashing. However you have to manually install Murmur3 by running: + + `pip install mmh3` + +Otherwise DeepDiff will be using SHA256 for hashing which is a cryptographic hash and is considerably slower. + +If you are running into trouble installing Murmur3, please take a look at the [Troubleshoot](#troubleshoot) section. ### Importing @@ -388,8 +396,25 @@ And here is more info: +# Troubleshoot + +## Murmur3 + +`Failed to build mmh3 when installing DeepDiff` + +DeepDiff prefers to use Murmur3 for hashing. However you have to manually install murmur3 by running: `pip install mmh3` + +On MacOS Mojave some user experience difficulty when installing Murmur3. + +The problem can be solved by running: + + `xcode-select --install` + +And then running `pip install mmh3` + # ChangeLog +- v4-0-1: Fixing installation Tarball missing requirements.txt . DeepDiff v4+ should not show up as pip installable for Py2. Making Murmur3 installation optional. - v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. - v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 684d5ab6..161c931e 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -1,22 +1,26 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os -import mmh3 import logging from collections import Iterable from collections import MutableMapping from collections import defaultdict from decimal import Decimal -from hashlib import sha1 +from hashlib import sha1, sha256 from deepdiff.helper import (strings, numbers, unprocessed, not_hashed, add_to_frozen_set, convert_item_or_items_into_set_else_none, current_dir, convert_item_or_items_into_compiled_regexes_else_none, get_id) from deepdiff.base import Base - logger = logging.getLogger(__name__) +try: + import mmh3 +except ImportError: + logger.warning('Can not find Murmur3 hashing installed. Switching to SHA256 as the default hash. Refer to https://github.com/seperman/deepdiff#murmur3 for more info.') + mmh3 = False + UNPROCESSED = 'unprocessed' MURMUR_SEED = 1203 @@ -77,8 +81,8 @@ def __init__(self, self.ignore_repetition = ignore_repetition self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths) self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) - - self.hasher = self.murmur3_128bit if hasher is None else hasher + default_hasher = self.murmur3_128bit if mmh3 else self.sha256hex + self.hasher = default_hasher if hasher is None else hasher hashes = hashes if hashes else {} self.update(hashes) self[UNPROCESSED] = [] @@ -101,6 +105,12 @@ def __init__(self, else: del self[UNPROCESSED] + @staticmethod + def sha256hex(obj): + """Use Sha256 as a cryptographic hash.""" + obj = obj.encode('utf-8') + return sha256(obj).hexdigest() + @staticmethod def sha1hex(obj): """Use Sha1 as a cryptographic hash.""" diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index 8318c161..7f1fab16 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -10,6 +10,8 @@ At the core of it, DeepHash is a deterministic serialization of your object into can be passed to a hash function. By default it uses Murmur 3 128 bit hash function which is a fast, non-cryptographic hashing function. You have the option to pass any another hashing function to be used instead. +If it can't find Murmur3 package (mmh3) installed, it uses Python's built-in SHA256 for hashing which is considerably slower than Murmur3. So it is advised that you install Murmur3 by running `pip install mmh3` + **Import** >>> from deepdiff import DeepHash @@ -89,13 +91,10 @@ By setting it to True, both the string and bytes of hello return the same hash. >>> DeepHash('hello', ignore_string_type_changes=True) {'hello': 221860156526691709602818861774599422448} -ignore_numeric_type_changes -Default: False - -ignore_numeric_type_changes: Boolean, default = True +ignore_numeric_type_changes: Boolean, default = False numeric type conversions should not affect the hash output when this is set to True. For example 10, 10.0 and Decimal(10) should produce the same hash. - However when ignore_numeric_type_changes is set to True, all numbers are converted + When ignore_numeric_type_changes is set to True, all numbers are converted to decimals with the precision of significant_digits parameter. If no significant_digits is passed by the user, a default value of 55 is used. diff --git a/deepdiff/diff.py b/deepdiff/diff.py index ea6d41cc..245c4625 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -57,7 +57,7 @@ def __init__(self, ignore_numeric_type_changes=False, verbose_level=1, view=TEXT_VIEW, - hasher=DeepHash.murmur3_128bit, + hasher=None, **kwargs): if kwargs: raise ValueError(( diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 975aef38..d5206683 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -328,7 +328,7 @@ The shortcuts are ignore_string_type_changes which by default is False and ignor For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: -1. Set ignore_string_type_changes=True which is the default. +1. Set ignore_string_type_changes=True. 2. Or set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . Now what if you want also typeA and typeB to be ignored when comparing agains each other? diff --git a/deepdiff/inventorydiff.py b/deepdiff/inventorydiff.py deleted file mode 100644 index c8289582..00000000 --- a/deepdiff/inventorydiff.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function -from structurediff import StructureDiff -#import manager as mgr -from exceptions import * -from utils import * -import re - -def line_diff(file1, file2, filename=None): - (retcode,out,err)=runCommand("diff -u %s %s"%(file1, file2)) - if out: - if filename: - out=re.sub(r"%s.*"%(file1),filename,out) - out=re.sub(r"%s.*"%(file2),filename,out) - else: - out=re.sub(r"%s.*"%(file1),file1,out) - out=re.sub(r"%s.*"%(file2),file2,out) - if err: - if filename: - err=re.sub(r"%s.*"%(file1),filename,err) - err=re.sub(r"%s.*"%(file2),filename,err) - else: - err=re.sub(r"%s.*"%(file1),file1,err) - err=re.sub(r"%s.*"%(file2),file2,err) - return out, err - - -class InventoryDiff(object): - def __init__(self, args): - self._validate_args(args) - - def _validate_args(self, args): - if args.files and args.source: - raise CommandException("Error: '--files' and '--source' cannot be used together!") - if not args.files and not args.source: - raise CommandException("Error: No valid source type!") - if not args.source and args.all: - raise CommandException("Error: '--all' must be used with '--source'!") - if not args.files and args.filename: - raise CommandException("Error: '--filename' must be used with '--files'!") - - if args.files: - self.objs = args.files - self.objtype = 'f' - self.filename = args.filename - elif args.source: - self.objs = args.source - self.objtype = 'fvso' - self.isall = args.all - - def _get_file_data(self, data_file): - data, self.fmt = loadfile(filename=data_file) - return data - - def show_diff(self, diff, source=None): - print("\n====================BEGIN=====================\n") - if source: - print(source) - print(diff) - print("\n====================END=====================\n") - - def inventory_diff(self): - rc = None - err = None - if self.objtype == 'f': - file1 = self.objs.pop(0) - file2 = self.objs.pop(0) - filename = None - if self.filename: - filename = self.filename[0] - d1=None - d2=None - try: - d1 = self._get_file_data(file1) - d2 = self._get_file_data(file2) - except FileNotExistException as e: - raise FileNotExistException(e.message) - except InvalidFileException as e: - out, err = line_diff(file1, file2, filename) - rc = 1 - - if not d1 or not d2 or type(d1)!=dict or type(d2)!=dict: - out, err = line_diff(file1, file2, filename) - rc = 1 - - if self.filename: - file1 = filename - file2 = filename - self.isall = True - elif self.objtype == 'fvso': - file1 = 'xCAT DB' - file2 = self.objs.pop(0) - try: - d2 = self._get_file_data(file2) - if type(d2) != dict: - raise InvalidValueException('Error: Format of data from file \'%s\' is not correct, please check...' % file2) - except FileNotExistException as e: - raise FileNotExistException(e.message) - except InvalidFileException as e: - raise InvalidFileException('Error: Could not get json or yaml data from file \'%s\', please check or export object to diff files' % file2) - if not rc: - d1 = mgr.export_by_type(None, None, fmt='dict') - - if rc and err: - raise InternalException(err) - - if not rc: - diff_dict = StructureDiff().diff(d1, d2, self.isall) - if diff_dict: - self.show_diff(StructureDiff().rept(diff_dict, self.fmt), "\n--- %s\n+++ %s" % (file1, file2)) - elif out: - self.show_diff(out) - diff --git a/deepdiff/structurediff.py b/deepdiff/structurediff.py deleted file mode 100644 index a1c1b8b4..00000000 --- a/deepdiff/structurediff.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function -import deepdiff -from utils import * -import yaml -import json -import sys -import re - -class format_diff_output(object): - def __init__(self): - self.flag_dict = {'-diff': '-', '+diff': '+'} - - def _get_path_as_list(self, path): - path_list = re.findall(r'(?<=\[\').+?(?=\'\])', path) - return path_list - - def _update_dict(self, old_dict, new_dict): - - if not old_dict and new_dict: - old_dict = new_dict - return old_dict - - if type(new_dict) == list: - old_dict += new_dict - return old_dict - - keys = new_dict.keys() - for key in keys: - if key in old_dict: - self._update_dict(old_dict[key], new_dict[key]) - else: - old_dict.update({key: new_dict[key]}) - return old_dict - - def _format_yaml(self, yamlstr): - yaml_list = yamlstr.split('\n') - new_list = [] - - while len(yaml_list) > 0: - tmp_str = yaml_list.pop(0) - diff_flag = None - flag = None - for key, flag in self.flag_dict.items(): - if key in tmp_str: - diff_flag = key - break - - if diff_flag: - null_num = 0 - if diff_flag + ': ' in tmp_str: - tmp_str = tmp_str.replace('%s: ' % diff_flag, '') - if tmp_str: - tmp_str = flag + tmp_str.replace('\'', '') - new_list.append(tmp_str) - else: - null_num = tmp_str.count(' ') - while len(yaml_list) > 0: - tmp_str = yaml_list.pop(0) - tmp_null_num = tmp_str.count(' ') - if tmp_null_num > null_num: - tmp_str = flag + tmp_str[2:] - new_list.append(tmp_str) - else: - yaml_list.insert(0, tmp_str) - break - else: - new_list.append(tmp_str) - - return ('\n'.join(new_list)) - - def _format_json(self, jsonstr): - json_list = jsonstr.split('\n') - new_list = [] - - while len(json_list) > 0: - tmp_str = json_list.pop(0) - diff_flag = None - flag = None - for key, flag in self.flag_dict.items(): - if key in tmp_str: - diff_flag = key - break - - if diff_flag: - bracket = 0 - if '{' in tmp_str: - bracket += 1 - while (bracket > 0 and len(json_list) > 0): - tmp_str = json_list.pop(0) - if '{' in tmp_str: - bracket += 1 - if '}' in tmp_str: - bracket -= 1 - if not bracket: - break - tmp_str = flag + tmp_str[4:] - new_list.append(tmp_str) - else: - tmp_str = tmp_str.replace('%s: ' % diff_flag, '') - tmp_str = flag + tmp_str - new_list.append(tmp_str) - - else: - new_list.append(tmp_str) - - return ('\n'.join(new_list)) - - def deal_with_diff_dict(self, result_dict): - diff_dict = {} - for key, value in result_dict.items(): - for change in value: - mychange = {} - if 'added' in key: - if 'iterable' in key: - path = self._get_path_as_list(change.up.path()) - extra = path.pop() - mychange = {extra: ['+diff: %s' % change.t2]} - else: - path= self._get_path_as_list(change.path()) - extra = path.pop() - mychange = { '+diff': {extra: change.t2}} - elif 'removed' in key: - if 'iterable' in key: - path = self._get_path_as_list(change.up.path()) - extra = path.pop() - mychange = {extra: ['-diff: %s' % change.t1]} - else: - path = self._get_path_as_list(change.path()) - extra = path.pop() - mychange = {'-diff': {extra: change.t1}} - elif 'changed' in key: - path = self._get_path_as_list(change.path()) - extra = path.pop() - mychange = {'-diff': {extra: change.t1}, '+diff': {extra: change.t2}} - elif 'type_changes' in key: - path = self._get_path_as_list(change.path()) - extra = path.pop() - if change.t1 == None: - change.t1 = '' - if change.t2 == None: - change.t2 = '' - mychange = {'-diff': {extra: change.t1}, '+diff': {extra: change.t2}} - - while len(path) > 0: - key_str = path.pop() - mychange = {key_str: mychange} - - for change_key in mychange: - diff_dict = self._update_dict(diff_dict, {change_key: mychange[change_key]}) - return diff_dict - - def get_diff_string(self, format_type, diff_dict): - if format_type == 'json': - diff_json = json.dumps(diff_dict, indent=4, separators=(',', ': ')) - return (self._format_json(diff_json)) - else: - diff_yaml = yaml.safe_dump(diff_dict, default_flow_style=False,allow_unicode=True) - return (self._format_yaml(diff_yaml)) - - -class StructureDiff(object): - - def __init__(self): - pass - - def _get_deepdiff(self, obj1, obj2): - self.diff = deepdiff.DeepDiff(obj1,obj2,ignore_order=True,report_repetition=False,exclude_paths='',significant_digits=None,view='tree',verbose_level=1) - - def rept(self, diff_dict, fmt): - diff_string = format_diff_output().get_diff_string(fmt, diff_dict) - - if diff_string and diff_string != '{}': - return diff_string - else: - return - - def diff(self, obj1, obj2, isall=False): - if not isall: - obj1 = filter_dict_keys(obj1, obj2) - self._get_deepdiff(obj1, obj2) - diff_dict = format_diff_output().deal_with_diff_dict(self.diff) - return diff_dict diff --git a/docs/conf.py b/docs/conf.py index 5697e04f..55260b6c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '4.0.0' +version = '4.0.1' # The full version, including alpha/beta/rc tags. -release = '4.0.0' +release = '4.0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index 0e168247..515c84fd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ contain the root `toctree` directive. -DeepDiff 4.0.0 documentation! +DeepDiff 4.0.1 documentation! ============================= **DeepDiff: Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes.** @@ -25,6 +25,15 @@ Install from PyPi:: pip install deepdiff +DeepDiff prefers to use Murmur3 for hashing. However you have to manually install Murmur3 by running:: + + pip install mmh3 + +Otherwise DeepDiff will be using SHA256 for hashing which is a cryptographic hash and is considerably slower. + +If you are running into trouble installing Murmur3, please take a look at the `Troubleshoot <#troubleshoot>`__ section. + + Importing ~~~~~~~~~ @@ -38,6 +47,12 @@ Importing DeepDiff ******** +Read The DeepDiff details in: + +:doc:`/diff` + +Short introduction:: + Supported data types ~~~~~~~~~~~~~~~~~~~~ @@ -174,6 +189,12 @@ The core of DeepHash is a deterministic serialization of your object into a stri can be passed to a hash function. By default it uses Murmur 3 128 bit hash function. but you can pass another hash function to it if you want. +Read the details at: + +:doc:`/deephash` + +Examples: + Let's say you have a dictionary object. .. code:: python @@ -216,6 +237,28 @@ Read more in the Deep Hash reference: :doc:`/deephash` +************ +Troubleshoot +************ + +Murmur3 +~~~~~~~ + +`Failed to build mmh3 when installing DeepDiff` + +DeepDiff prefers to use Murmur3 for hashing. However you have to manually install murmur3 by running: `pip install mmh3` + +On MacOS Mojave some user experience difficulty when installing Murmur3. + +The problem can be solved by running: + + `xcode-select --install` + +And then running + + `pip install mmh3` + + References ========== @@ -238,6 +281,7 @@ Indices and tables Changelog ========= +- v4-0-1: Fixing installation Tarball missing requirements.txt . DeepDiff v4+ should not show up as pip installable for Py2. Making Murmur3 installation optional. - v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. - v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes diff --git a/requirements-dev.txt b/requirements-dev.txt index 94563262..3b633e3b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,3 +2,4 @@ pytest==4.0.1 pytest-cov==2.6.0 numpy==1.15.4 +mmh3==2.5.1 diff --git a/requirements.txt b/requirements.txt index 38a19407..23df8ce4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -mmh3==2.5.1 jsonpickle==1.0 ordered-set==3.1