diff --git a/MANIFEST.in b/MANIFEST.in index 28f916f2..18aeaf36 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include *.rst +include *.txt global-exclude __pycache__ global-exclude *.py[co] diff --git a/README.md b/README.md index b5fda0de..99c09d3a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 4.0.0 +# DeepDiff v 4.0.1 ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) @@ -13,7 +13,7 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 -**NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to supprt Python 2** +**NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to support Python 2** - [Documentation](http://deepdiff.readthedocs.io/en/latest/) @@ -23,7 +23,15 @@ Tested on Python 3.4, 3.5, 3.6, 3.7, Pypy3 ### Install from PyPi: - pip install deepdiff + `pip install deepdiff` + +DeepDiff prefers to use Murmur3 for hashing. However you have to manually install Murmur3 by running: + + `pip install mmh3` + +Otherwise DeepDiff will be using SHA256 for hashing which is a cryptographic hash and is considerably slower. + +If you are running into trouble installing Murmur3, please take a look at the [Troubleshoot](#troubleshoot) section. ### Importing @@ -388,8 +396,25 @@ And here is more info: +# Troubleshoot + +## Murmur3 + +`Failed to build mmh3 when installing DeepDiff` + +DeepDiff prefers to use Murmur3 for hashing. However you have to manually install murmur3 by running: `pip install mmh3` + +On MacOS Mojave some users experience difficulty when installing Murmur3. + +The problem can be solved by running: + + `xcode-select --install` + +And then running `pip install mmh3` + # ChangeLog +- v4-0-1: Fixing installation Tarball missing requirements.txt . DeepDiff v4+ should not show up as pip installable for Py2. Making Murmur3 installation optional. - v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. 
Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. - v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index a64b76ab..cfc2ffea 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,6 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep and DeepHash classes.""" # flake8: noqa -__version__ = '4.0.0' +__version__ = '4.0.1' import logging if __name__ == '__main__': diff --git a/deepdiff/deephash.py b/deepdiff/deephash.py index 684d5ab6..161c931e 100644 --- a/deepdiff/deephash.py +++ b/deepdiff/deephash.py @@ -1,22 +1,26 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- import os -import mmh3 import logging from collections import Iterable from collections import MutableMapping from collections import defaultdict from decimal import Decimal -from hashlib import sha1 +from hashlib import sha1, sha256 from deepdiff.helper import (strings, numbers, unprocessed, not_hashed, add_to_frozen_set, convert_item_or_items_into_set_else_none, current_dir, convert_item_or_items_into_compiled_regexes_else_none, get_id) from deepdiff.base import Base - logger = logging.getLogger(__name__) +try: + import mmh3 +except ImportError: + logger.warning('Can not find Murmur3 hashing installed. Switching to SHA256 as the default hash. 
Refer to https://github.com/seperman/deepdiff#murmur3 for more info.') + mmh3 = False + UNPROCESSED = 'unprocessed' MURMUR_SEED = 1203 @@ -77,8 +81,8 @@ def __init__(self, self.ignore_repetition = ignore_repetition self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths) self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths) - - self.hasher = self.murmur3_128bit if hasher is None else hasher + default_hasher = self.murmur3_128bit if mmh3 else self.sha256hex + self.hasher = default_hasher if hasher is None else hasher hashes = hashes if hashes else {} self.update(hashes) self[UNPROCESSED] = [] @@ -101,6 +105,12 @@ def __init__(self, else: del self[UNPROCESSED] + @staticmethod + def sha256hex(obj): + """Use Sha256 as a cryptographic hash.""" + obj = obj.encode('utf-8') + return sha256(obj).hexdigest() + @staticmethod def sha1hex(obj): """Use Sha1 as a cryptographic hash.""" diff --git a/deepdiff/deephash_doc.rst b/deepdiff/deephash_doc.rst index 8318c161..7f1fab16 100644 --- a/deepdiff/deephash_doc.rst +++ b/deepdiff/deephash_doc.rst @@ -10,6 +10,8 @@ At the core of it, DeepHash is a deterministic serialization of your object into can be passed to a hash function. By default it uses Murmur 3 128 bit hash function which is a fast, non-cryptographic hashing function. You have the option to pass any another hashing function to be used instead. +If it can't find Murmur3 package (mmh3) installed, it uses Python's built-in SHA256 for hashing which is considerably slower than Murmur3. So it is advised that you install Murmur3 by running `pip install mmh3` + **Import** >>> from deepdiff import DeepHash @@ -89,13 +91,10 @@ By setting it to True, both the string and bytes of hello return the same hash. 
>>> DeepHash('hello', ignore_string_type_changes=True) {'hello': 221860156526691709602818861774599422448} -ignore_numeric_type_changes -Default: False - -ignore_numeric_type_changes: Boolean, default = True +ignore_numeric_type_changes: Boolean, default = False numeric type conversions should not affect the hash output when this is set to True. For example 10, 10.0 and Decimal(10) should produce the same hash. - However when ignore_numeric_type_changes is set to True, all numbers are converted + When ignore_numeric_type_changes is set to True, all numbers are converted to decimals with the precision of significant_digits parameter. If no significant_digits is passed by the user, a default value of 55 is used. diff --git a/deepdiff/diff.py b/deepdiff/diff.py index ea6d41cc..245c4625 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -57,7 +57,7 @@ def __init__(self, ignore_numeric_type_changes=False, verbose_level=1, view=TEXT_VIEW, - hasher=DeepHash.murmur3_128bit, + hasher=None, **kwargs): if kwargs: raise ValueError(( diff --git a/deepdiff/diff_doc.rst b/deepdiff/diff_doc.rst index 64f10929..d5206683 100644 --- a/deepdiff/diff_doc.rst +++ b/deepdiff/diff_doc.rst @@ -91,7 +91,7 @@ Example of using the text view. >>> print(ddiff) {'dictionary_item_added': [root[5], root[6]], 'dictionary_item_removed': [root[4]]} -So for example ddiff['dictionary_item_removed'] is a set if strings thus this is called the text view. +So for example ddiff['dictionary_item_added'] is a set of strings thus this is called the text view. .. seealso:: The following examples are using the *default text view.* @@ -328,7 +328,7 @@ The shortcuts are ignore_string_type_changes which by default is False and ignor For example lets say you have specifically str and byte datatypes to be ignored for type changes. Then you have a couple of options: -1. Set ignore_string_type_changes=True which is the default. +1. Set ignore_string_type_changes=True. 2. 
Or set ignore_type_in_groups=[(str, bytes)]. Here you are saying if we detect one type to be str and the other one bytes, do not report them as type change. It is exactly as passing ignore_type_in_groups=[DeepDiff.strings] or ignore_type_in_groups=DeepDiff.strings . Now what if you want also typeA and typeB to be ignored when comparing agains each other? diff --git a/docs/conf.py b/docs/conf.py index 5697e04f..55260b6c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '4.0.0' +version = '4.0.1' # The full version, including alpha/beta/rc tags. -release = '4.0.0' +release = '4.0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index 0e168247..515c84fd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ contain the root `toctree` directive. -DeepDiff 4.0.0 documentation! +DeepDiff 4.0.1 documentation! ============================= **DeepDiff: Deep Difference of dictionaries, iterables, strings and other objects. It will recursively look for all the changes.** @@ -25,6 +25,15 @@ Install from PyPi:: pip install deepdiff +DeepDiff prefers to use Murmur3 for hashing. However you have to manually install Murmur3 by running:: + + pip install mmh3 + +Otherwise DeepDiff will be using SHA256 for hashing which is a cryptographic hash and is considerably slower. + +If you are running into trouble installing Murmur3, please take a look at the `Troubleshoot <#troubleshoot>`__ section. + + Importing ~~~~~~~~~ @@ -38,6 +47,12 @@ Importing DeepDiff ******** +Read The DeepDiff details in: + +:doc:`/diff` + +Short introduction: + Supported data types ~~~~~~~~~~~~~~~~~~~~ @@ -174,6 +189,12 @@ The core of DeepHash is a deterministic serialization of your object into a stri can be passed to a hash function. By default it uses Murmur 3 128 bit hash function. 
but you can pass another hash function to it if you want. +Read the details at: + +:doc:`/deephash` + +Examples: + Let's say you have a dictionary object. .. code:: python @@ -216,6 +237,28 @@ Read more in the Deep Hash reference: :doc:`/deephash` +************ +Troubleshoot +************ + +Murmur3 +~~~~~~~ + +``Failed to build mmh3 when installing DeepDiff`` + +DeepDiff prefers to use Murmur3 for hashing. However you have to manually install murmur3 by running: ``pip install mmh3`` + +On MacOS Mojave some users experience difficulty when installing Murmur3. + +The problem can be solved by running: + + ``xcode-select --install`` + +And then running + + ``pip install mmh3`` + + References ========== @@ -238,6 +281,7 @@ Indices and tables Changelog ========= +- v4-0-1: Fixing installation Tarball missing requirements.txt . DeepDiff v4+ should not show up as pip installable for Py2. Making Murmur3 installation optional. - v4-0-0: Ending Python 2 support, Adding more functionalities and documentation for DeepHash. Switching to Pytest for testing. Switching to Murmur3 128bit for hashing. Fixing classes which inherit from classes with slots didn't have all of their slots compared. Renaming ContentHash to DeepHash. Adding exclude by path and regex path to DeepHash. Adding ignore_type_in_groups. Adding match_string to DeepSearch. Adding Timedelta object diffing. 
- v3-5-0: Exclude regex path - v3-3-0: Searching for objects and class attributes diff --git a/requirements-dev.txt b/requirements-dev.txt index 94563262..3b633e3b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,3 +2,4 @@ pytest==4.0.1 pytest-cov==2.6.0 numpy==1.15.4 +mmh3==2.5.1 diff --git a/requirements.txt b/requirements.txt index 38a19407..23df8ce4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ -mmh3==2.5.1 jsonpickle==1.0 ordered-set==3.1 diff --git a/setup.py b/setup.py index 1c633b3a..89f642a7 100755 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ def get_reqs(filename): long_description=long_description, long_description_content_type='text/markdown', install_requires=reqs, + python_requires='>=3.4', classifiers=[ "Intended Audience :: Developers", "Operating System :: OS Independent",