From 4d689daa01721bc3883e9b2937a5d21ee9a66e8f Mon Sep 17 00:00:00 2001 From: Dennis Tomas Date: Thu, 10 Aug 2017 23:20:57 +0200 Subject: [PATCH 1/6] Implemented PMap.__eq__. --- performance_suites/pmap.py | 94 +++++++++++++++++++++++++++++++++++++- pyrsistent/_pmap.py | 22 ++++++++- tests/map_test.py | 61 +++++++++++++++++++++++-- 3 files changed, 171 insertions(+), 6 deletions(-) diff --git a/performance_suites/pmap.py b/performance_suites/pmap.py index 9cfed2e..3cb23fc 100644 --- a/performance_suites/pmap.py +++ b/performance_suites/pmap.py @@ -163,4 +163,96 @@ def iteration_large_pmap(): @Benchmarked(setup=_large_dict) def reference_iteration_large_dict(): for k in large_dict: - pass \ No newline at end of file + pass + + +# #################### Comparison ######################## + +def _different_pmaps_same_size(): + pmap1 = pmap(dict((i, i) for i in range(2000))) + pmap2 = pmap(dict((i, i + 1) for i in range(2000))) + + +def _different_pmaps_different_size(): + pmap1 = pmap(dict((i, i) for i in range(2000))) + pmap2 = pmap(dict((i, i + 1) for i in range(1500))) + + +def _equal_pmaps(): + pmap1 = pmap(dict((i, i) for i in range(2000))) + pmap2 = pmap(dict((i, i) for i in range(2000))) + + +def _equal_pmap_and_dict(): + dict1 = dict((i, i) for i in range(2000)) + pmap1 = pmap(dict((i, i) for i in range(2000))) + + +def _equal_dicts(): + dict1 = dict((i, i) for i in range(2000)) + dict2 = dict((i, i) for i in range(2000)) + + +def _different_dicts_same_size(): + dict1 = dict((i, i) for i in range(2000)) + dict2 = dict((i, i + 1) for i in range(2000)) + + +def _different_dicts_different_size(): + dict1 = dict((i, i) for i in range(2000)) + dict2 = dict((i, i + 1) for i in range(2000)) + + +def _equal_pmaps_different_bucket_size(): + pmap1 = pmap(dict((i, i) for i in range(2000)), 50) + pmap2 = pmap(dict((i, i) for i in range(2000)), 100) + + +@Benchmarked(setup=_large_pmap) +def compare_same_pmap(): + large_pmap == large_pmap + + +@Benchmarked(setup=_large_dict) +def reference_compare_same_dict(): + large_dict == large_dict + + +@Benchmarked(setup=_equal_pmaps) +def compare_equal_pmaps(): + pmap1 == pmap2 + + +@Benchmarked(setup=_equal_dicts) +def reference_compare_equal_dicts(): + dict1 == dict2 + + +@Benchmarked(setup=_equal_pmap_and_dict) +def compare_equal_pmap_and_dict(): + pmap1 == dict1 + + +@Benchmarked(setup=_equal_pmaps_different_bucket_size) +def compare_equal_pmaps_different_bucket_size(): + pmap1 == pmap2 + + +@Benchmarked(setup=_different_pmaps_same_size) +def compare_different_pmaps_same_size(): + pmap1 == pmap2 + + +@Benchmarked(setup=_different_dicts_same_size) +def reference_compare_different_dicts_same_size(): + dict1 == dict2 + + +@Benchmarked(setup=_different_pmaps_different_size) +def compare_different_pmaps_different_size(): + pmap1 == pmap2 + + +@Benchmarked(setup=_different_dicts_different_size) +def reference_compare_different_dicts_different_size(): + dict1 == dict2 diff --git a/pyrsistent/_pmap.py b/pyrsistent/_pmap.py index f7ba577..c8a43ab 100644 --- a/pyrsistent/_pmap.py +++ b/pyrsistent/_pmap.py @@ -125,7 +125,27 @@ def __len__(self): def __repr__(self): return 'pmap({0})'.format(str(dict(self))) - __eq__ = Mapping.__eq__ + def __eq__(self, other): + if self is other: + return True + if not isinstance(other, Mapping): + return NotImplemented + if len(self) != len(other): + return False + if isinstance(other, PMap): + try: + if hash(self) != hash(other): + return False + except TypeError: + # self or other contains non-hashable values + pass + if self._buckets == other._buckets: + return True + return dict(self.iteritems()) == dict(other.iteritems()) + elif isinstance(other, dict): + return dict(self.iteritems()) == other + return dict(self.iteritems()) == dict(six.iteritems(other)) + __ne__ = Mapping.__ne__ def __lt__(self, other): diff --git a/tests/map_test.py b/tests/map_test.py index 9635e9a..d72750b 100644 --- a/tests/map_test.py +++ b/tests/map_test.py @@ -117,13 +117,11 @@ def test_overwrite_existing_element(): assert map2['a'] == 3 -def test_supports_hash_and_equals(): +def test_hash(): x = m(a=1, b=2, c=3) y = m(a=1, b=2, c=3) - + assert hash(x) == hash(y) - assert x == y - assert not (x != y) def test_same_hash_when_content_the_same_but_underlying_vector_size_differs(): @@ -156,6 +154,61 @@ def test_map_does_not_hash_values_on_second_hash_invocation(): hash(x) +def test_equal(): + x = m(a=1, b=2, c=3) + y = m(a=1, b=2, c=3) + + assert x == y + assert not (x != y) + + assert y == x + assert not (y != x) + + +def test_equal_to_dict(): + x = m(a=1, b=2, c=3) + y = dict(a=1, b=2, c=3) + + assert x == y + assert not (x != y) + + assert y == x + assert not (y != x) + + +def test_equal_with_different_bucket_sizes(): + x = pmap({'a': 1, 'b': 2}, 50) + y = pmap({'a': 1, 'b': 2}, 10) + + assert x == y + assert not (x != y) + + assert y == x + assert not (y != x) + + +def test_not_equal(): + x = m(a=1, b=2, c=3) + y = m(a=1, b=2) + + assert x != y + assert not (x == y) + + assert y != x + assert not (y == x) + + +def test_not_equal_to_dict(): + x = m(a=1, b=2, c=3) + y = dict(a=1, b=2, d=4) + + assert x != y + assert not (x == y) + + assert y != x + assert not (y == x) + + def test_update_with_multiple_arguments(): # If same value is present in multiple sources, the rightmost is used. x = m(a=1, b=2, c=3) From d6268ae771347cfd381608a8c2758a9d3fc46743 Mon Sep 17 00:00:00 2001 From: Dennis Tomas Date: Wed, 23 Aug 2017 14:14:07 +0200 Subject: [PATCH 2/6] Added test and benchmark for equality check worst case scenario. --- performance_suites/pmap.py | 14 ++++++++++++-- tests/map_test.py | 11 +++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/performance_suites/pmap.py b/performance_suites/pmap.py index 3cb23fc..f1f3c88 100644 --- a/performance_suites/pmap.py +++ b/performance_suites/pmap.py @@ -204,8 +204,13 @@ def _different_dicts_different_size(): def _equal_pmaps_different_bucket_size(): - pmap1 = pmap(dict((i, i) for i in range(2000)), 50) - pmap2 = pmap(dict((i, i) for i in range(2000)), 100) + pmap1 = pmap(dict((i, i) for i in range(2000)), 1999) + pmap2 = pmap(dict((i, i) for i in range(2000)), 2000) + + +def _equal_pmaps_same_bucket_size_different_insertion_order(): + pmap1 = pmap([(i, i) for i in range(2000)], 1999) + pmap2 = pmap([(i, i) for i in range(1999, -1, -1)], 1999) @Benchmarked(setup=_large_pmap) @@ -233,6 +238,11 @@ def compare_equal_pmap_and_dict(): pmap1 == dict1 +@Benchmarked(setup=_equal_pmaps_same_bucket_size_different_insertion_order) +def compare_equal_pmaps_different_insertion_order(): + pmap1 == pmap2 + + @Benchmarked(setup=_equal_pmaps_different_bucket_size) def compare_equal_pmaps_different_bucket_size(): pmap1 == pmap2 diff --git a/tests/map_test.py b/tests/map_test.py index d72750b..171b017 100644 --- a/tests/map_test.py +++ b/tests/map_test.py @@ -187,6 +187,17 @@ def test_equal_with_different_bucket_sizes(): assert not (y != x) +def test_equal_with_different_insertion_order(): + x = pmap([(i, i) for i in range(50)], 10) + y = pmap([(i, i) for i in range(49, -1, -1)], 10) + + assert x == y + assert not (x != y) + + assert y == x + assert not (y != x) + + def test_not_equal(): x = m(a=1, b=2, c=3) y = m(a=1, b=2) From a303c4ebacbc51031682318193f68303344a01e3 Mon Sep 17 00:00:00 2001 From: Dennis Tomas Date: Sat, 26 Aug 2017 09:49:08 +0200 Subject: [PATCH 3/6] Added length check to pvector equality check. --- pvectorcmodule.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pvectorcmodule.c b/pvectorcmodule.c index 3782ba1..38ff669 100644 --- a/pvectorcmodule.c +++ b/pvectorcmodule.c @@ -365,6 +365,16 @@ static PyObject* PVector_richcompare(PyObject *v, PyObject *w, int op) { vlen = vt->count; wlen = wt->count; + if (vlen != wlen) { + if (op == Py_EQ) { + Py_INCREF(Py_False); + return Py_False; + } else if (op == Py_NE) { + Py_INCREF(Py_True); + return Py_True; + } + } + /* Search for the first index where items are different. */ PyObject *left = NULL; PyObject *right = NULL; From 4da1afa1b0ce89e570fff94e0ed8750a7df569e8 Mon Sep 17 00:00:00 2001 From: Dennis Tomas Date: Sat, 26 Aug 2017 09:49:25 +0200 Subject: [PATCH 4/6] Generate hash while comparing for equality. --- pyrsistent/_pmap.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/pyrsistent/_pmap.py b/pyrsistent/_pmap.py index c8a43ab..0f12cae 100644 --- a/pyrsistent/_pmap.py +++ b/pyrsistent/_pmap.py @@ -133,14 +133,35 @@ def __eq__(self, other): if len(self) != len(other): return False if isinstance(other, PMap): - try: - if hash(self) != hash(other): - return False - except TypeError: - # self or other contains non-hashable values - pass + if (hasattr(self, '_cached_hash') and hasattr(other, '_cached_hash') + and self._cached_hash != other._cached_hash): + return False if self._buckets == other._buckets: return True + self_as_set = other_as_set = self_hash = other_hash = None + if hasattr(self, '_cached_hash'): + self_hash = self._cached_hash + else: + self_as_set = frozenset(self.iteritems()) + try: + self._cached_hash = self_hash = hash(self_as_set) + except TypeError: + # self contains non-hashable values + pass + if self_hash is not None: + if hasattr(other, '_cached_hash'): + other_hash = other._cached_hash + else: + other_as_set = frozenset(other.iteritems()) + try: + other._cached_hash = other_hash = hash(other_as_set) + except TypeError: + # other contains non-hashable values + pass + if other_hash is not None and self_hash != other_hash: + return False + if self_as_set is not None and other_as_set is not None: + return self_as_set == other_as_set return dict(self.iteritems()) == dict(other.iteritems()) elif isinstance(other, dict): return dict(self.iteritems()) == other From 059d5591895e17b04ea1d64c036d5b9a126087f3 Mon Sep 17 00:00:00 2001 From: Dennis Tomas Date: Thu, 16 Nov 2017 07:13:00 +0100 Subject: [PATCH 5/6] Removed some code from PMap.__eq__(). --- pyrsistent/_pmap.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/pyrsistent/_pmap.py b/pyrsistent/_pmap.py index 0f12cae..9319d0e 100644 --- a/pyrsistent/_pmap.py +++ b/pyrsistent/_pmap.py @@ -138,30 +138,6 @@ def __eq__(self, other): return False if self._buckets == other._buckets: return True - self_as_set = other_as_set = self_hash = other_hash = None - if hasattr(self, '_cached_hash'): - self_hash = self._cached_hash - else: - self_as_set = frozenset(self.iteritems()) - try: - self._cached_hash = self_hash = hash(self_as_set) - except TypeError: - # self contains non-hashable values - pass - if self_hash is not None: - if hasattr(other, '_cached_hash'): - other_hash = other._cached_hash - else: - other_as_set = frozenset(other.iteritems()) - try: - other._cached_hash = other_hash = hash(other_as_set) - except TypeError: - # other contains non-hashable values - pass - if other_hash is not None and self_hash != other_hash: - return False - if self_as_set is not None and other_as_set is not None: - return self_as_set == other_as_set return dict(self.iteritems()) == dict(other.iteritems()) elif isinstance(other, dict): return dict(self.iteritems()) == other From 220e17bd4efc8ab48e7bc24a8c32fef7765e4d42 Mon Sep 17 00:00:00 2001 From: Dennis Tomas Date: Thu, 16 Nov 2017 07:22:24 +0100 Subject: [PATCH 6/6] Added length check to PythonPVector equality check. --- pyrsistent/_pvector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyrsistent/_pvector.py b/pyrsistent/_pvector.py index 663942d..a2633bd 100644 --- a/pyrsistent/_pvector.py +++ b/pyrsistent/_pvector.py @@ -76,10 +76,10 @@ def __iter__(self): return iter(self.tolist()) def __ne__(self, other): - return compare_pvector(self, other, operator.ne) + return self._count != len(other) or compare_pvector(self, other, operator.ne) def __eq__(self, other): - return self is other or compare_pvector(self, other, operator.eq) + return self is other or self._count == len(other) and compare_pvector(self, other, operator.eq) def __gt__(self, other): return compare_pvector(self, other, operator.gt)