From a6665b1ea02c266c4412d1a310a300994e953409 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Oct 2018 16:04:07 -0500 Subject: [PATCH 1/5] Make dissoc smarter. Dissoc was really fast when the keys to remove were relatively few compared to the size of the dictionary. This version will pick the smaller container to iterate over, either building up a new dictionary or tearing down a copy. A small heuristic is used to decide if the overhead of building a set is worth the effort. My rudimentary testing shows that if the number of keys is less than 60% of the length of the dictionary, tearing down a copy is faster. If keys is very large, we calculate the set difference between the mapping's keys and keys and build a new dictionary out of the remaining keys. --- toolz/dicttoolz.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/toolz/dicttoolz.py b/toolz/dicttoolz.py index a5f0044a..9efc2dfb 100644 --- a/toolz/dicttoolz.py +++ b/toolz/dicttoolz.py @@ -197,7 +197,7 @@ def assoc(d, key, value, factory=dict): return d2 -def dissoc(d, *keys): +def dissoc(d, *keys, **kwargs): """ Return a new dict with the given key(s) removed. New dict has d[key] deleted for each supplied key. @@ -210,11 +210,20 @@ def dissoc(d, *keys): >>> dissoc({'x': 1}, 'y') # Ignores missing keys {'x': 1} """ - d2 = copy.copy(d) - for key in keys: - if key in d2: - del d2[key] - return d2 + factory = _get_factory(dissoc, kwargs) + d2 = factory() + + if len(keys) < len(d) * .6: + d2.update(d) + for key in keys: + if key in d2: + del d2[key] + else: + remaining = set(d) + remaining.difference_update(keys) + for k in remaining: + d2[k] = d[k] + return d2 def assoc_in(d, keys, value, factory=dict): From d55666d6eb9d19b963d4be6d3cd8c2edd7533547 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Oct 2018 16:12:27 -0500 Subject: [PATCH 2/5] Remove import. 
--- toolz/dicttoolz.py | 1 - 1 file changed, 1 deletion(-) diff --git a/toolz/dicttoolz.py b/toolz/dicttoolz.py index 9efc2dfb..ce2e53e3 100644 --- a/toolz/dicttoolz.py +++ b/toolz/dicttoolz.py @@ -1,4 +1,3 @@ -import copy import operator from toolz.compatibility import (map, zip, iteritems, iterkeys, itervalues, reduce) From 03cf714895ea317b84e888344d784b1fa4e167d1 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Oct 2018 16:18:12 -0500 Subject: [PATCH 3/5] Fix indentation error. --- toolz/dicttoolz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolz/dicttoolz.py b/toolz/dicttoolz.py index ce2e53e3..7ede1f62 100644 --- a/toolz/dicttoolz.py +++ b/toolz/dicttoolz.py @@ -222,7 +222,7 @@ def dissoc(d, *keys, **kwargs): remaining.difference_update(keys) for k in remaining: d2[k] = d[k] - return d2 + return d2 def assoc_in(d, keys, value, factory=dict): From c6a2779023958b268105cd6e37d358d5a5563d00 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Thu, 18 Oct 2018 22:39:47 -0500 Subject: [PATCH 4/5] Add dissoc to curried namespace. Looks like it might have been forgotten. --- toolz/curried/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/toolz/curried/__init__.py b/toolz/curried/__init__.py index 43aeffd4..c9c90bbb 100644 --- a/toolz/curried/__init__.py +++ b/toolz/curried/__init__.py @@ -59,6 +59,7 @@ assoc_in = toolz.curry(toolz.assoc_in) cons = toolz.curry(toolz.cons) countby = toolz.curry(toolz.countby) +dissoc = toolz.curry(toolz.dissoc) do = toolz.curry(toolz.do) drop = toolz.curry(toolz.drop) excepts = toolz.curry(toolz.excepts) From f058785eda01fabee1e6957bcdb09c365a33dbe1 Mon Sep 17 00:00:00 2001 From: Ryan Grout Date: Wed, 5 Dec 2018 23:59:33 -0600 Subject: [PATCH 5/5] Update tests for dissoc to use factory keyword argument. 
--- toolz/tests/test_dicttoolz.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/toolz/tests/test_dicttoolz.py b/toolz/tests/test_dicttoolz.py index 0226e49b..d7b78648 100644 --- a/toolz/tests/test_dicttoolz.py +++ b/toolz/tests/test_dicttoolz.py @@ -90,16 +90,16 @@ def test_assoc(self): def test_dissoc(self): D, kw = self.D, self.kw - assert dissoc(D({"a": 1}), "a") == D({}) - assert dissoc(D({"a": 1, "b": 2}), "a") == D({"b": 2}) - assert dissoc(D({"a": 1, "b": 2}), "b") == D({"a": 1}) - assert dissoc(D({"a": 1, "b": 2}), "a", "b") == D({}) - assert dissoc(D({"a": 1}), "a") == dissoc(dissoc(D({"a": 1}), "a"), "a") + assert dissoc(D({"a": 1}), "a", **kw) == D({}) + assert dissoc(D({"a": 1, "b": 2}), "a", **kw) == D({"b": 2}) + assert dissoc(D({"a": 1, "b": 2}), "b", **kw) == D({"a": 1}) + assert dissoc(D({"a": 1, "b": 2}), "a", "b", **kw) == D({}) + assert dissoc(D({"a": 1}), "a", **kw) == dissoc(dissoc(D({"a": 1}), "a", **kw), "a", **kw) # Verify immutability: d = D({'x': 1}) oldd = d - d2 = dissoc(d, 'x') + d2 = dissoc(d, 'x', **kw) assert d is oldd assert d2 is not oldd