Skip to content

Commit

Permalink
Keep smaller temp lists in _signatures (#840)
Browse files Browse the repository at this point in the history
* run add_many more frequently in _signatures
* Remove key from defaultdict instead of reusing the list
* add comments
  • Loading branch information
luizirber committed Jan 15, 2020
1 parent cba11c5 commit 34514e7
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
13 changes: 13 additions & 0 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ def time_add_hash(self):
for i in range(10000):
mh.add_hash(i)

# Benchmark: make one add_many call on self.mh, passing the integers
# 0..999 as a single list (contrast with the per-item add_hash loop).
# NOTE(review): self.mh is set up outside this hunk; presumably timed by
# the benchmark runner because of the time_ prefix -- confirm.
def time_add_many(self):
mh = self.mh
mh.add_many(list(range(1000)))

def time_compare(self):
mh = self.mh
other_mh = self.populated_mh
Expand Down Expand Up @@ -84,6 +88,15 @@ def peakmem_add_sequence(self):
for seq in sequences:
mh.add_sequence(seq)

# Benchmark: call add_hash on self.mh once for each integer in 0..9999.
# NOTE(review): presumably measured for peak memory rather than time,
# given the peakmem_ prefix -- confirm against the benchmark runner.
def peakmem_add_hash(self):
mh = self.mh
for i in range(10000):
mh.add_hash(i)

# Benchmark: make one add_many call on self.mh, passing the integers
# 0..999 as a single list.
# NOTE(review): presumably measured for peak memory rather than time,
# given the peakmem_ prefix -- confirm against the benchmark runner.
def peakmem_add_many(self):
mh = self.mh
mh.add_many(list(range(1000)))


####################

Expand Down
18 changes: 16 additions & 2 deletions sourmash/lca/lca_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,22 @@ def _signatures(self):

for (k, v) in self.hashval_to_idx.items():
for vv in v:
temp_vals[vv].append(k)

temp_hashes = temp_vals[vv]
temp_hashes.append(k)

# 50 is an arbitrary number. If you really want
# to micro-optimize, a list is resized and grows in this pattern:
# 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
# (from https://github.com/python/cpython/blob/b2b4a51f7463a0392456f7772f33223e57fa4ccc/Objects/listobject.c#L57)
if len(temp_hashes) > 50:
sigd[vv].add_many(temp_hashes)

# Sigh, python 2... when it goes away,
# we can do `temp_hashes.clear()` instead.
del temp_vals[vv]

# We loop over temp_vals again to add any remaining hashes
# (each leftover list of hashes holds at most 50 items)
for sig, vals in temp_vals.items():
sigd[sig].add_many(vals)

Expand Down
2 changes: 1 addition & 1 deletion src/core/src/ffi/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ unsafe fn kmerminhash_add_many(

let hashes = {
assert!(!hashes_ptr.is_null());
slice::from_raw_parts(hashes_ptr as *mut u64, insize)
slice::from_raw_parts(hashes_ptr as *const u64, insize)
};

for hash in hashes {
Expand Down

0 comments on commit 34514e7

Please sign in to comment.