Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do add_many in Rust, use it in LCA _signatures #826

Merged
merged 3 commits into from Jan 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/sourmash.h
Expand Up @@ -93,6 +93,8 @@ uint64_t kmerminhash_get_min_idx(KmerMinHash *ptr, uint64_t idx);

const uint64_t *kmerminhash_get_mins(KmerMinHash *ptr);

void kmerminhash_add_many(KmerMinHash *ptr, const uint64_t *hashes_ptr, uintptr_t insize);

uintptr_t kmerminhash_get_mins_size(KmerMinHash *ptr);

HashFunctions kmerminhash_hash_function(KmerMinHash *ptr);
Expand Down
8 changes: 4 additions & 4 deletions sourmash/_minhash.py
Expand Up @@ -68,6 +68,8 @@ def hash_murmur(kmer, seed=MINHASH_DEFAULT_SEED):


class MinHash(RustObject):
__dealloc_func__ = lib.kmerminhash_free

def __init__(
self,
n,
Expand Down Expand Up @@ -98,7 +100,6 @@ def __init__(
self._objptr = lib.kmerminhash_new(
n, ksize, is_protein, dayhoff, hp, seed, int(max_hash), track_abundance
)
self.__dealloc_func__ = lib.kmerminhash_free

if mins:
if track_abundance:
Expand Down Expand Up @@ -191,11 +192,10 @@ def add_many(self, hashes):
if isinstance(hashes, MinHash):
self._methodcall(lib.kmerminhash_add_from, hashes._objptr)
else:
for hash in hashes:
self._methodcall(lib.kmerminhash_add_hash, hash)
self._methodcall(lib.kmerminhash_add_many, list(hashes), len(hashes))

def remove_many(self, hashes):
"Add many hashes in at once."
"Remove many hashes at once."
self._methodcall(lib.kmerminhash_remove_many, list(hashes), len(hashes))

def update(self, other):
Expand Down
6 changes: 5 additions & 1 deletion sourmash/lca/lca_utils.py
Expand Up @@ -378,10 +378,14 @@ def _signatures(self):

debug('creating signatures for LCA DB...')
sigd = defaultdict(minhash.copy_and_clear)
temp_vals = defaultdict(list)

for (k, v) in self.hashval_to_idx.items():
for vv in v:
sigd[vv].add_hash(k)
temp_vals[vv].append(k)

for sig, vals in temp_vals.items():
sigd[sig].add_many(vals)

debug('=> {} signatures!', len(sigd))
return sigd
Expand Down
24 changes: 24 additions & 0 deletions src/core/src/ffi/minhash.rs
Expand Up @@ -162,6 +162,30 @@ unsafe fn kmerminhash_get_mins(ptr: *mut KmerMinHash) -> Result<*const u64> {
}
}

// FFI entry point: adds every hash in a caller-supplied buffer to the
// `KmerMinHash` behind `ptr` by calling `add_hash` once per element.
//
// Contract the caller must uphold (only the null checks are verified here):
// * `ptr` is non-null and points to a live `KmerMinHash` that is not aliased
//   for the duration of the call.
// * `hashes_ptr` is non-null and points to at least `insize` initialized,
//   properly aligned `u64` values that stay valid for the duration of the call.
//
// The buffer is only read, never written.
ffi_fn! {
    unsafe fn kmerminhash_add_many(
        ptr: *mut KmerMinHash,
        hashes_ptr: *const u64,
        insize: usize,
    ) -> Result<()> {
        let mh = {
            assert!(!ptr.is_null());
            // SAFETY: non-null checked above; validity and exclusivity are the
            // caller's responsibility (see contract above).
            &mut *ptr
        };

        let hashes = {
            assert!(!hashes_ptr.is_null());
            // SAFETY: non-null checked above; the caller guarantees `insize`
            // readable `u64`s. `from_raw_parts` takes a `*const T`, so the
            // pointer is passed as-is rather than being cast to `*mut`.
            slice::from_raw_parts(hashes_ptr, insize)
        };

        for &hash in hashes {
            mh.add_hash(hash);
        }

        Ok(())
    }
}

ffi_fn! {
unsafe fn kmerminhash_get_abunds(ptr: *mut KmerMinHash) -> Result<*const u64> {
let mh = {
Expand Down