Skip to content

Commit

Permalink
fix divide by zero issue in MinHash.contained_by (#572)
Browse files Browse the repository at this point in the history
* fix divide by zero issue in contained_by
  • Loading branch information
ctb authored and luizirber committed Dec 5, 2018
1 parent d2e2e3f commit 4aab62f
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
2 changes: 2 additions & 0 deletions sourmash/_minhash.pyx
Expand Up @@ -364,6 +364,8 @@ cdef class MinHash(object):
"""\
Calculate how much of self is contained by other.
"""
if not len(self):
return 0.0
return self.count_common(other) / len(self.get_mins())

def similarity_ignore_maxhash(self, MinHash other):
Expand Down
20 changes: 20 additions & 0 deletions tests/test__minhash.py
Expand Up @@ -70,6 +70,26 @@ def test_basic_dna(track_abundance):
assert len(b) == 1


def test_div_zero(track_abundance):
    """Check that similarity with an empty MinHash gives 0, not a divide-by-zero error.

    Both directions are exercised: populated vs. empty and empty vs. populated.
    """
    populated = MinHash(1, 4, track_abundance=track_abundance)
    # Take the cleared copy before any sequence is added to the original.
    empty = populated.copy_and_clear()

    populated.add_sequence('ATGC')

    assert populated.similarity(empty) == 0
    assert empty.similarity(populated) == 0


def test_div_zero_contained(track_abundance):
    """Check that contained_by with an empty MinHash gives 0, not a divide-by-zero error.

    Both directions are exercised: populated vs. empty and empty vs. populated.
    """
    populated = MinHash(1, 4, track_abundance=track_abundance)
    # Take the cleared copy before any sequence is added to the original.
    empty = populated.copy_and_clear()

    populated.add_sequence('ATGC')

    assert populated.contained_by(empty) == 0
    assert empty.contained_by(populated) == 0


def test_bytes_dna(track_abundance):
mh = MinHash(1, 4, track_abundance=track_abundance)
mh.add_sequence('ATGC')
Expand Down

0 comments on commit 4aab62f

Please sign in to comment.