From 4aab62f65fb08044e9a43ec49331b65be8b5ae15 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown"
Date: Tue, 4 Dec 2018 17:35:27 -0800
Subject: [PATCH] fix divide by zero issue in MinHash.contained_by (#572)

* fix divide by zero issue in contained_by
---
 sourmash/_minhash.pyx  |  2 ++
 tests/test__minhash.py | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/sourmash/_minhash.pyx b/sourmash/_minhash.pyx
index b4aeb5ba48..5df822c0c9 100644
--- a/sourmash/_minhash.pyx
+++ b/sourmash/_minhash.pyx
@@ -364,6 +364,8 @@ cdef class MinHash(object):
         """\
         Calculate how much of self is contained by other.
         """
+        if not len(self):
+            return 0.0
         return self.count_common(other) / len(self.get_mins())
 
     def similarity_ignore_maxhash(self, MinHash other):
diff --git a/tests/test__minhash.py b/tests/test__minhash.py
index d8592eff8c..00ac5a6e91 100644
--- a/tests/test__minhash.py
+++ b/tests/test__minhash.py
@@ -70,6 +70,26 @@ def test_basic_dna(track_abundance):
     assert len(b) == 1
 
 
+def test_div_zero(track_abundance):
+    # verify that empty MHs do not yield divide by zero errors for similarity
+    mh = MinHash(1, 4, track_abundance=track_abundance)
+    mh2 = mh.copy_and_clear()
+
+    mh.add_sequence('ATGC')
+    assert mh.similarity(mh2) == 0
+    assert mh2.similarity(mh) == 0
+
+
+def test_div_zero_contained(track_abundance):
+    # verify that empty MHs do not yield divide by zero errors for contained_by
+    mh = MinHash(1, 4, track_abundance=track_abundance)
+    mh2 = mh.copy_and_clear()
+
+    mh.add_sequence('ATGC')
+    assert mh.contained_by(mh2) == 0
+    assert mh2.contained_by(mh) == 0
+
+
 def test_bytes_dna(track_abundance):
     mh = MinHash(1, 4, track_abundance=track_abundance)
     mh.add_sequence('ATGC')