Skip to content

Commit

Permalink
change max_n_below -> min_n_below
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Jun 2, 2018
1 parent 316cd80 commit a1210a1
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
8 changes: 5 additions & 3 deletions sourmash/sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,15 +533,17 @@ def _fill_min_n_below(self):
if isinstance(n, Leaf):
parent = self.parent(i)
if parent.pos not in self.missing_nodes:
min_n_below = parent.node.metadata.get('min_n_below', 0)
min_n_below = parent.node.metadata.get('min_n_below', 1)
min_n_below = min(len(n.data.minhash.get_mins()),
min_n_below)
if min_n_below == 0:
min_n_below = 1
parent.node.metadata['min_n_below'] = min_n_below

current = parent
parent = self.parent(parent.pos)
while parent and parent.pos not in self.missing_nodes:
min_n_below = parent.node.metadata.get('min_n_below', 0)
min_n_below = parent.node.metadata.get('min_n_below', 1)
min_n_below = min(current.node.metadata['min_n_below'],
min_n_below)
parent.node.metadata['min_n_below'] = min_n_below
Expand Down Expand Up @@ -699,7 +701,7 @@ def load(info, storage=None):

def update(self, parent):
parent.data.update(self.data)
min_n_below = min(parent.metadata.get('min_n_below', 0),
min_n_below = min(parent.metadata.get('min_n_below', 1),
self.metadata.get('min_n_below'))
parent.metadata['min_n_below'] = min_n_below

Expand Down
19 changes: 13 additions & 6 deletions sourmash/sbtmh.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def update(self, parent):
min_n_below = parent.metadata.get('min_n_below', 1)
min_n_below = min(len(self.data.minhash.get_mins()),
min_n_below)

parent.metadata['min_n_below'] = min_n_below

@property
Expand Down Expand Up @@ -94,10 +95,10 @@ def search_minhashes(node, sig, threshold, results=None, downsample=True):
else: # Node or Leaf, Nodegraph by minhash comparison
if len(mins):
matches = sum(1 for value in mins if node.data.get(value))
max_mins = node.metadata.get('min_n_below', -1)
if max_mins == -1:
min_n_below = node.metadata.get('min_n_below', -1)
if min_n_below == -1:
raise Exception('cannot do similarity search on this SBT; need to rebuild.')
score = float(matches) / max_mins
score = float(matches) / min_n_below

if results is not None:
results[node.name] = score
Expand Down Expand Up @@ -130,11 +131,17 @@ def search(self, node, sig, threshold, results=None):
raise
else: # internal object, not leaf.
if len(mins):

# calculate the maximum possible similarity score below
# this node, based on the number of matches at this node,
# divided by the smallest minhash size below this node
# (which should be an upper bound on the Jaccard similarity
# of any signature below this point)
matches = sum(1 for value in mins if node.data.get(value))
max_mins = node.metadata.get('min_n_below', -1)
if max_mins == -1:
min_n_below = node.metadata.get('min_n_below', -1)
if min_n_below == -1:
raise Exception('cannot do similarity search on this SBT; need to rebuild.')
score = float(matches) / max_mins
score = float(matches) / min_n_below

if results is not None:
results[node.name] = score
Expand Down

0 comments on commit a1210a1

Please sign in to comment.