# Table of Contents
 <p><div class="lev1"><a href="#MMETSP"><span class="toc-item-num">1&nbsp;&nbsp;</span>MMETSP</a></div>

In [1]:
# Change the working directory to the parent directory; -q keeps IPython from
# echoing the new path. The relative "mmetsp/..." paths used further down are
# presumably meant to resolve from there — TODO confirm the repository layout.
# NOTE(review): this is not idempotent — re-running the cell without
# restarting the kernel moves up one more level each time.
cd -q ..

In [2]:
from collections import defaultdict
from glob import glob
import os
from functools import partial

from IPython.display import Image

from sourmash_lib import signature
from sbt import SBT, GraphFactory
from sbtmh import search_minhashes, SigLeaf

In [3]:
# Positional arguments handed to GraphFactory, kept together so they are easy
# to tune. NOTE(review): presumably (k-mer size, starting table size, number
# of tables) — confirm against sbt.GraphFactory before relying on this.
factory_args = (31, 1e5, 4)

# Shared factory passed to every SBT built later in the notebook.
factory = GraphFactory(*factory_args)

# MMETSP

In [4]:
# Path of the signature file used as the query in the searches below.
sig_to_search = "mmetsp/SRR1296807.left.fq.head.sig"

# Load every signature stored in the query file, then keep only the first
# one as the query.
with open(sig_to_search, 'r') as data:
    query_signatures = signature.load_signatures(data)
    to_search = query_signatures[0]

In [5]:
# One SBT per value of `d`, all sharing the same factory, so the same set of
# leaves can be compared across trees built with different `d`.
trees = {d: SBT(factory, d=d) for d in (2, 5, 10)}

# glob() yields paths in arbitrary, filesystem-dependent order. Sorting makes
# the insertion order (and presumably the resulting tree layout and the
# search counts reported later) the same on every run and every machine.
for f in sorted(glob("mmetsp/*.sig")):
    with open(f, 'r') as data:
        sig = signature.load_signatures(data)
    # Only the first signature of each file is used; the leaf is labelled
    # with the file's base name.
    leaf = SigLeaf(os.path.basename(f), sig[0])
    # The very same leaf object is added to every tree.
    for tree in trees.values():
        tree.add_node(leaf)

In [6]:
# results[d] is the dict handed to search_minhashes for the tree keyed by d;
# the statistics cell later reports len(results[d]) as the number of searches.
results = defaultdict(dict)

print('*' * 60)
print("{}:".format(sig_to_search))

for d in trees:
    # Bind this tree's bookkeeping dict to the search function.
    search = partial(search_minhashes, results=results[d])
    # 0.1 is the third argument to find(); presumably the minimum
    # similarity for a match — TODO confirm against SBT.find.
    matches = trees[d].find(search, to_search, 0.1)
    report = [(str(match.metadata), match.data.similarity(to_search))
              for match in matches]
    # One (name, similarity) tuple per line, then a blank separator line.
    print("\n".join(map(str, report)))
    print()

************************************************************
mmetsp/SRR1296807.left.fq.head.sig:
('SRR1296804.left.fq.head.sig', 0.30399999022483826)
('SRR1296807.left.fq.head.sig', 1.0)
('SRR1296805.left.fq.head.sig', 0.33000001311302185)
('SRR1296806.left.fq.head.sig', 0.28999999165534973)

('SRR1296805.left.fq.head.sig', 0.33000001311302185)
('SRR1296807.left.fq.head.sig', 1.0)
('SRR1296804.left.fq.head.sig', 0.30399999022483826)
('SRR1296806.left.fq.head.sig', 0.28999999165534973)

('SRR1296804.left.fq.head.sig', 0.30399999022483826)
('SRR1296805.left.fq.head.sig', 0.33000001311302185)
('SRR1296807.left.fq.head.sig', 1.0)
('SRR1296806.left.fq.head.sig', 0.28999999165534973)



In [7]:
# Per-tree summary: number of entries recorded in results[d] during the
# search, number of node slots in the tree, and how many of those slots are
# actually occupied (not None).
for d in sorted(results):
    nodes = trees[d].nodes
    # Use a distinct name for the node so it does not shadow the tree key.
    used = sum(1 for node in nodes if node is not None)
    total = len(nodes)
    # The '%' format type multiplies by 100 and rounds once, instead of
    # rounding to three decimals and then again to one.
    print("{}-ary: {} searches, {} nodes allocated ({} ({:.1%}) used)".format(
        d, len(results[d]), total, used, used / total))

2-ary: 59 searches, 1023 nodes allocated (995 (97.3%) used)
5-ary: 61 searches, 781 nodes allocated (623 (79.8%) used)
10-ary: 81 searches, 1111 nodes allocated (554 (49.9%) used)
