Skip to content

Commit

Permalink
renamed save to _save on signature; changed e-mail to empty by default
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Jun 6, 2016
1 parent 9630a1f commit a2aeca1
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 9 deletions.
7 changes: 4 additions & 3 deletions sourmash
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ Commands can be:
help='number of hashes to use in each sketch')
parser.add_argument('-f', '--force', action='store_true')
parser.add_argument('-o', '--output', type=argparse.FileType('wt'))
parser.add_argument('--email', type=str, default='')
args = parser.parse_args(args)

print('computing signatures for files:', args.filenames,
Expand Down Expand Up @@ -151,7 +152,7 @@ Commands can be:
E.add_sequence(s, args.force)

# convert into a signature
siglist = [ sig.SourmashSignature('titus@idyll.org', E,
siglist = [ sig.SourmashSignature(args.email, E,
filename=filename) ]

# save!
Expand Down Expand Up @@ -280,6 +281,7 @@ Commands can be:
p.add_argument('mash_csvfile')
p.add_argument('-o', '--output', type=argparse.FileType('wt'),
default=sys.stdout)
p.add_argument('--email', type=str, default='')
args = p.parse_args(args)

with open(args.mash_csvfile, 'r') as fp:
Expand All @@ -302,8 +304,7 @@ Commands can be:
e = sourmash_lib.Estimators(len(hashes), ksize)
for h in hashes:
e.mh.add_hash(h)
s = sig.SourmashSignature('titus@idyll.org',
e, filename=name)
s = sig.SourmashSignature(args.email, e, filename=name)
siglist.append(s)
print('loaded signature:', name,
s.md5sum()[:8], file=sys.stderr)
Expand Down
24 changes: 18 additions & 6 deletions sourmash_lib/signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def name(self):
else:
return self.md5sum()[:8]

def save(self):
def _save(self):
"Return metadata and a dictionary containing the sketch info."
e = dict(self.d)
estimator = self.estimator
Expand All @@ -65,8 +65,9 @@ def save(self):
self.d.get('filename'), sketch

def similarity(self, other):
"Compute similarity with the stored MinHash."
"Compute similarity with the other MinHash signature."
return self.estimator.similarity(other.estimator)
jaccard = similarity


def load_signatures(data, select_ksize=None, ignore_md5sum=False):
Expand Down Expand Up @@ -135,9 +136,7 @@ def save_signatures(siglist, fp=None):
"Save multiple signatures into a YAML string (or into file handle 'fp')"
top_records = {}
for sig in siglist:
email, name, filename, sketch = sig.save()
if not email:
raise Exception('email must be non-unique')
email, name, filename, sketch = sig._save()
k = (email, name, filename)

x = top_records.get(k, [])
Expand Down Expand Up @@ -184,11 +183,24 @@ def test_roundtrip():
assert sig2.similarity(sig) == 1.0


def test_roundtrip_empty_email():
    """Check that a signature with an empty e-mail survives a save/load cycle.

    A signature is built with '' as its e-mail, serialized with
    save_signatures, and read back with load_signatures. The reloaded
    signature must compare as identical (similarity 1.0) to the original,
    in both directions.
    """
    e = sourmash_lib.Estimators(n=1, ksize=20)
    e.add("AT" * 10)
    sig = SourmashSignature('', e)

    # With no file handle given, the serialized form comes back as a string.
    s = save_signatures([sig])
    siglist = load_signatures(s)
    sig2 = siglist[0]

    # similarity() reads the estimator on both sides, so these two checks
    # also cover that the reloaded signature carries a usable estimator.
    assert sig.similarity(sig2) == 1.0
    assert sig2.similarity(sig) == 1.0


def test_md5():
e = sourmash_lib.Estimators(n=1, ksize=20)
e.mh.add_hash(5)
sig = SourmashSignature('titus@idyll.org', e)
print(sig.save())
print(sig._save())
assert sig.md5sum() == 'eae27d77ca20db309e056e3d2dcd7d69', sig.md5sum()


Expand Down

0 comments on commit a2aeca1

Please sign in to comment.