Skip to content

Commit

Permalink
clean up 'describe' a little bit, add a test (#1861)
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Mar 4, 2022
1 parent 5ba3cb4 commit efebe8a
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 12 deletions.
6 changes: 4 additions & 2 deletions doc/command-line.md
Original file line number Diff line number Diff line change
Expand Up @@ -1399,8 +1399,10 @@ Identifiers are constructed by using the first space delimited word in
the signature name.

One way to build a picklist is to use `sourmash sig describe --csv
out.csv <signatures>` to construct an initial CSV file that you can
then edit further.
out.csv <signatures>` or `sourmash sig manifest -o out.csv
<filename_or_db>` to construct an initial CSV file that you can then
edit further; after editing, these can be passed in via the picklist
argument `--picklist out.csv::manifest`.

The picklist functionality also supports excluding (rather than
including) signatures matching the picklist arguments. To specify a
Expand Down
16 changes: 9 additions & 7 deletions src/sourmash/sig/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,14 +206,16 @@ def describe(args):

# write CSV?
w = None
csv_fp = None
csv_obj = None
if args.csv:
csv_obj = sourmash_args.FileOutputCSV(args.csv)
csv_fp = csv_obj.open()

# CTB: might want to switch to sourmash_args.FileOutputCSV here?
csv_fp = open(args.csv, 'w', newline='')
w = csv.DictWriter(csv_fp,
['signature_file', 'md5', 'ksize', 'moltype', 'num',
'scaled', 'n_hashes', 'seed', 'with_abundance',
'name', 'filename', 'license'],
['signature_file', 'md5', 'ksize', 'moltype',
'num', 'scaled', 'n_hashes', 'seed',
'with_abundance', 'name', 'filename', 'license'],
extrasaction='ignore')
w.writeheader()

Expand Down Expand Up @@ -260,8 +262,8 @@ def describe(args):
signature license: {license}
''', **locals())

if csv_fp:
csv_fp.close()
if csv_obj:
csv_obj.close()

if picklist:
sourmash_args.report_picklist(args, picklist)
Expand Down
36 changes: 33 additions & 3 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -2928,9 +2928,10 @@ def test_sig_describe_empty(c):
assert 'source file: ** no name **' in c.last_result.out


@utils.in_tempdir
def test_sig_describe_2(c):
# get info in CSV spreadsheet
def test_sig_describe_2_csv(runtmp):
# output info in CSV spreadsheet
c = runtmp

sig47 = utils.get_test_data('47.fa.sig')
sig63 = utils.get_test_data('63.fa.sig')
c.run_sourmash('sig', 'describe', sig47, sig63, '--csv', 'out.csv')
Expand All @@ -2950,6 +2951,35 @@ def test_sig_describe_2(c):
assert n == 2


def test_sig_describe_2_csv_as_picklist(runtmp):
    """Round-trip a 'sig describe --csv' file back in as a manifest picklist.

    The first run writes a CSV describing the 47.fa signature; the second
    run passes that CSV via '--picklist <csv>::manifest' and checks that
    the signature is still reported in the text output.
    """
    sig_path = utils.get_test_data('47.fa.sig')
    csv_path = runtmp.output('out.csv')

    # step 1: produce the CSV from 'describe'
    runtmp.run_sourmash('sig', 'describe', sig_path, '--csv', csv_path)

    # step 2: use that CSV as a manifest-style picklist
    picklist_arg = f'{csv_path}::manifest'
    runtmp.run_sourmash('sig', 'describe', sig_path,
                        '--picklist', picklist_arg)

    result = runtmp.last_result
    stdout_text = result.out
    print(result)

    expected_output = """\
signature: NC_009665.1 Shewanella baltica OS185, complete genome
source file: 47.fa
md5: 09a08691ce52952152f0e866a59f6261
k=31 molecule=DNA num=0 scaled=1000 seed=42 track_abundance=0
size: 5177
signature license: CC0
""".splitlines()

    # each expected line must appear somewhere in the describe output
    for expected_line in expected_output:
        assert expected_line.strip() in stdout_text


@utils.in_tempdir
def test_sig_overlap(c):
# get overlap details
Expand Down

0 comments on commit efebe8a

Please sign in to comment.