Skip to content

Commit

Permalink
Accept file list in sourmash sig cat (#1236)
Browse files Browse the repository at this point in the history
* Allow loading DBs from a file list
* add test and impl for passing DBs in the file list
  • Loading branch information
luizirber committed Nov 5, 2020
1 parent a6c800e commit 95bd546
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 0 deletions.
15 changes: 15 additions & 0 deletions sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,21 @@ def _load_database(filename, traverse_yield_all, *, cache_size=None):
except Exception as exc:
pass

if not loaded: # try load signatures from single file (list of signature paths)
try:
db = []
with open(filename, 'rt') as fp:
for line in fp:
line = line.strip()
if line:
sigs = load_file_as_signatures(line)
db += list(sigs)

loaded = True
dbtype = DatabaseType.SIGLIST
except Exception as exc:
pass

if not loaded: # try load as SBT
try:
db = load_sbt_index(filename, cache_size=cache_size)
Expand Down
46 changes: 46 additions & 0 deletions tests/test_cmd_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,52 @@ def test_sig_cat_2_out_inplace(c):
assert repr(siglist) == """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377)]"""


@utils.in_tempdir
def test_sig_cat_filelist(c):
    """Run `sig cat` on a file list that names three signature files.

    The paths of three test-data signature files are written, one per
    line, into a file called "filelist". `sourmash sig cat` is then run
    with that file as its only input and `-o out.sig`, and the repr of
    the signatures loaded back from out.sig is compared against the
    expected listing.
    """
    # Paths that go into the file list, in the order they are written.
    inputs = (
        utils.get_test_data('47.fa.sig'),
        utils.get_test_data('track_abund/47.fa.sig'),
        utils.get_test_data('47+63-multisig.sig'),
    )

    # One path per line; no newline after the last entry.
    listing = c.output("filelist")
    with open(listing, 'w') as handle:
        handle.write("\n".join(inputs))

    c.run_sourmash('sig', 'cat', listing, '-o', 'out.sig')

    # Read back what `sig cat` wrote to the -o target.
    result_path = c.output('out.sig')
    loaded = list(sourmash.load_signatures(result_path))
    print(len(loaded))

    expected = """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 57e2b22f), SourmashSignature('NC_009661.1 Shewanella baltica OS185 plasmid pS18501, complete sequence', bde81a41), SourmashSignature('NC_011663.1 Shewanella baltica OS223, complete genome', f033bbd8), SourmashSignature('NC_011664.1 Shewanella baltica OS223 plasmid pS22301, complete sequence', 87a9aec4), SourmashSignature('NC_011668.1 Shewanella baltica OS223 plasmid pS22302, complete sequence', 837bf2a7), SourmashSignature('NC_011665.1 Shewanella baltica OS223 plasmid pS22303, complete sequence', 485c3377)]"""
    assert repr(loaded) == expected


@utils.in_tempdir
def test_sig_cat_filelist_with_dbs(c):
    """Run `sig cat` on a file list that mixes signature files and an SBT zip.

    Two test-data signature files and the 'v6.sbt.zip' test-data file are
    listed, one path per line, in a file called "filelist". `sourmash sig
    cat` is run with that file as its only input and `-o out.sig`, and the
    repr of the signatures loaded back from out.sig is compared against
    the expected listing.
    """
    # Paths that go into the file list, in the order they are written.
    inputs = (
        utils.get_test_data('47.fa.sig'),
        utils.get_test_data('track_abund/47.fa.sig'),
        utils.get_test_data('v6.sbt.zip'),
    )

    # One path per line; no newline after the last entry.
    listing = c.output("filelist")
    with open(listing, 'w') as handle:
        handle.write("\n".join(inputs))

    c.run_sourmash('sig', 'cat', listing, '-o', 'out.sig')

    # Read back what `sig cat` wrote to the -o target.
    result_path = c.output('out.sig')
    loaded = list(sourmash.load_signatures(result_path))
    print(len(loaded))

    expected = """[SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('NC_009665.1 Shewanella baltica OS185, complete genome', 09a08691), SourmashSignature('', 6d6e87e1), SourmashSignature('', 60f7e23c), SourmashSignature('', 0107d767), SourmashSignature('', f71e7817), SourmashSignature('', f0c834bc), SourmashSignature('', 4e94e602), SourmashSignature('', b59473c9)]"""
    assert repr(loaded) == expected


@utils.in_tempdir
def test_sig_split_1(c):
# split 47 into 1 sig :)
Expand Down

0 comments on commit 95bd546

Please sign in to comment.