Skip to content

Commit

Permalink
[MRG] update output from gather w/o abundances, so that abund output is empty (#1328)
Browse files Browse the repository at this point in the history

* update gather with no abundances so that abund output is empty

* add tests for new CSV abund output behavior
  • Loading branch information
ctb committed Feb 15, 2021
1 parent 128e50c commit bf5eeba
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
6 changes: 3 additions & 3 deletions src/sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,13 +624,13 @@ def gather(args):
print_results("--------- ------- -------")


# print interim result & save in a list for later use
# print interim result & save in `found` list for later use
pct_query = '{:.1f}%'.format(result.f_unique_weighted*100)
pct_genome = '{:.1f}%'.format(result.f_match*100)
average_abund ='{:.1f}'.format(result.average_abund)
name = result.match._display_name(40)

if query.minhash.track_abundance and not args.ignore_abundance:
average_abund ='{:.1f}'.format(result.average_abund)
print_results('{:9} {:>7} {:>7} {:>9} {}',
format_bp(result.intersect_bp), pct_query, pct_genome,
average_abund, name)
Expand Down Expand Up @@ -758,10 +758,10 @@ def multigather(args):
# print interim result & save in a list for later use
pct_query = '{:.1f}%'.format(result.f_unique_weighted*100)
pct_genome = '{:.1f}%'.format(result.f_match*100)
average_abund ='{:.1f}'.format(result.average_abund)
name = result.match._display_name(40)

if query.minhash.track_abundance and not args.ignore_abundance:
average_abund ='{:.1f}'.format(result.average_abund)
print_results('{:9} {:>7} {:>7} {:>9} {}',
format_bp(result.intersect_bp), pct_query, pct_genome,
average_abund, name)
Expand Down
2 changes: 1 addition & 1 deletion src/sourmash/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def gather_databases(query, databases, threshold_bp, ignore_abundance):
f_unique_weighted /= sum_abunds

# calculate stats on abundances, if desired.
average_abund, median_abund, std_abund = 0, 0, 0
average_abund, median_abund, std_abund = None, None, None
if track_abundance:
intersect_abunds = (orig_query_abunds[k] for k in intersect_mins)
intersect_abunds = list(intersect_abunds)
Expand Down
18 changes: 16 additions & 2 deletions tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -3669,7 +3669,7 @@ def test_gather_abund_10_1(c):
assert total_bp_analyzed == total_query_bp


@utils.in_thisdir
@utils.in_tempdir
def test_gather_abund_10_1_ignore_abundance(c):
# see comments in test_gather_abund_1_1, above.
# nullgraph/make-reads.py -S 1 -r 200 -C 2 tests/test-data/genome-s10.fa.gz > r1.fa
Expand All @@ -3686,7 +3686,9 @@ def test_gather_abund_10_1_ignore_abundance(c):

status, out, err = c.run_sourmash('gather', query,
'--ignore-abundance',
*against_list)
*against_list,
'-o', c.output('results.csv'))


print(out)
print(err)
Expand All @@ -3702,6 +3704,18 @@ def test_gather_abund_10_1_ignore_abundance(c):
assert all(('42.8% 80.0%', 'tests/test-data/genome-s11.fa.gz' in out))
assert 'genome-s12.fa.gz' not in out

with open(c.output('results.csv'), 'rt') as fp:
r = csv.DictReader(fp)
some_results = False
for row in r:
some_results = True
assert row['average_abund'] is ''
assert row['median_abund'] is ''
assert row['std_abund'] is ''

assert some_results



@utils.in_tempdir
def test_gather_output_unassigned_with_abundance(c):
Expand Down

0 comments on commit bf5eeba

Please sign in to comment.