From ded4bd66aa63d926ef2db64f52c3965d7650274c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 15 Feb 2021 08:50:45 -0800 Subject: [PATCH 1/2] update gather with no abundances so that abund output is empty --- src/sourmash/commands.py | 6 +++--- src/sourmash/search.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sourmash/commands.py b/src/sourmash/commands.py index 678bd6ef6e..a18b0045ca 100644 --- a/src/sourmash/commands.py +++ b/src/sourmash/commands.py @@ -624,13 +624,13 @@ def gather(args): print_results("--------- ------- -------") - # print interim result & save in a list for later use + # print interim result & save in `found` list for later use pct_query = '{:.1f}%'.format(result.f_unique_weighted*100) pct_genome = '{:.1f}%'.format(result.f_match*100) - average_abund ='{:.1f}'.format(result.average_abund) name = result.match._display_name(40) if query.minhash.track_abundance and not args.ignore_abundance: + average_abund ='{:.1f}'.format(result.average_abund) print_results('{:9} {:>7} {:>7} {:>9} {}', format_bp(result.intersect_bp), pct_query, pct_genome, average_abund, name) @@ -758,10 +758,10 @@ def multigather(args): # print interim result & save in a list for later use pct_query = '{:.1f}%'.format(result.f_unique_weighted*100) pct_genome = '{:.1f}%'.format(result.f_match*100) - average_abund ='{:.1f}'.format(result.average_abund) name = result.match._display_name(40) if query.minhash.track_abundance and not args.ignore_abundance: + average_abund ='{:.1f}'.format(result.average_abund) print_results('{:9} {:>7} {:>7} {:>9} {}', format_bp(result.intersect_bp), pct_query, pct_genome, average_abund, name) diff --git a/src/sourmash/search.py b/src/sourmash/search.py index e69b24ab5d..1a4ee61e36 100644 --- a/src/sourmash/search.py +++ b/src/sourmash/search.py @@ -186,7 +186,7 @@ def gather_databases(query, databases, threshold_bp, ignore_abundance): f_unique_weighted /= sum_abunds # calculate stats on abundances, if 
desired. - average_abund, median_abund, std_abund = 0, 0, 0 + average_abund, median_abund, std_abund = None, None, None if track_abundance: intersect_abunds = (orig_query_abunds[k] for k in intersect_mins) intersect_abunds = list(intersect_abunds) From 05d902dd722f0898fda058fad152c65f59a9976b Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 15 Feb 2021 08:57:45 -0800 Subject: [PATCH 2/2] add tests for new CSV abund output behavior --- tests/test_sourmash.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py index 7fc2df5915..2f144e32ed 100644 --- a/tests/test_sourmash.py +++ b/tests/test_sourmash.py @@ -3669,7 +3669,7 @@ def test_gather_abund_10_1(c): assert total_bp_analyzed == total_query_bp -@utils.in_thisdir +@utils.in_tempdir def test_gather_abund_10_1_ignore_abundance(c): # see comments in test_gather_abund_1_1, above. # nullgraph/make-reads.py -S 1 -r 200 -C 2 tests/test-data/genome-s10.fa.gz > r1.fa @@ -3686,7 +3686,9 @@ def test_gather_abund_10_1_ignore_abundance(c): status, out, err = c.run_sourmash('gather', query, '--ignore-abundance', - *against_list) + *against_list, + '-o', c.output('results.csv')) + print(out) print(err) @@ -3702,6 +3704,18 @@ def test_gather_abund_10_1_ignore_abundance(c): assert all(('42.8% 80.0%', 'tests/test-data/genome-s11.fa.gz' in out)) assert 'genome-s12.fa.gz' not in out + with open(c.output('results.csv'), 'rt') as fp: + r = csv.DictReader(fp) + some_results = False + for row in r: + some_results = True + assert row['average_abund'] == '' + assert row['median_abund'] == '' + assert row['std_abund'] == '' + + assert some_results + + @utils.in_tempdir def test_gather_output_unassigned_with_abundance(c):