[MRG] update output from gather w/o abundances, so that abund output is empty (#1328)

* update gather with no abundances so that abund output is empty
* add tests for new CSV abund output behavior
sourmash-bio · Feb 15, 2021 · bf5eeba · bf5eeba
1 parent 128e50c
commit bf5eeba
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 6 deletions.
diff --git a/src/sourmash/commands.py b/src/sourmash/commands.py
@@ -624,13 +624,13 @@ def gather(args):
                 print_results("---------   ------- -------")
 
 
-        # print interim result & save in a list for later use
+        # print interim result & save in `found` list for later use
         pct_query = '{:.1f}%'.format(result.f_unique_weighted*100)
         pct_genome = '{:.1f}%'.format(result.f_match*100)
-        average_abund ='{:.1f}'.format(result.average_abund)
         name = result.match._display_name(40)
 
         if query.minhash.track_abundance and not args.ignore_abundance:
+            average_abund ='{:.1f}'.format(result.average_abund)
             print_results('{:9}   {:>7} {:>7} {:>9}    {}',
                       format_bp(result.intersect_bp), pct_query, pct_genome,
                       average_abund, name)
@@ -758,10 +758,10 @@ def multigather(args):
                 # print interim result & save in a list for later use
                 pct_query = '{:.1f}%'.format(result.f_unique_weighted*100)
                 pct_genome = '{:.1f}%'.format(result.f_match*100)
-                average_abund ='{:.1f}'.format(result.average_abund)
                 name = result.match._display_name(40)
 
                 if query.minhash.track_abundance and not args.ignore_abundance:
+                    average_abund ='{:.1f}'.format(result.average_abund)
                     print_results('{:9}   {:>7} {:>7} {:>9}    {}',
                               format_bp(result.intersect_bp), pct_query, pct_genome,
                               average_abund, name)

diff --git a/src/sourmash/search.py b/src/sourmash/search.py
@@ -186,7 +186,7 @@ def gather_databases(query, databases, threshold_bp, ignore_abundance):
         f_unique_weighted /= sum_abunds
 
         # calculate stats on abundances, if desired.
-        average_abund, median_abund, std_abund = 0, 0, 0
+        average_abund, median_abund, std_abund = None, None, None
         if track_abundance:
             intersect_abunds = (orig_query_abunds[k] for k in intersect_mins)
             intersect_abunds = list(intersect_abunds)

diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py
@@ -3669,7 +3669,7 @@ def test_gather_abund_10_1(c):
     assert total_bp_analyzed == total_query_bp
 
 
-@utils.in_thisdir
+@utils.in_tempdir
 def test_gather_abund_10_1_ignore_abundance(c):
     # see comments in test_gather_abund_1_1, above.
     # nullgraph/make-reads.py -S 1 -r 200 -C 2 tests/test-data/genome-s10.fa.gz > r1.fa
@@ -3686,7 +3686,9 @@ def test_gather_abund_10_1_ignore_abundance(c):
 
     status, out, err = c.run_sourmash('gather', query,
                                       '--ignore-abundance',
-                                      *against_list)
+                                      *against_list,
+                                      '-o', c.output('results.csv'))
+
 
     print(out)
     print(err)
@@ -3702,6 +3704,18 @@ def test_gather_abund_10_1_ignore_abundance(c):
     assert all(('42.8%   80.0%', 'tests/test-data/genome-s11.fa.gz' in out))
     assert 'genome-s12.fa.gz' not in out
 
+    with open(c.output('results.csv'), 'rt') as fp:
+        r = csv.DictReader(fp)
+        some_results = False
+        for row in r:
+            some_results = True
+            assert row['average_abund'] is ''
+            assert row['median_abund'] is ''
+            assert row['std_abund'] is ''
+
+        assert some_results
+
+
 
 @utils.in_tempdir
 def test_gather_output_unassigned_with_abundance(c):