Trim per-sequence quality scores to the sequence length in demux.fetch.

fetch() previously yielded each zipped record unchanged; it now yields
qual[:len(seq)] so that no quality values are reported for positions beyond
the end of the sequence. The "no quality data" placeholder changes from None
to an empty list so that the slice is valid in both cases. A regression test
with variable-length FASTQ records is added.

Review changes relative to the submitted patch:
  * to_ascii checks len(qual) > 0 instead of qual != []; comparing a NumPy
    array with a list is elementwise and has no reliable truth value.
  * the expected values in the new test use bytes literals throughout;
    mixing b"..." with "..." in one implicit concatenation is a SyntaxError
    on Python 3.
The index lines still carry the blob hashes of the submitted patch.

diff --git a/qiita_ware/demux.py b/qiita_ware/demux.py
index 0938ba696..f26c05f11 100644
--- a/qiita_ware/demux.py
+++ b/qiita_ware/demux.py
@@ -438,7 +438,7 @@ def to_ascii(demux, samples=None):
     for samp, idx, seq, qual, bc_ori, bc_cor, bc_err in fetch(demux, samples):
         seq_id = id_fmt % {'sample': samp, 'idx': idx, 'bc_ori': bc_ori,
                            'bc_cor': bc_cor, 'bc_diff': bc_err}
-        if qual is not None:
+        if len(qual) > 0:
             qual = qual.astype(np.uint8)
 
         yield formatter(seq_id, seq, qual)
@@ -516,7 +516,7 @@ def fetch(demux, samples=None, k=None):
         seqs = demux[pjoin(dset_paths['sequence'])][indices]
 
         # only yield qual if we have it
-        quals = repeat(None)
+        quals = repeat([])
         if demux.attrs['has-qual']:
             if len(indices) == 1:
                 if indices[0]:
@@ -531,8 +531,8 @@ def fetch(demux, samples=None, k=None):
         iter_ = zip(repeat(sample), np.arange(indices.size)[indices], seqs,
                     quals, bc_original, bc_corrected, bc_error)
 
-        for item in iter_:
-            yield item
+        for samp, idx, seq, qual, bc_ori, bc_cor, bc_err in iter_:
+            yield (samp, idx, seq, qual[:len(seq)], bc_ori, bc_cor, bc_err)
 
 
 def stats(demux):
diff --git a/qiita_ware/test/test_demux.py b/qiita_ware/test/test_demux.py
index 2ded52874..a1d8e1d35 100644
--- a/qiita_ware/test/test_demux.py
+++ b/qiita_ware/test/test_demux.py
@@ -331,6 +331,25 @@ def test_fetch(self):
         # implicitly tested with test_to_ascii
         pass
 
+    def test_fetch_qual_length_bug(self):
+        # fetch was not trimming qual to the length of the sequence resulting
+        # in qual scores for positions beyond the length of the sequence.
+        with tempfile.NamedTemporaryFile('r+', suffix='.fq',
+                                         delete=False) as f:
+            f.write(fqdata_variable_length)
+
+        self.to_remove.append(f.name)
+        to_hdf5(f.name, self.hdf5_file)
+
+        exp = [('a', [(b"@a_0 orig_bc=abc new_bc=abc bc_diffs=0\nxyz\n+\n"
+                       b"ABC\n")]),
+               ('b', [(b"@b_0 orig_bc=abw new_bc=wbc bc_diffs=4\nqwe\n+\n"
+                       b"DFG\n"),
+                      (b"@b_1 orig_bc=abw new_bc=wbc bc_diffs=4\nqwexx\n+\n"
+                       b"DEF#G\n")])]
+
+        obs = [(s[0], list(s[1])) for s in to_per_sample_ascii(self.hdf5_file)]
+        self.assertEqual(obs, exp)
 
 seqdata = """>a_1 orig_bc=abc new_bc=abc bc_diffs=0
 x
@@ -370,5 +389,19 @@ def test_fetch(self):
 DEF
 """
 
+fqdata_variable_length = """@a_1 orig_bc=abc new_bc=abc bc_diffs=0
+xyz
++
+ABC
+@b_1 orig_bc=abw new_bc=wbc bc_diffs=4
+qwe
++
+DFG
+@b_2 orig_bc=abw new_bc=wbc bc_diffs=4
+qwexx
++
+DEF#G
+"""
+
 if __name__ == '__main__':
     main()