Skip to content

Commit d435781

Browse files
committed
Merge pull request #1214 from wasade/demux_fetch_bug
Demux fetch bug
2 parents 13e54c1 + 454446b commit d435781

File tree

2 files changed

+37
-4
lines changed

2 files changed

+37
-4
lines changed

qiita_ware/demux.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ def to_ascii(demux, samples=None):
438438
for samp, idx, seq, qual, bc_ori, bc_cor, bc_err in fetch(demux, samples):
439439
seq_id = id_fmt % {'sample': samp, 'idx': idx, 'bc_ori': bc_ori,
440440
'bc_cor': bc_cor, 'bc_diff': bc_err}
441-
if qual is not None:
441+
if qual != []:
442442
qual = qual.astype(np.uint8)
443443

444444
yield formatter(seq_id, seq, qual)
@@ -516,7 +516,7 @@ def fetch(demux, samples=None, k=None):
516516
seqs = demux[pjoin(dset_paths['sequence'])][indices]
517517

518518
# only yield qual if we have it
519-
quals = repeat(None)
519+
quals = repeat([])
520520
if demux.attrs['has-qual']:
521521
if len(indices) == 1:
522522
if indices[0]:
@@ -531,8 +531,8 @@ def fetch(demux, samples=None, k=None):
531531
iter_ = zip(repeat(sample), np.arange(indices.size)[indices], seqs,
532532
quals, bc_original, bc_corrected, bc_error)
533533

534-
for item in iter_:
535-
yield item
534+
for samp, idx, seq, qual, bc_ori, bc_cor, bc_err in iter_:
535+
yield (samp, idx, seq, qual[:len(seq)], bc_ori, bc_cor, bc_err)
536536

537537

538538
def stats(demux):

qiita_ware/test/test_demux.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,25 @@ def test_fetch(self):
331331
# implicitly tested with test_to_ascii
332332
pass
333333

334+
def test_fetch_qual_length_bug(self):
335+
# fetch was not trimming qual to the length of the sequence, resulting
336+
# in qual scores for positions beyond the length of the sequence.
337+
with tempfile.NamedTemporaryFile('r+', suffix='.fq',
338+
delete=False) as f:
339+
f.write(fqdata_variable_length)
340+
341+
self.to_remove.append(f.name)
342+
to_hdf5(f.name, self.hdf5_file)
343+
344+
exp = [('a', [(b"@a_0 orig_bc=abc new_bc=abc bc_diffs=0\nxyz\n+\n"
345+
"ABC\n")]),
346+
('b', [(b"@b_0 orig_bc=abw new_bc=wbc bc_diffs=4\nqwe\n+\n"
347+
"DFG\n"),
348+
(b"@b_1 orig_bc=abw new_bc=wbc bc_diffs=4\nqwexx\n+\n"
349+
"DEF#G\n")])]
350+
351+
obs = [(s[0], list(s[1])) for s in to_per_sample_ascii(self.hdf5_file)]
352+
self.assertEqual(obs, exp)
334353

335354
seqdata = """>a_1 orig_bc=abc new_bc=abc bc_diffs=0
336355
x
@@ -370,5 +389,19 @@ def test_fetch(self):
370389
DEF
371390
"""
372391

392+
fqdata_variable_length = """@a_1 orig_bc=abc new_bc=abc bc_diffs=0
393+
xyz
394+
+
395+
ABC
396+
@b_1 orig_bc=abw new_bc=wbc bc_diffs=4
397+
qwe
398+
+
399+
DFG
400+
@b_2 orig_bc=abw new_bc=wbc bc_diffs=4
401+
qwexx
402+
+
403+
DEF#G
404+
"""
405+
373406
if __name__ == '__main__':
374407
main()

0 commit comments

Comments
 (0)