Skip to content

Commit

Permalink
queryEnd now computes length from cigar string if no sequence present,
Browse files Browse the repository at this point in the history
…closes #176
  • Loading branch information
AndreasHeger committed Oct 30, 2015
1 parent 5f99ede commit 6c3d854
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 20 deletions.
53 changes: 33 additions & 20 deletions pysam/calignedsegment.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,32 @@ cdef inline packTags(tags):
return "".join(fmts), args


cdef inline int32_t calculateQueryLength(bam1_t * src):
"""return query length computed from CIGAR alignment.
Return 0 if there is no CIGAR alignment.
"""

cdef uint32_t * cigar_p = pysam_bam_get_cigar(src)

if cigar_p == NULL:
return 0

cdef uint32_t k, qpos
cdef int op
qpos = 0

for k from 0 <= k < pysam_get_n_cigar(src):
op = cigar_p[k] & BAM_CIGAR_MASK

if op == BAM_CMATCH or op == BAM_CINS or \
op == BAM_CSOFT_CLIP or \
op == BAM_CEQUAL or op == BAM_CDIFF:
qpos += cigar_p[k] >> BAM_CIGAR_SHIFT

return qpos


cdef inline int32_t getQueryStart(bam1_t *src) except -1:
cdef uint32_t * cigar_p
cdef uint32_t k, op
Expand All @@ -393,6 +419,11 @@ cdef inline int32_t getQueryEnd(bam1_t *src) except -1:
cdef uint32_t k, op
cdef uint32_t end_offset = src.core.l_qseq

# if there is no sequence, compute length from cigar string
if end_offset == 0:
end_offset = calculateQueryLength(src)

# walk backwards in cigar string
if pysam_get_n_cigar(src) > 1:
cigar_p = pysam_bam_get_cigar(src);
for k from pysam_get_n_cigar(src) > k >= 1:
Expand All @@ -407,9 +438,6 @@ cdef inline int32_t getQueryEnd(bam1_t *src) except -1:
else:
break

if end_offset == 0:
end_offset = src.core.l_qseq

return end_offset


Expand Down Expand Up @@ -1274,8 +1302,7 @@ cdef class AlignedSegment:
Returns None if CIGAR string is not present.
"""
cdef uint32_t k, qpos
cdef int op

cdef uint32_t * cigar_p
cdef bam1_t * src

Expand All @@ -1284,21 +1311,7 @@ cdef class AlignedSegment:
if not always and src.core.l_qseq:
return src.core.l_qseq

if pysam_get_n_cigar(src) == 0:
return None

qpos = 0
cigar_p = pysam_bam_get_cigar(src)

for k from 0 <= k < pysam_get_n_cigar(src):
op = cigar_p[k] & BAM_CIGAR_MASK

if op == BAM_CMATCH or op == BAM_CINS or \
op == BAM_CSOFT_CLIP or \
op == BAM_CEQUAL or op == BAM_CDIFF:
qpos += cigar_p[k] >> BAM_CIGAR_SHIFT

return qpos
return calculateQueryLength(src)

def get_aligned_pairs(self, matches_only = False):
"""a list of aligned read (query) and reference positions.
Expand Down
15 changes: 15 additions & 0 deletions tests/AlignedSegment_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,21 @@ def inner():
# padding is not bein handled right now
self.assertRaises(NotImplementedError, inner)

def testNoSequence(self):
'''issue 176: retrieving length without query sequence
with soft-clipping.
'''
a = self.buildRead()
a.query_sequence = None
a.cigarstring = "20M"
self.assertEqual(a.query_alignment_length, 20)
a.cigarstring = "20M1S"
self.assertEqual(a.query_alignment_length, 20)
a.cigarstring = "1S20M"
self.assertEqual(a.query_alignment_length, 20)
a.cigarstring = "1S20M1S"
self.assertEqual(a.query_alignment_length, 20)


class TestTags(ReadTest):

Expand Down

0 comments on commit 6c3d854

Please sign in to comment.