Skip to content

Commit

Permalink
Merge pull request #617 from pysam-developers/AH-DoubleFetchIssue
Browse files Browse the repository at this point in the history
{AH} each iterator now loads own copy of index, fixes #611 and fixes …
  • Loading branch information
AndreasHeger committed Feb 7, 2018
2 parents 7ebad1f + e5892c3 commit 4849c99
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 17 deletions.
2 changes: 2 additions & 0 deletions doc/release.rst
Expand Up @@ -12,6 +12,8 @@ Release 0.14.0
VariantFile. The end designations have been kept for backwards
compatibility.

* [#611] and [#293] CRAM repeated fetch now works, each iterator
reloads index if multiple_iterators=True
* [#608] pysam now wraps htslib 1.7 and samtools 1.7.
* [#580] reference_name and next_reference_name can now be set to "*"
(will be converted to None to indicate an unmapped location)
Expand Down
1 change: 1 addition & 0 deletions pysam/libcalignmentfile.pxd
Expand Up @@ -81,6 +81,7 @@ cdef class IteratorRow:
cdef bam1_t * b
cdef AlignmentFile samfile
cdef htsFile * htsfile
cdef hts_idx_t * index
cdef AlignmentHeader header
cdef int owns_samfile

Expand Down
32 changes: 20 additions & 12 deletions pysam/libcalignmentfile.pyx
Expand Up @@ -942,12 +942,9 @@ cdef class AlignmentFile(HTSFile):
"check_sq=False") % mode)

if self.is_bam or self.is_cram:
# open index for remote files
# returns NULL if there is no index or index could
# not be opened
index_filename = index_filename or filepath_index
if index_filename:
cindexname = bindex_filename = encode_filename(index_filename)
self.index_filename = index_filename or filepath_index
if self.index_filename:
cindexname = bfile_name = encode_filename(self.index_filename)

if cfilename or cindexname:
with nogil:
Expand All @@ -957,7 +954,7 @@ cdef class AlignmentFile(HTSFile):
if errno:
raise IOError(errno, force_str(strerror(errno)))
else:
raise IOError('unable to open index file `%s`' % index_filename)
raise IOError('unable to open index file `%s`' % self.index_filename)

elif require_index:
raise IOError('unable to open index file')
Expand Down Expand Up @@ -1886,7 +1883,8 @@ cdef class IteratorRow:
def __init__(self, AlignmentFile samfile, int multiple_iterators=False):
cdef char *cfilename
cdef char *creference_filename

cdef char *cindexname = NULL

if not samfile.is_open:
raise ValueError("I/O operation on closed file")

Expand All @@ -1903,6 +1901,14 @@ cdef class IteratorRow:
self.htsfile = hts_open(cfilename, 'r')
assert self.htsfile != NULL

if samfile.has_index():
if samfile.index_filename:
cindexname = samfile.index_filename
with nogil:
self.index = sam_index_load2(self.htsfile, cfilename, cindexname)
else:
self.index = NULL

# need to advance in newly opened file to position after header
# better: use seek/tell?
with nogil:
Expand All @@ -1922,6 +1928,7 @@ cdef class IteratorRow:

else:
self.htsfile = samfile.htsfile
self.index = samfile.index
self.owns_samfile = False
self.header = samfile.header

Expand All @@ -1933,6 +1940,7 @@ cdef class IteratorRow:
bam_destroy1(self.b)
if self.owns_samfile:
hts_close(self.htsfile)
hts_idx_destroy(self.index)


cdef class IteratorRowRegion(IteratorRow):
Expand All @@ -1953,15 +1961,15 @@ cdef class IteratorRowRegion(IteratorRow):
int tid, int beg, int stop,
int multiple_iterators=False):

IteratorRow.__init__(self, samfile,
multiple_iterators=multiple_iterators)

if not samfile.has_index():
raise ValueError("no index available for iteration")

IteratorRow.__init__(self, samfile,
multiple_iterators=multiple_iterators)

with nogil:
self.iter = sam_itr_queryi(
self.samfile.index,
self.index,
tid,
beg,
stop)
Expand Down
31 changes: 26 additions & 5 deletions tests/AlignmentFile_test.py
Expand Up @@ -1500,22 +1500,43 @@ def testDoubleFetch(self):
samfile1.fetch(multiple_iterators=True)):
self.assertEqual(a.compare(b), 0)

def testDoubleFetchWithRegion(self):
def testDoubleFetchWithRegionTrueTrue(self):

with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
contig, start, stop = 'chr1', 200, 3000000
contig, start, stop = 'chr2', 200, 3000000
# just making sure the test has something to catch
self.assertTrue(len(list(samfile1.fetch(contig, start, stop))) > 0)

# see Issue #293
# The following fails for CRAM files, but works for BAM
# files when the first is multiple_iterators=False:
for a, b in zip(samfile1.fetch(contig, start, stop,
multiple_iterators=True),
samfile1.fetch(contig, start, stop,
multiple_iterators=True)):
self.assertEqual(a.compare(b), 0)

def testDoubleFetchWithRegionFalseTrue(self):
with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
contig, start, stop = 'chr2', 200, 3000000
# just making sure the test has something to catch
self.assertTrue(len(list(samfile1.fetch(contig, start, stop))) > 0)

for a, b in zip(samfile1.fetch(contig, start, stop,
multiple_iterators=False),
samfile1.fetch(contig, start, stop,
multiple_iterators=True)):
self.assertEqual(a.compare(b), 0)

def testDoubleFetchWithRegionTrueFalse(self):
with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
contig, start, stop = 'chr2', 200, 3000000
# just making sure the test has something to catch
self.assertTrue(len(list(samfile1.fetch(contig, start, stop))) > 0)

for a, b in zip(samfile1.fetch(contig, start, stop,
multiple_iterators=True),
samfile1.fetch(contig, start, stop,
multiple_iterators=False)):
self.assertEqual(a.compare(b), 0)

def testDoubleFetchUntilEOF(self):

with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
Expand Down

0 comments on commit 4849c99

Please sign in to comment.