Make BAMFileReader and some related classes public #786
| @@ -40,7 +40,7 @@ | ||
| /** | ||
| * Class for reading and querying BAM files. | ||
| */ | ||
| -class BAMFileReader extends SamReader.ReaderImplementation { | ||
| +public class BAMFileReader extends SamReader.ReaderImplementation { | ||
| // True if reading from a File rather than an InputStream | ||
| private boolean mIsSeekable = false; | ||
| @@ -869,25 +869,56 @@ private void assertIntervalsOptimized(final QueryInterval[] intervals) { | ||
| } | ||
| } | ||
| - private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals, | ||
| - final boolean contained) { | ||
| - | ||
| - assertIntervalsOptimized(intervals); | ||
| - | ||
| - // Hit the index to determine the chunk boundaries for the required data. | ||
| + /** | ||
| + * Use the index to determine the chunk boundaries for the required intervals. | ||
| + * @param intervals the intervals to restrict reads to | ||
| + * @param fileIndex the BAM index to use | ||
| + * @return file pointer pairs corresponding to chunk boundaries | ||
| + */ | ||
| + public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) { | ||
| final BAMFileSpan[] inputSpans = new BAMFileSpan[intervals.length]; | ||
| - final BAMIndex fileIndex = getIndex(); | ||
| for (int i = 0; i < intervals.length; ++i) { | ||
| final QueryInterval interval = intervals[i]; | ||
| final BAMFileSpan span = fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end); | ||
| inputSpans[i] = span; | ||
| } | ||
| - final long[] filePointers; | ||
| + final BAMFileSpan span; | ||
| if (inputSpans.length > 0) { | ||
| - filePointers = BAMFileSpan.merge(inputSpans).toCoordinateArray(); | ||
| + span = BAMFileSpan.merge(inputSpans); | ||
| } else { | ||
| - filePointers = null; | ||
| + span = null; | ||
| } | ||
| + return span; | ||
| + } | ||
| + | ||
| + private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals, | ||
| + final boolean contained) { | ||
| + | ||
| + assertIntervalsOptimized(intervals); | ||
| + | ||
| + BAMFileSpan span = getFileSpan(intervals, getIndex()); | ||
| + | ||
| + // Create an iterator over the above chunk boundaries. | ||
| + final BAMFileIndexIterator iterator = new BAMFileIndexIterator(span == null ? null : span.toCoordinateArray()); | ||
| + | ||
| + // Add some preprocessing filters for edge-case reads that don't fit into this | ||
| + // query type. | ||
| + return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); | ||
| + } | ||
| + | ||
| + /** | ||
| + * Prepare to iterate through SAMRecords that match the given intervals. | ||
| + * @param intervals the intervals to restrict reads to | ||
| + * @param contained if <code>true</code>, return records that are strictly | ||
| + * contained in the intervals, otherwise return records that overlap | ||
| + * @param filePointers file pointer pairs corresponding to chunk boundaries for the | ||
| + * intervals | ||
| + */ | ||
| + public CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals, | ||
| + final boolean contained, | ||
| + final long[] filePointers) { | ||
| + | ||
| + assertIntervalsOptimized(intervals); | ||
droazen
Contributor
|
||
| // Create an iterator over the above chunk boundaries. | ||
| final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers); | ||
| @@ -115,15 +115,55 @@ public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) { | ||
| validateSorted(); | ||
| final BAMFileSpan trimmedChunkList = new BAMFileSpan(); | ||
| + final long chunkStart = bamFileSpan.chunks.get(0).getChunkStart(); | ||
| for(final Chunk chunkToTrim: chunks) { | ||
| - if(chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart()) { | ||
| - if(chunkToTrim.getChunkStart() >= bamFileSpan.chunks.get(0).getChunkStart()) { | ||
| + if(chunkToTrim.getChunkEnd() > chunkStart) { | ||
droazen
Contributor
|
||
| + if(chunkToTrim.getChunkStart() >= chunkStart) { | ||
| // This chunk from the list is completely beyond the start of the filtering chunk. | ||
| trimmedChunkList.add(chunkToTrim.clone()); | ||
| } | ||
| else { | ||
| // This chunk from the list partially overlaps the filtering chunk and must be trimmed. | ||
| - trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd())); | ||
| + trimmedChunkList.add(new Chunk(chunkStart,chunkToTrim.getChunkEnd())); | ||
| + } | ||
| + } | ||
| + } | ||
| + return trimmedChunkList; | ||
| + } | ||
| + | ||
| + /** | ||
| + * Creates a new file span by removing all chunks after the given file span ends. | ||
| + * If a chunk in the chunk list starts before and ends after the given | ||
| + * chunk, the second portion of the chunk will be deleted. | ||
| + * @param fileSpan The filespan after which to eliminate. | ||
| + * @return A new BAMFileSpan which contains the portion of the chunk list before the | ||
| + * given chunk. | ||
| + */ | ||
| + public SAMFileSpan removeContentsAfter(final SAMFileSpan fileSpan) { | ||
| + if(fileSpan == null) | ||
| + return clone(); | ||
| + | ||
| + if(!(fileSpan instanceof BAMFileSpan)) | ||
| + throw new SAMException("Unable to compare "); | ||
| + | ||
| + final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan; | ||
| + | ||
| + if(bamFileSpan.isEmpty()) | ||
| + return clone(); | ||
| + | ||
| + validateSorted(); | ||
| + | ||
| + final BAMFileSpan trimmedChunkList = new BAMFileSpan(); | ||
| + final long chunkEnd = bamFileSpan.chunks.get(bamFileSpan.chunks.size() - 1).getChunkEnd(); | ||
| + for(final Chunk chunkToTrim: chunks) { | ||
| + if(chunkToTrim.getChunkStart() < chunkEnd) { | ||
| + if(chunkToTrim.getChunkEnd() <= chunkEnd) { | ||
| + // This chunk from the list is completely before the end of the filtering chunk. | ||
| + trimmedChunkList.add(chunkToTrim.clone()); | ||
| + } | ||
| + else { | ||
| + // This chunk from the list partially overlaps the filtering chunk and must be trimmed. | ||
| + trimmedChunkList.add(new Chunk(chunkToTrim.getChunkStart(),chunkEnd)); | ||
droazen
Contributor
|
||
| } | ||
| } | ||
| } | ||
| @@ -0,0 +1,70 @@ | ||
| +package htsjdk.samtools; | ||
| + | ||
| +import java.util.Arrays; | ||
| +import org.testng.Assert; | ||
| +import org.testng.annotations.DataProvider; | ||
| +import org.testng.annotations.Test; | ||
| + | ||
| +public class BAMFileSpanTest { | ||
| + @Test(dataProvider = "testRemoveContentsBeforeProvider") | ||
| + public void testRemoveContentsBefore(BAMFileSpan originalSpan, BAMFileSpan cutoff, | ||
| + BAMFileSpan expectedSpan) { | ||
| + // only start value in cutoff is used | ||
| + Assert.assertEquals( | ||
| + ((BAMFileSpan) originalSpan.removeContentsBefore(cutoff)).getChunks(), | ||
| + expectedSpan.getChunks()); | ||
| + } | ||
| + | ||
| + @DataProvider(name = "testRemoveContentsBeforeProvider") | ||
| + private Object[][] testRemoveContentsBeforeProvider() { | ||
| + return new Object[][] { | ||
| + { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(6,0)), span(chunk(6,10), chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(7,0)), span(chunk(7,10), chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(9,0)), span(chunk(9,10), chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(10,0)), span(chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(11,0)), span(chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(12,0)), span(chunk(12,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(15,0)), span() }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(16,0)), span() }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(6,10), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(16,17), chunk(18,19)), span() }, | ||
| + }; | ||
| + } | ||
| + | ||
| + @Test(dataProvider = "testRemoveContentsAfterProvider") | ||
| + public void testRemoveContentsAfter(BAMFileSpan originalSpan, BAMFileSpan cutoff, | ||
| + BAMFileSpan expectedSpan) { | ||
| + // only end value in cutoff is used | ||
| + Assert.assertEquals( | ||
| + ((BAMFileSpan) originalSpan.removeContentsAfter(cutoff)).getChunks(), | ||
| + expectedSpan.getChunks()); | ||
| + } | ||
| + | ||
| + @DataProvider(name = "testRemoveContentsAfterProvider") | ||
| + private Object[][] testRemoveContentsAfterProvider() { | ||
| + return new Object[][] { | ||
| + { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,6)), span() }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,7)), span(chunk(6,7)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,9)), span(chunk(6,9)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,10)), span(chunk(6,10)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,11)), span(chunk(6,10)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,12)), span(chunk(6,10), chunk(11,12)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,15)), span(chunk(6,10), chunk(11,15)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,16)), span(chunk(6,10), chunk(11,15)) }, | ||
droazen
Contributor
|
||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,10)), span(chunk(6, 10)) }, | ||
| + { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) }, | ||
| + }; | ||
| + } | ||
| + | ||
| + private BAMFileSpan span(Chunk... chunks) { | ||
| + return new BAMFileSpan(Arrays.asList(chunks)); | ||
| + } | ||
| + | ||
| + private Chunk chunk(long start, long end) { | ||
| + return new Chunk(start, end); | ||
| + } | ||
| +} | ||
This sanity check (that the intervals are sorted, and that overlapping/adjacent intervals are properly merged) now happens much later than previously when calling the two-argument overload of this method (which is what the
query()methods all call). Specifically, the check now happens after the index is actually queried ingetFileSpan(). Are we certain that the process of querying the index is guaranteed not to blow up if the intervals were not optimized?