Make BAMFileReader and some related classes public #786

Merged
merged 1 commit into from Feb 17, 2017
Jump to file or symbol
Failed to load files and symbols.
+160 −15
Split
@@ -40,7 +40,7 @@
/**
* Class for reading and querying BAM files.
*/
-class BAMFileReader extends SamReader.ReaderImplementation {
+public class BAMFileReader extends SamReader.ReaderImplementation {
// True if reading from a File rather than an InputStream
private boolean mIsSeekable = false;
@@ -869,25 +869,56 @@ private void assertIntervalsOptimized(final QueryInterval[] intervals) {
}
}
- private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
- final boolean contained) {
-
- assertIntervalsOptimized(intervals);
-
- // Hit the index to determine the chunk boundaries for the required data.
+ /**
+ * Use the index to determine the chunk boundaries for the required intervals.
+ * @param intervals the intervals to restrict reads to
+ * @param fileIndex the BAM index to use
+ * @return file pointer pairs corresponding to chunk boundaries
+ */
+ public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) {
final BAMFileSpan[] inputSpans = new BAMFileSpan[intervals.length];
- final BAMIndex fileIndex = getIndex();
for (int i = 0; i < intervals.length; ++i) {
final QueryInterval interval = intervals[i];
final BAMFileSpan span = fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end);
inputSpans[i] = span;
}
- final long[] filePointers;
+ final BAMFileSpan span;
if (inputSpans.length > 0) {
- filePointers = BAMFileSpan.merge(inputSpans).toCoordinateArray();
+ span = BAMFileSpan.merge(inputSpans);
} else {
- filePointers = null;
+ span = null;
}
+ return span;
+ }
+
+ private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
+ final boolean contained) {
+
+ assertIntervalsOptimized(intervals);
+
+ BAMFileSpan span = getFileSpan(intervals, getIndex());
+
+ // Create an iterator over the above chunk boundaries.
+ final BAMFileIndexIterator iterator = new BAMFileIndexIterator(span == null ? null : span.toCoordinateArray());
+
+ // Add some preprocessing filters for edge-case reads that don't fit into this
+ // query type.
+ return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained));
+ }
+
+ /**
+ * Prepare to iterate through SAMRecords that match the given intervals.
+ * @param intervals the intervals to restrict reads to
+ * @param contained if <code>true</code>, return records that are strictly
+ * contained in the intervals, otherwise return records that overlap
+ * @param filePointers file pointer pairs corresponding to chunk boundaries for the
+ * intervals
+ */
+ public CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
+ final boolean contained,
+ final long[] filePointers) {
+
+ assertIntervalsOptimized(intervals);
@droazen

droazen Feb 14, 2017 edited

Contributor

This sanity check (that the intervals are sorted, and that overlapping/adjacent intervals are properly merged) now happens much later than previously when calling the two-argument overload of this method (which is what the query() methods all call). Specifically, the check now happens after the index is actually queried in getFileSpan(). Are we certain that the process of querying the index is guaranteed not to blow up if the intervals were not optimized?

@droazen

droazen Feb 14, 2017

Contributor

Probably we should ensure that assertIntervalsOptimized() is called as the very first step in both overloads of createIndexIterator() to be safe, even if that means that the two-argument version can't call directly into the three-argument version.

@tomwhite

tomwhite Feb 15, 2017

Contributor

Done.

// Create an iterator over the above chunk boundaries.
final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers);
@@ -115,15 +115,55 @@ public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) {
validateSorted();
final BAMFileSpan trimmedChunkList = new BAMFileSpan();
+ final long chunkStart = bamFileSpan.chunks.get(0).getChunkStart();
for(final Chunk chunkToTrim: chunks) {
- if(chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart()) {
- if(chunkToTrim.getChunkStart() >= bamFileSpan.chunks.get(0).getChunkStart()) {
+ if(chunkToTrim.getChunkEnd() > chunkStart) {
@droazen

droazen Feb 14, 2017

Contributor

Just want to confirm that you now have at least one unit test that fails before this bug fix (chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart() -> chunkToTrim.getChunkEnd() > chunkStart) and passes after it.

@tomwhite

tomwhite Feb 15, 2017

Contributor

Yes, five new unit tests fail without this change.

+ if(chunkToTrim.getChunkStart() >= chunkStart) {
// This chunk from the list is completely beyond the start of the filtering chunk.
trimmedChunkList.add(chunkToTrim.clone());
}
else {
// This chunk from the list partially overlaps the filtering chunk and must be trimmed.
- trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd()));
+ trimmedChunkList.add(new Chunk(chunkStart,chunkToTrim.getChunkEnd()));
+ }
+ }
+ }
+ return trimmedChunkList;
+ }
+
+ /**
+ * Creates a new file span by removing all chunks after the given file span ends.
+ * If a chunk in the chunk list starts before and ends after the given
+ * chunk, the second portion of the chunk will be deleted.
+ * @param fileSpan The filespan after which to eliminate.
+ * @return A new BAMFileSpan which contains the portion of the chunk list before the
+ * given chunk.
+ */
+ public SAMFileSpan removeContentsAfter(final SAMFileSpan fileSpan) {
+ if(fileSpan == null)
+ return clone();
+
+ if(!(fileSpan instanceof BAMFileSpan))
+ throw new SAMException("Unable to compare ");
+
+ final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan;
+
+ if(bamFileSpan.isEmpty())
+ return clone();
+
+ validateSorted();
+
+ final BAMFileSpan trimmedChunkList = new BAMFileSpan();
+ final long chunkEnd = bamFileSpan.chunks.get(bamFileSpan.chunks.size() - 1).getChunkEnd();
+ for(final Chunk chunkToTrim: chunks) {
+ if(chunkToTrim.getChunkStart() < chunkEnd) {
+ if(chunkToTrim.getChunkEnd() <= chunkEnd) {
+ // This chunk from the list is completely before the end of the filtering chunk.
+ trimmedChunkList.add(chunkToTrim.clone());
+ }
+ else {
+ // This chunk from the list partially overlaps the filtering chunk and must be trimmed.
+ trimmedChunkList.add(new Chunk(chunkToTrim.getChunkStart(),chunkEnd));
@droazen

droazen Feb 14, 2017 edited

Contributor

Do we have a good test to confirm that the chunks in the returned filespan are still sorted after trimming? (applies to removeContentsBefore() as well)

@tomwhite

tomwhite Feb 15, 2017

Contributor

Yes, the test checks that the chunks are in the order specified (which is sorted).

}
}
}
@@ -381,7 +381,11 @@ public PrimitiveSamReaderToSamReaderAdapter(final PrimitiveSamReader p, final Sa
this.resource = resource;
}
- PrimitiveSamReader underlyingReader() {
+ /**
+ * Access the underlying {@link PrimitiveSamReader} used by this adapter.
+ * @return the {@link PrimitiveSamReader} used by this adapter.
+ */
+ public PrimitiveSamReader underlyingReader() {
return p;
}
@@ -0,0 +1,70 @@
+package htsjdk.samtools;
+
+import java.util.Arrays;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class BAMFileSpanTest {
+ @Test(dataProvider = "testRemoveContentsBeforeProvider")
+ public void testRemoveContentsBefore(BAMFileSpan originalSpan, BAMFileSpan cutoff,
+ BAMFileSpan expectedSpan) {
+ // only start value in cutoff is used
+ Assert.assertEquals(
+ ((BAMFileSpan) originalSpan.removeContentsBefore(cutoff)).getChunks(),
+ expectedSpan.getChunks());
+ }
+
+ @DataProvider(name = "testRemoveContentsBeforeProvider")
+ private Object[][] testRemoveContentsBeforeProvider() {
+ return new Object[][] {
+ { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(6,0)), span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(7,0)), span(chunk(7,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(9,0)), span(chunk(9,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(10,0)), span(chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(11,0)), span(chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(12,0)), span(chunk(12,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(15,0)), span() },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(16,0)), span() },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(6,10), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(16,17), chunk(18,19)), span() },
+ };
+ }
+
+ @Test(dataProvider = "testRemoveContentsAfterProvider")
+ public void testRemoveContentsAfter(BAMFileSpan originalSpan, BAMFileSpan cutoff,
+ BAMFileSpan expectedSpan) {
+ // only end value in cutoff is used
+ Assert.assertEquals(
+ ((BAMFileSpan) originalSpan.removeContentsAfter(cutoff)).getChunks(),
+ expectedSpan.getChunks());
+ }
+
+ @DataProvider(name = "testRemoveContentsAfterProvider")
+ private Object[][] testRemoveContentsAfterProvider() {
+ return new Object[][] {
+ { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,6)), span() },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,7)), span(chunk(6,7)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,9)), span(chunk(6,9)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,10)), span(chunk(6,10)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,11)), span(chunk(6,10)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,12)), span(chunk(6,10), chunk(11,12)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,15)), span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,16)), span(chunk(6,10), chunk(11,15)) },
@droazen

droazen Feb 14, 2017

Contributor

What's missing here (and in the test cases for removeContentsBefore() as well) are test cases in which the cutoff BAMFileSpan consists of multiple chunks. As mentioned above, I think you have a bug in removeContentsAfter() in that case.

@tomwhite

tomwhite Feb 15, 2017

Contributor

Done.

+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,10)), span(chunk(6, 10)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) },
+ };
+ }
+
+ private BAMFileSpan span(Chunk... chunks) {
+ return new BAMFileSpan(Arrays.asList(chunks));
+ }
+
+ private Chunk chunk(long start, long end) {
+ return new Chunk(start, end);
+ }
+}