Permalink
Browse files

Make BAMFileReader and some related classes public, and expose methods
for iterating over a part of a BAM file (#786)

These methods are needed for Hadoop-BAM, which processes BAM files in
parallel.

Also, add BAMFileSpan#removeContentsAfter method to mirror
removeContentsBefore.
  • Loading branch information...
1 parent 8b9d5d5 commit 55bf01b28e3e6a321f2249475b9ca6b5459ed91a @tomwhite tomwhite committed with droazen Feb 17, 2017
@@ -40,7 +40,7 @@
/**
* Class for reading and querying BAM files.
*/
-class BAMFileReader extends SamReader.ReaderImplementation {
+public class BAMFileReader extends SamReader.ReaderImplementation {
// True if reading from a File rather than an InputStream
private boolean mIsSeekable = false;
@@ -869,25 +869,56 @@ private void assertIntervalsOptimized(final QueryInterval[] intervals) {
}
}
- private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
- final boolean contained) {
-
- assertIntervalsOptimized(intervals);
-
- // Hit the index to determine the chunk boundaries for the required data.
+ /**
+ * Use the index to determine the chunk boundaries for the required intervals.
+ * Each interval is looked up in the index with <code>getSpanOverlapping</code>
+ * (reference index, start, end) and the per-interval spans are combined with
+ * <code>BAMFileSpan.merge</code>.
+ * @param intervals the intervals to restrict reads to
+ * @param fileIndex the BAM index to use
+ * @return the merged span for all intervals, or <code>null</code> if
+ * <code>intervals</code> is empty; <code>toCoordinateArray()</code> on the result
+ * yields the file pointer pairs corresponding to chunk boundaries
+ */
+ public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) {
final BAMFileSpan[] inputSpans = new BAMFileSpan[intervals.length];
- final BAMIndex fileIndex = getIndex();
for (int i = 0; i < intervals.length; ++i) {
final QueryInterval interval = intervals[i];
final BAMFileSpan span = fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end);
inputSpans[i] = span;
}
- final long[] filePointers;
+ final BAMFileSpan span;
if (inputSpans.length > 0) {
- filePointers = BAMFileSpan.merge(inputSpans).toCoordinateArray();
+ span = BAMFileSpan.merge(inputSpans);
} else {
- filePointers = null;
+ span = null;
}
+ return span;
+ }
+
+ private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
+ final boolean contained) {
+
+ assertIntervalsOptimized(intervals);
+
+ BAMFileSpan span = getFileSpan(intervals, getIndex());
+
+ // Create an iterator over the span's chunk boundaries; a null span (no intervals) is passed on as null file pointers.
+ final BAMFileIndexIterator iterator = new BAMFileIndexIterator(span == null ? null : span.toCoordinateArray());
+
+ // Add some preprocessing filters for edge-case reads that don't fit into this
+ // query type; the filter is given the same intervals and the 'contained' flag.
+ return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained));
+ }
+
+ /**
+ * Prepare to iterate through SAMRecords that match the given intervals.
+ * @param intervals the intervals to restrict reads to
+ * @param contained if <code>true</code>, return records that are strictly
+ * contained in the intervals, otherwise return records that overlap
+ * @param filePointers file pointer pairs corresponding to chunk boundaries for the
+ * intervals
+ */
+ public CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals,
+ final boolean contained,
+ final long[] filePointers) {
+
+ assertIntervalsOptimized(intervals);
// Create an iterator over the above chunk boundaries.
final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers);
@@ -115,15 +115,55 @@ public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) {
validateSorted();
final BAMFileSpan trimmedChunkList = new BAMFileSpan();
+ final long chunkStart = bamFileSpan.chunks.get(0).getChunkStart();
for(final Chunk chunkToTrim: chunks) {
- if(chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart()) {
- if(chunkToTrim.getChunkStart() >= bamFileSpan.chunks.get(0).getChunkStart()) {
+ if(chunkToTrim.getChunkEnd() > chunkStart) {
+ if(chunkToTrim.getChunkStart() >= chunkStart) {
// This chunk from the list is completely beyond the start of the filtering chunk.
trimmedChunkList.add(chunkToTrim.clone());
}
else {
// This chunk from the list partially overlaps the filtering chunk and must be trimmed.
- trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd()));
+ trimmedChunkList.add(new Chunk(chunkStart,chunkToTrim.getChunkEnd()));
+ }
+ }
+ }
+ return trimmedChunkList;
+ }
+
+ /**
+ * Creates a new file span by removing all chunks after the given file span ends.
+ * If a chunk in the chunk list starts before and ends after the given
+ * chunk, the second portion of the chunk will be deleted.
+ * @param fileSpan The filespan after which to eliminate.
+ * @return A new BAMFileSpan which contains the portion of the chunk list before the
+ * given chunk.
+ */
+ public SAMFileSpan removeContentsAfter(final SAMFileSpan fileSpan) {
+ if(fileSpan == null)
+ return clone();
+
+ if(!(fileSpan instanceof BAMFileSpan))
+ throw new SAMException("Unable to compare ");
+
+ final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan;
+
+ if(bamFileSpan.isEmpty())
+ return clone();
+
+ validateSorted();
+
+ final BAMFileSpan trimmedChunkList = new BAMFileSpan();
+ final long chunkEnd = bamFileSpan.chunks.get(bamFileSpan.chunks.size() - 1).getChunkEnd();
+ for(final Chunk chunkToTrim: chunks) {
+ if(chunkToTrim.getChunkStart() < chunkEnd) {
+ if(chunkToTrim.getChunkEnd() <= chunkEnd) {
+ // This chunk from the list is completely before the end of the filtering chunk.
+ trimmedChunkList.add(chunkToTrim.clone());
+ }
+ else {
+ // This chunk from the list partially overlaps the filtering chunk and must be trimmed.
+ trimmedChunkList.add(new Chunk(chunkToTrim.getChunkStart(),chunkEnd));
}
}
}
@@ -381,7 +381,11 @@ public PrimitiveSamReaderToSamReaderAdapter(final PrimitiveSamReader p, final Sa
this.resource = resource;
}
- PrimitiveSamReader underlyingReader() {
+ /**
+ * Access the underlying {@link PrimitiveSamReader} used by this adapter.
+ * @return the {@link PrimitiveSamReader} held by this adapter; the stored
+ * reference itself is returned, not a copy.
+ */
+ public PrimitiveSamReader underlyingReader() {
return p;
}
@@ -0,0 +1,70 @@
+package htsjdk.samtools;
+
+import java.util.Arrays;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class BAMFileSpanTest {
+ @Test(dataProvider = "testRemoveContentsBeforeProvider")
+ public void testRemoveContentsBefore(BAMFileSpan originalSpan, BAMFileSpan cutoff,
+ BAMFileSpan expectedSpan) {
+ // only the start value of the cutoff's first chunk is used, so the cutoff end values in the provider are arbitrary
+ Assert.assertEquals(
+ ((BAMFileSpan) originalSpan.removeContentsBefore(cutoff)).getChunks(),
+ expectedSpan.getChunks());
+ }
+
+ @DataProvider(name = "testRemoveContentsBeforeProvider")
+ private Object[][] testRemoveContentsBeforeProvider() {
+ return new Object[][] {
+ { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(6,0)), span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(7,0)), span(chunk(7,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(9,0)), span(chunk(9,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(10,0)), span(chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(11,0)), span(chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(12,0)), span(chunk(12,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(15,0)), span() },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(16,0)), span() },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(6,10), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(16,17), chunk(18,19)), span() },
+ };
+ }
+
+ @Test(dataProvider = "testRemoveContentsAfterProvider")
+ public void testRemoveContentsAfter(BAMFileSpan originalSpan, BAMFileSpan cutoff,
+ BAMFileSpan expectedSpan) {
+ // only the end value of the cutoff's last chunk is used, so the cutoff start values in the provider are arbitrary
+ Assert.assertEquals(
+ ((BAMFileSpan) originalSpan.removeContentsAfter(cutoff)).getChunks(),
+ expectedSpan.getChunks());
+ }
+
+ @DataProvider(name = "testRemoveContentsAfterProvider")
+ private Object[][] testRemoveContentsAfterProvider() {
+ return new Object[][] {
+ { span(chunk(6,10), chunk(11,15)), null, span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(), span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,6)), span() },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,7)), span(chunk(6,7)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,9)), span(chunk(6,9)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,10)), span(chunk(6,10)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,11)), span(chunk(6,10)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,12)), span(chunk(6,10), chunk(11,12)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,15)), span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,16)), span(chunk(6,10), chunk(11,15)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,10)), span(chunk(6, 10)) },
+ { span(chunk(6,10), chunk(11,15)), span(chunk(0,6), chunk(7,16)), span(chunk(6, 10), chunk(11,15)) },
+ };
+ }
+
+ private BAMFileSpan span(Chunk... chunks) {
+ return new BAMFileSpan(Arrays.asList(chunks));
+ }
+
+ private Chunk chunk(long start, long end) {
+ return new Chunk(start, end);
+ }
+}

0 comments on commit 55bf01b

Please sign in to comment.