From 6f0512b20523c05492811025ffc612440eec0838 Mon Sep 17 00:00:00 2001 From: Darina Nikolaeva Date: Thu, 27 Oct 2016 20:36:27 +0300 Subject: [PATCH] Fix for Issue 574: CRAM index (#716) --- src/main/java/htsjdk/samtools/CRAMFileReader.java | 66 ++++++++++----- .../java/htsjdk/samtools/CRAMFileReaderTest.java | 96 ++++++++++++++++++---- 2 files changed, 128 insertions(+), 34 deletions(-) diff --git a/src/main/java/htsjdk/samtools/CRAMFileReader.java b/src/main/java/htsjdk/samtools/CRAMFileReader.java index acdb8ba8e..9a29d367f 100644 --- a/src/main/java/htsjdk/samtools/CRAMFileReader.java +++ b/src/main/java/htsjdk/samtools/CRAMFileReader.java @@ -26,7 +26,7 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; -import htsjdk.samtools.util.CoordMath; +import htsjdk.samtools.util.Log; import htsjdk.samtools.util.RuntimeEOFException; import java.io.File; @@ -57,6 +57,8 @@ private ValidationStringency validationStringency; + private final static Log log = Log.getInstance(CRAMFileReader.class); + /** * Create a CRAMFileReader from either a file or input stream using the reference source returned by * {@link ReferenceSource#getDefaultCRAMReferenceSource() getDefaultCRAMReferenceSource}. @@ -95,6 +97,9 @@ public CRAMFileReader(final File cramFile, final InputStream inputStream, this.cramFile = cramFile; this.inputStream = inputStream; this.referenceSource = referenceSource; + if (cramFile != null) { + mIndexFile = findIndexForFile(null, cramFile); + } getIterator(); } @@ -117,7 +122,7 @@ public CRAMFileReader(final File cramFile, final File indexFile, } this.cramFile = cramFile; - this.mIndexFile = indexFile; + mIndexFile = findIndexForFile(indexFile, cramFile); this.referenceSource = referenceSource; getIterator(); @@ -140,6 +145,7 @@ public CRAMFileReader(final File cramFile, final CRAMReferenceSource referenceSo this.cramFile = cramFile; this.referenceSource = referenceSource; + mIndexFile = findIndexForFile(null, cramFile); getIterator(); } @@ -164,21 +170,8 @@ public CRAMFileReader(final InputStream inputStream, final SeekableStream indexI if (referenceSource == null) { throw new IllegalArgumentException("A reference is required for CRAM readers"); } - - this.inputStream = inputStream; this.referenceSource = referenceSource; - this.validationStringency = validationStringency; - - iterator = new CRAMIterator(inputStream, referenceSource, validationStringency); - if (indexInputStream != null) { - SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary()); - if (null != baiStream) { - mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary()); - } - else { - throw new IllegalArgumentException("CRAM index must be a BAI or CRAI stream"); - } - } + initWithStreams(inputStream, indexInputStream, validationStringency); } /** @@ -196,7 +189,7 @@ public CRAMFileReader(final InputStream inputStream, final SeekableStream indexI public CRAMFileReader(final InputStream stream, final File indexFile, final CRAMReferenceSource referenceSource, final ValidationStringency validationStringency) throws IOException { - this(stream, indexFile == null ? null: new SeekableFileStream(indexFile), referenceSource, validationStringency); + this(stream, indexFile == null ? null : new SeekableFileStream(indexFile), referenceSource, validationStringency); } /** @@ -211,11 +204,44 @@ public CRAMFileReader(final InputStream stream, * * @throws IllegalArgumentException if the {@code cramFile} or the {@code CRAMReferenceSource} is null */ - public CRAMFileReader(final File cramFile, - final File indexFile, final CRAMReferenceSource referenceSource, + public CRAMFileReader(final File cramFile, final File indexFile, final CRAMReferenceSource referenceSource, final ValidationStringency validationStringency) throws IOException { - this(new FileInputStream(cramFile), indexFile, referenceSource, validationStringency); + if (cramFile == null) { + throw new IllegalArgumentException("Input file can not be null for CRAM reader"); + } + if (referenceSource == null) { + throw new IllegalArgumentException("A reference is required for CRAM readers"); + } this.cramFile = cramFile; + this.referenceSource = referenceSource; + this.mIndexFile = findIndexForFile(indexFile, cramFile); + final SeekableFileStream indexStream = this.mIndexFile == null ? null : new SeekableFileStream(this.mIndexFile); + initWithStreams(new FileInputStream(cramFile), indexStream, validationStringency); + } + + private void initWithStreams(final InputStream inputStream, final SeekableStream indexInputStream, + final ValidationStringency validationStringency) throws IOException { + this.inputStream = inputStream; + this.validationStringency = validationStringency; + iterator = new CRAMIterator(inputStream, referenceSource, validationStringency); + if (indexInputStream != null) { + SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary()); + if (null != baiStream) { + mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary()); + } + else { + throw new IllegalArgumentException("CRAM index must be a BAI or CRAI stream"); + } + } + } + + private File findIndexForFile(File indexFile, final File cramFile) { + indexFile = indexFile == null ? SamFiles.findIndex(cramFile) : indexFile; + if (indexFile != null && indexFile.lastModified() < cramFile.lastModified()) { + log.warn("CRAM index file " + indexFile.getAbsolutePath() + + " is older than CRAM " + cramFile.getAbsolutePath()); + } + return indexFile; } @Override diff --git a/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java b/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java index 8c610bdf7..3fcb3bdc9 100644 --- a/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java +++ b/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java @@ -27,6 +27,7 @@ import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.util.Log; +import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -42,13 +43,18 @@ public class CRAMFileReaderTest { private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools"); + private static final File CRAM_WITH_CRAI = new File(TEST_DATA_DIR, "cram_with_crai_index.cram"); + private static final File CRAM_WITHOUT_CRAI = new File(TEST_DATA_DIR, "cram_query_sorted.cram"); + private static final ReferenceSource REFERENCE = createReferenceSource(); + private static final File INDEX_FILE = new File(TEST_DATA_DIR, "cram_with_crai_index.cram.crai"); + @BeforeClass public void initClass() { Log.setGlobalLogLevel(Log.LogLevel.ERROR); } - private ReferenceSource createReferenceSource() { + private static ReferenceSource createReferenceSource() { byte[] refBases = new byte[10 * 10]; Arrays.fill(refBases, (byte) 'A'); InMemoryReferenceSequenceFile rsf = new InMemoryReferenceSequenceFile(); @@ -60,19 +66,17 @@ private ReferenceSource createReferenceSource() { @Test(description = "Test CRAMReader 1 reference required", expectedExceptions = IllegalStateException.class) public void testCRAMReader1_ReferenceRequired() { - File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram"); InputStream bis = null; // assumes that reference_fasta property is not set and the download service is not enabled - new CRAMFileReader(file, bis); + new CRAMFileReader(CRAM_WITH_CRAI, bis); } // constructor 2: CRAMFileReader(final File cramFile, final InputStream inputStream, final ReferenceSource referenceSource) @Test(description = "Test CRAMReader 2 reference required", expectedExceptions = IllegalArgumentException.class) public void testCRAMReader2ReferenceRequired() { - File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram"); InputStream bis = null; - new CRAMFileReader(file, bis, null); + new CRAMFileReader(CRAM_WITH_CRAI, bis, null); } @Test(description = "Test CRAMReader 2 input required", expectedExceptions = IllegalArgumentException.class) @@ -82,31 +86,66 @@ public void testCRAMReader2_InputRequired() { new CRAMFileReader(file, bis, createReferenceSource()); } + @Test + public void testCRAMReader2_ShouldAutomaticallyFindCRAMIndex() { + InputStream inputStream = null; + CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, inputStream, REFERENCE); + reader.getIndex(); + Assert.assertTrue(reader.hasIndex(), "Can't find CRAM existing index."); + } + + @Test(expectedExceptions = SAMException.class) + public void testCRAMReader2_WithoutCRAMIndex() { + InputStream inputStream = null; + CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, inputStream, REFERENCE); + reader.getIndex(); + } + // constructor 3: CRAMFileReader(final File cramFile, final File indexFile, final ReferenceSource referenceSource) @Test(description = "Test CRAMReader 3 reference required", expectedExceptions = IllegalArgumentException.class) public void testCRAMReader3_RequiredReference() { - File inputFile = new File(TEST_DATA_DIR, "cram_with_crai_index.cram"); File indexFile = null; ReferenceSource refSource = null; - new CRAMFileReader(inputFile, indexFile, refSource); + new CRAMFileReader(CRAM_WITH_CRAI, indexFile, refSource); } @Test(description = "Test CRAMReader 3 input required", expectedExceptions = IllegalArgumentException.class) - public void testCRAMReader3_InputRequirted() { + public void testCRAMReader3_InputRequired() { File inputFile = null; File indexFile = null; ReferenceSource refSource = null; new CRAMFileReader(inputFile, indexFile, refSource); } + @Test + public void testCRAMReader3_ShouldAutomaticallyFindCRAMIndex() { + File indexFile = null; + CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, indexFile, REFERENCE); + reader.getIndex(); + Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index."); + } + + @Test + public void testCRAMReader3_ShouldUseCRAMIndex() { + CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, INDEX_FILE, REFERENCE); + reader.getIndex(); + Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index."); + } + + @Test(expectedExceptions = SAMException.class) + public void testCRAMReader3_WithoutCRAMIndex() { + File indexFile = null; + CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, indexFile, REFERENCE); + reader.getIndex(); + } + // constructor 4: CRAMFileReader(final File cramFile, final ReferenceSource referenceSource) @Test(description = "Test CRAMReader 4 reference required", expectedExceptions = IllegalArgumentException.class) public void testCRAMReader4_ReferenceRequired() { - File inputFile = new File(TEST_DATA_DIR, "cram_with_crai_index.cram"); ReferenceSource refSource = null; - new CRAMFileReader(inputFile, refSource); + new CRAMFileReader(CRAM_WITH_CRAI, refSource); } @Test(description = "Test CRAMReader 4 input required", expectedExceptions = IllegalArgumentException.class) @@ -115,6 +154,19 @@ public void testCRAMReader4_InputRequired() { new CRAMFileReader(inputFile, createReferenceSource()); } + @Test + public void testCRAMReader4_ShouldAutomaticallyFindCRAMIndex() { + CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, REFERENCE); + reader.getIndex(); + Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index."); + } + + @Test(expectedExceptions = SAMException.class) + public void testCRAMReader4_WithoutCRAMIndex() { + CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, REFERENCE); + reader.getIndex(); + } + // constructor 5: CRAMFileReader(final InputStream inputStream, final SeekableStream indexInputStream, // final ReferenceSource referenceSource, final ValidationStringency validationStringency) @Test(description = "Test CRAMReader 5 reference required", expectedExceptions = IllegalArgumentException.class) @@ -146,7 +198,6 @@ public void testCRAMReader6_ReferenceRequired() throws IOException { public void testCRAMReader6_InputRequired() throws IOException { InputStream bis = null; File file = null; - ReferenceSource refSource = null; new CRAMFileReader(bis, file, createReferenceSource(), ValidationStringency.STRICT); } @@ -154,10 +205,27 @@ public void testCRAMReader6_InputRequired() throws IOException { // final ValidationStringency validationStringency) @Test(description = "Test CRAMReader 7 reference required", expectedExceptions = IllegalArgumentException.class) public void testCRAMReader7_ReferenceRequired() throws IOException { - InputStream bis = new ByteArrayInputStream(new byte[0]); - File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram"); ReferenceSource refSource = null; - new CRAMFileReader(file, file, refSource, ValidationStringency.STRICT); + new CRAMFileReader(CRAM_WITH_CRAI, CRAM_WITH_CRAI, refSource, ValidationStringency.STRICT); } + @Test + public void testCRAMReader7_ShouldAutomaticallyFindCRAMIndex()throws IOException { + File indexFile = null; + CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, indexFile, REFERENCE, ValidationStringency.STRICT); + Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index."); + } + + @Test + public void testCRAMReader7_ShouldUseCRAMIndex() throws IOException { + CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, INDEX_FILE, REFERENCE, ValidationStringency.STRICT); + Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index."); + } + + @Test(expectedExceptions = SAMException.class) + public void testCRAMReader7_WithoutCRAMIndex() throws IOException { + File indexFile = null; + CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, indexFile, REFERENCE, ValidationStringency.STRICT); + reader.getIndex(); + } }