Fix for Picard issue 574: CRAM reader should look for the index file automatically #716

Merged
merged 1 commit into from Oct 27, 2016
Jump to file or symbol
Failed to load files and symbols.
+128 −34
Split
@@ -26,7 +26,7 @@
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
-import htsjdk.samtools.util.CoordMath;
+import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeEOFException;
import java.io.File;
@@ -57,6 +57,8 @@
private ValidationStringency validationStringency;
+ private final static Log log = Log.getInstance(CRAMFileReader.class);
+
/**
* Create a CRAMFileReader from either a file or input stream using the reference source returned by
* {@link ReferenceSource#getDefaultCRAMReferenceSource() getDefaultCRAMReferenceSource}.
@@ -95,6 +97,9 @@ public CRAMFileReader(final File cramFile, final InputStream inputStream,
this.cramFile = cramFile;
this.inputStream = inputStream;
this.referenceSource = referenceSource;
+ if (cramFile != null) {
+ mIndexFile = findIndexForFile(null, cramFile);
+ }
getIterator();
}
@@ -117,7 +122,7 @@ public CRAMFileReader(final File cramFile, final File indexFile,
}
this.cramFile = cramFile;
- this.mIndexFile = indexFile;
+ mIndexFile = findIndexForFile(indexFile, cramFile);
this.referenceSource = referenceSource;
getIterator();
@@ -140,6 +145,7 @@ public CRAMFileReader(final File cramFile, final CRAMReferenceSource referenceSo
this.cramFile = cramFile;
this.referenceSource = referenceSource;
+ mIndexFile = findIndexForFile(null, cramFile);
getIterator();
}
@@ -164,21 +170,8 @@ public CRAMFileReader(final InputStream inputStream, final SeekableStream indexI
if (referenceSource == null) {
throw new IllegalArgumentException("A reference is required for CRAM readers");
}
-
- this.inputStream = inputStream;
this.referenceSource = referenceSource;
- this.validationStringency = validationStringency;
-
- iterator = new CRAMIterator(inputStream, referenceSource, validationStringency);
- if (indexInputStream != null) {
- SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
- if (null != baiStream) {
- mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary());
- }
- else {
- throw new IllegalArgumentException("CRAM index must be a BAI or CRAI stream");
- }
- }
+ initWithStreams(inputStream, indexInputStream, validationStringency);
}
/**
@@ -196,7 +189,7 @@ public CRAMFileReader(final InputStream inputStream, final SeekableStream indexI
public CRAMFileReader(final InputStream stream,
final File indexFile, final CRAMReferenceSource referenceSource,
final ValidationStringency validationStringency) throws IOException {
- this(stream, indexFile == null ? null: new SeekableFileStream(indexFile), referenceSource, validationStringency);
+ this(stream, indexFile == null ? null : new SeekableFileStream(indexFile), referenceSource, validationStringency);
}
/**
@@ -211,11 +204,44 @@ public CRAMFileReader(final InputStream stream,
*
* @throws IllegalArgumentException if the {@code cramFile} or the {@code CRAMReferenceSource} is null
*/
- public CRAMFileReader(final File cramFile,
- final File indexFile, final CRAMReferenceSource referenceSource,
+ public CRAMFileReader(final File cramFile, final File indexFile, final CRAMReferenceSource referenceSource,
final ValidationStringency validationStringency) throws IOException {
- this(new FileInputStream(cramFile), indexFile, referenceSource, validationStringency);
+ if (cramFile == null) {
+ throw new IllegalArgumentException("Input file can not be null for CRAM reader");
+ }
+ if (referenceSource == null) {
+ throw new IllegalArgumentException("A reference is required for CRAM readers");
+ }
this.cramFile = cramFile;
+ this.referenceSource = referenceSource;
+ this.mIndexFile = findIndexForFile(indexFile, cramFile);
+ final SeekableFileStream indexStream = this.mIndexFile == null ? null : new SeekableFileStream(this.mIndexFile);
+ initWithStreams(new FileInputStream(cramFile), indexStream, validationStringency);
+ }
+
+ private void initWithStreams(final InputStream inputStream, final SeekableStream indexInputStream,
+ final ValidationStringency validationStringency) throws IOException {
+ this.inputStream = inputStream;
+ this.validationStringency = validationStringency;
+ iterator = new CRAMIterator(inputStream, referenceSource, validationStringency);
+ if (indexInputStream != null) {
+ SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ if (null != baiStream) {
+ mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ }
+ else {
+ throw new IllegalArgumentException("CRAM index must be a BAI or CRAI stream");
+ }
+ }
+ }
+
+ private File findIndexForFile(File indexFile, final File cramFile) {
+ indexFile = indexFile == null ? SamFiles.findIndex(cramFile) : indexFile;
+ if (indexFile != null && indexFile.lastModified() < cramFile.lastModified()) {
+ log.warn("CRAM index file " + indexFile.getAbsolutePath() +
+ " is older than CRAM " + cramFile.getAbsolutePath());
+ }
+ return indexFile;
}
@Override
@@ -27,6 +27,7 @@
import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.util.Log;
+import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@@ -42,13 +43,18 @@
public class CRAMFileReaderTest {
private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools");
+ private static final File CRAM_WITH_CRAI = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
+ private static final File CRAM_WITHOUT_CRAI = new File(TEST_DATA_DIR, "cram_query_sorted.cram");
+ private static final ReferenceSource REFERENCE = createReferenceSource();
+ private static final File INDEX_FILE = new File(TEST_DATA_DIR, "cram_with_crai_index.cram.crai");
+
@BeforeClass
public void initClass() {
Log.setGlobalLogLevel(Log.LogLevel.ERROR);
}
- private ReferenceSource createReferenceSource() {
+ private static ReferenceSource createReferenceSource() {
byte[] refBases = new byte[10 * 10];
Arrays.fill(refBases, (byte) 'A');
InMemoryReferenceSequenceFile rsf = new InMemoryReferenceSequenceFile();
@@ -60,19 +66,17 @@ private ReferenceSource createReferenceSource() {
@Test(description = "Test CRAMReader 1 reference required", expectedExceptions = IllegalStateException.class)
public void testCRAMReader1_ReferenceRequired() {
- File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
InputStream bis = null;
// assumes that reference_fasta property is not set and the download service is not enabled
- new CRAMFileReader(file, bis);
+ new CRAMFileReader(CRAM_WITH_CRAI, bis);
}
// constructor 2: CRAMFileReader(final File cramFile, final InputStream inputStream, final ReferenceSource referenceSource)
@Test(description = "Test CRAMReader 2 reference required", expectedExceptions = IllegalArgumentException.class)
public void testCRAMReader2ReferenceRequired() {
- File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
InputStream bis = null;
- new CRAMFileReader(file, bis, null);
+ new CRAMFileReader(CRAM_WITH_CRAI, bis, null);
}
@Test(description = "Test CRAMReader 2 input required", expectedExceptions = IllegalArgumentException.class)
@@ -82,31 +86,66 @@ public void testCRAMReader2_InputRequired() {
new CRAMFileReader(file, bis, createReferenceSource());
}
+ @Test
+ public void testCRAMReader2_ShouldAutomaticallyFindCRAMIndex() {
+ InputStream inputStream = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, inputStream, REFERENCE);
+ reader.getIndex();
+ Assert.assertTrue(reader.hasIndex(), "Can't find CRAM existing index.");
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void testCRAMReader2_WithoutCRAMIndex() {
+ InputStream inputStream = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, inputStream, REFERENCE);
+ reader.getIndex();
+ }
+
// constructor 3: CRAMFileReader(final File cramFile, final File indexFile, final ReferenceSource referenceSource)
@Test(description = "Test CRAMReader 3 reference required", expectedExceptions = IllegalArgumentException.class)
public void testCRAMReader3_RequiredReference() {
- File inputFile = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
File indexFile = null;
ReferenceSource refSource = null;
- new CRAMFileReader(inputFile, indexFile, refSource);
+ new CRAMFileReader(CRAM_WITH_CRAI, indexFile, refSource);
}
@Test(description = "Test CRAMReader 3 input required", expectedExceptions = IllegalArgumentException.class)
- public void testCRAMReader3_InputRequirted() {
+ public void testCRAMReader3_InputRequired() {
File inputFile = null;
File indexFile = null;
ReferenceSource refSource = null;
new CRAMFileReader(inputFile, indexFile, refSource);
}
+ @Test
+ public void testCRAMReader3_ShouldAutomaticallyFindCRAMIndex() {
+ File indexFile = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, indexFile, REFERENCE);
+ reader.getIndex();
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test
+ public void testCRAMReader3_ShouldUseCRAMIndex() {
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, INDEX_FILE, REFERENCE);
+ reader.getIndex();
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void testCRAMReader3_WithoutCRAMIndex() {
+ File indexFile = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, indexFile, REFERENCE);
+ reader.getIndex();
+ }
+
// constructor 4: CRAMFileReader(final File cramFile, final ReferenceSource referenceSource)
@Test(description = "Test CRAMReader 4 reference required", expectedExceptions = IllegalArgumentException.class)
public void testCRAMReader4_ReferenceRequired() {
- File inputFile = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
ReferenceSource refSource = null;
- new CRAMFileReader(inputFile, refSource);
+ new CRAMFileReader(CRAM_WITH_CRAI, refSource);
}
@Test(description = "Test CRAMReader 4 input required", expectedExceptions = IllegalArgumentException.class)
@@ -115,6 +154,19 @@ public void testCRAMReader4_InputRequired() {
new CRAMFileReader(inputFile, createReferenceSource());
}
+ @Test
+ public void testCRAMReader4_ShouldAutomaticallyFindCRAMIndex() {
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, REFERENCE);
+ reader.getIndex();
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void testCRAMReader4_WithoutCRAMIndex() {
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, REFERENCE);
+ reader.getIndex();
+ }
+
// constructor 5: CRAMFileReader(final InputStream inputStream, final SeekableStream indexInputStream,
// final ReferenceSource referenceSource, final ValidationStringency validationStringency)
@Test(description = "Test CRAMReader 5 reference required", expectedExceptions = IllegalArgumentException.class)
@@ -146,18 +198,34 @@ public void testCRAMReader6_ReferenceRequired() throws IOException {
public void testCRAMReader6_InputRequired() throws IOException {
InputStream bis = null;
File file = null;
- ReferenceSource refSource = null;
new CRAMFileReader(bis, file, createReferenceSource(), ValidationStringency.STRICT);
}
// constructor 7: CRAMFileReader(final File cramFile, final File indexFile, final ReferenceSource referenceSource,
// final ValidationStringency validationStringency)
@Test(description = "Test CRAMReader 7 reference required", expectedExceptions = IllegalArgumentException.class)
public void testCRAMReader7_ReferenceRequired() throws IOException {
- InputStream bis = new ByteArrayInputStream(new byte[0]);
- File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
ReferenceSource refSource = null;
- new CRAMFileReader(file, file, refSource, ValidationStringency.STRICT);
+ new CRAMFileReader(CRAM_WITH_CRAI, CRAM_WITH_CRAI, refSource, ValidationStringency.STRICT);
}
+ @Test
+ public void testCRAMReader7_ShouldAutomaticallyFindCRAMIndex()throws IOException {
+ File indexFile = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, indexFile, REFERENCE, ValidationStringency.STRICT);
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test
+ public void testCRAMReader7_ShouldUseCRAMIndex() throws IOException {
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, INDEX_FILE, REFERENCE, ValidationStringency.STRICT);
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void testCRAMReader7_WithoutCRAMIndex() throws IOException {
+ File indexFile = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, indexFile, REFERENCE, ValidationStringency.STRICT);
+ reader.getIndex();
+ }
}