Fix for issue #513: NPE in ValidateSamFile with CRAM #735

Merged
merged 3 commits into from Mar 7, 2017
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2009-2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -286,8 +286,7 @@ private void validateSamRecordsAndQualityFormat(final Iterable<SAMRecord> samRec
if (cigarIsValid) {
try {
validateNmTag(record, recordNumber);
- }
- catch (SAMException e) {
+ } catch (SAMException e) {
if (hasValidSortOrder) {
// If a CRAM file has an invalid sort order, the ReferenceFileWalker will throw a
// SAMException due to an out of order request when retrieving reference bases during NM
@@ -539,12 +538,12 @@ private void validateHeader(final SAMFileHeader fileHeader) {
"A platform (PL) attribute was not found for read group ",
readGroupID));
}
- else {
+ else {
// NB: cannot be null, so not catching a NPE
try {
SAMReadGroupRecord.PlatformValue.valueOf(platformValue.toUpperCase());
} catch (IllegalArgumentException e) {
- addError(new SAMValidationError(Type.INVALID_PLATFORM_VALUE,
+ addError(new SAMValidationError(Type.INVALID_PLATFORM_VALUE,
"The platform (PL) attribute (" + platformValue + ") + was not one of the valid values for read group ",
readGroupID));
}
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2009-2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -83,7 +83,7 @@ public ReferenceSequence get(final int sequenceIndex) {
}
referenceSequence = null;
- if(referenceSequenceFile.isIndexed()) {
+ if(referenceSequenceFile.isIndexed() && referenceSequenceFile.getSequenceDictionary() != null) {
final SAMSequenceRecord samSequenceRecord = referenceSequenceFile.getSequenceDictionary().getSequence(sequenceIndex);
if(samSequenceRecord != null) {
referenceSequence = referenceSequenceFile.getSequence(samSequenceRecord.getSequenceName()) ;
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2009-2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -27,6 +27,7 @@
import htsjdk.samtools.BamIndexValidator.IndexValidationStringency;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.reference.FastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.CloserUtil;
@@ -64,6 +65,20 @@ public void testValidSamFile() throws Exception {
}
@Test
+ public void testValidCRAMFileWithoutSeqDict() throws Exception {
+ final File reference = new File(TEST_DATA_DIR, "nm_tag_validation.fa");
+ final SamReader samReader = SamReaderFactory
+ .makeDefault()
+ .validationStringency(ValidationStringency.SILENT)
+ .referenceSequence(reference)
+ .open(new File(TEST_DATA_DIR, "nm_tag_validation.cram"));
+ final Histogram<String> results = executeValidation(samReader,
+ new FastaSequenceFile(reference, true),
+ IndexValidationStringency.EXHAUSTIVE);
+ Assert.assertTrue(!results.isEmpty());
+ }
+
+ @Test
public void testSamFileVersion1pt5() throws Exception {
final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(new File(TEST_DATA_DIR, "test_samfile_version_1pt5.bam"));
final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
@@ -231,26 +246,32 @@ public void testNmFlagValidation() throws IOException {
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), new ReferenceSequenceFile() {
private int index = 0;
+ @Override
public SAMSequenceDictionary getSequenceDictionary() {
return null;
}
+ @Override
public ReferenceSequence nextSequence() {
final byte[] bases = new byte[10000];
Arrays.fill(bases, (byte) 'A');
return new ReferenceSequence("foo", index++, bases);
}
+ @Override
public void reset() {
this.index = 0;
}
+ @Override
public boolean isIndexed() { return false; }
+ @Override
public ReferenceSequence getSequence(final String contig) {
throw new UnsupportedOperationException();
}
+ @Override
public ReferenceSequence getSubsequenceAt(final String contig, final long start, final long stop) {
throw new UnsupportedOperationException();
}
@@ -0,0 +1,71 @@
+>17 17:1-4200
+AAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAA
+TGTGCTCTGGGGTCTCTGGGGTCTCACCCACGACCAACTCCCTGGGCCTGGCACCAGGGA
+GCTTAACAAACATCTGTCCAGCGAATACCTGCATCCCTAGAAGTGAAGCCACCGCCCAAA
+GACACGCCCATGTCCAGCTTAACCTGCATCCCTAGAAGTGAAGGCACCGCCCAAAGACAC
+GCCCATGTCCAGCTTATTCTGCCCAGTTCCTCTCCAGAAAGGCTGCATGGTTGACACACA
+GTGCCTGCGACAAAGCTGAATGCTATCATTTAAAAACTCCTTGCTGGTTTGAGAGGCAGA
+AAATGATATCTCATAGTTGCTTTACTTTGCATATTTTAAAATTGTGACTTTCATGGCATA
+AATAATACTGGTTTATTACAGAAGCACTAGAAAATGCATGTGGACAAAAGTTGGGATTAG
+GAGAGAGAAATGAAGACATATGTCCACACAAAAACCTGTTCATTGCAGCTTTCTACCATC
+ACCAAAAATTGCAAACAACCACACGCCCTTCAACTGGGGAACTCATCAACAACAAACTTG
+TGGTTTACCCACACAATGGAAGACCACTTAGCAACAAAAAGGACCAAACTCCTGGTACAT
+GCAACTGACAGATGAATCTCAAACGCATTCCTCCGTGTGAAAGAAGCCGGACTCACAGGG
+CAACACACTATCTGACTGTTTCATGGGAAAGTCTGGAAACGGCAACACCATTGAGACAGA
+AAACAGGTGAGTGGTTGCCTGGGGCCAGGGAACTTTCTGGGGTCATATTCTCTGTGTTGA
+TTCTGGTGGTGGAAACAAGACTGTCCCAGCCTGGGTGATACAGCGAGACCCCATCTCTAC
+CAAAAAATTAAAAATTAGCTGGGCATGGTGGTGCATGCCTGTAGTCCCAGCTATTCACAG
+TGCTGAGGTGGGAAGATGCTTGAGCCCAGGAGTTCAAGGCTGCAATGAGCTATGATTGCG
+CCACTGCACTTTGGCCTGGACAACAGAGCAAAACCCTGTCTCTAAAAAAAGAAAAGAAAA
+GAAAAACTCACTGGATATGAATGATACAGGTTGAGGATCCATTATCTGAAATGCTTGGAC
+CAGATGTTTTGAATTTTGGATTTTTTCATATTTTGTAATCTTTGCAGTATATTTACCAGT
+TCAGCATCCCTAACTCAAAAATTCAAAAATCTGAAATCCCAAACGCGCCAATAAGCATTC
+CCTTTGAGCGTCATGTCGGTGCTTGGAATGTTTGGGGTTTTGGATTTACAGCTTTGGGAC
+GCTCAACCTGTACCTCAATAAACCTGATTTTAAAAAAGTTTGGGGGGATTCCCCTAAGCC
+CGCCACCCGGAGACAGCGGATTTCCTTAGTTACTTACTATGCTCCTTGGCCATTTCTCTA
+GGTATTGGTATATTGTGTCTGCTGTGAACTGTCCTTGGCCTGTTTGGTGACGGGTGAGGA
+GCAGGGACAGAAGGGTCCTGCGTGCCCTGCCTTCACAAGCCCCTGGAAGGAAAGTTGTTT
+TGGGATCTCTGCACCCTCAGCCTGGACAACTTGTGCCCATCTGGTGACCCCTCACTCAGC
+CACCAGACTTCCACGACAGGCTCCAGCCTCGGCACCTTCAGCCATGGACAGTTCCGCCAG
+CGTTGCCCTCTGTTCTGCTGTTTTCTCTACCAGAAGTGCCCTTCCCTCCTCACCTGACCA
+CTCTGGGGAAATCCCTCAGCACCCTCCCTGAGCATACCCTACTCTGGCACAAGCCCACCC
+TGCAAAGCCCCTGAGGCCCGCCCTGTGGCGTCTCTCCCTCCCTTGCTGTCAGGACAGTGG
+TCCTGGCCACCGGGGCTCACGGAGCCGCCCTGTGCCGTGTACCTCTGAGCCCTCTGCACA
+GTGCCTTCTGCTTGCCTGTGGCTTTGAGAAGAAACCCCTTCTGGTTATACATAAGACAGC
+CAGAGAAGGGAGTTGCCCAGGGTGGCACAGCACGTTGCTGCCAGTTACTGCCATTTTCAC
+GGGCATGAAATGGAGATAACAACAGGAGCGACCGCACAGGCTGCTGAGCGCGTCACACGC
+AGCCATCGCGCAGCTCAGGGATATTACGTGTAACTCGACATGTCAGCGATTGTCACAGGC
+ACTGCTACTCCTGGGGTTTTCCATCAAACCCTCAAGAGCTGGGCCTGGGGTCAACTTCCG
+GCCTGGGGAAACTGGGGCAAGTATCACCAGAGATGAGCTTTATAAAAATAATGGTGCTAG
+CTGGGCATGGTGGCTTGCACCTGTAATCCCAGCACTTTGGGAGGCCGAGCTAGGAGGATC
+GTTTGAGTCCAGCAGTTTGAGACCAGCCTGGCCAATACGGCAAAACCCAGTCTCTACAAA
+AAATACAAAAAACAACTAGCCAGGCGTGGTGGTGCACACCTGTAGTCCCAGCTACTCAGG
+AGGCTGAGGGGGAAGGACTGCTTGAGCCCAGGAGTTTGAGGCTGCTGTGAGCTGTGATCG
+CATCACTGCATTCCAGCCCGGTGACAGAGTGAGTCACTGTCTCAAAAAAGAAAGGAAGAA
+ATAAAGAAAACAAATAAAAATAATAGTGCAGACAAAAGGCCTTGACCCATCTAGCTTTGG
+CCCTCAGCATCAACCGCTAGATACGTCCCTCCCTTTCTTCTGGGGCACAGGTCACACTCT
+CTTCCAGGTCTAGGATGCAGCTGAGGGGTGCCCCTCTTACCATCTAATCTGTGCCCTTAT
+TTCCTCTGCTTTAGTGAGGAAGAGGCCCCTGGTCCATGAAGGGGCCTTTCAGAGACGGGG
+ACCCCTGAGGAGCCCCGAGCAGCAGCCGTCGTGTCTCACCCAGGGTGTCTGAAACAGATG
+TGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCT
+TTGAAAGGCCACGTGACCTGGCCCACGGCTGGCAGGTGGGACCCAGCTGCAGGGGTCCAG
+CAGCACCCACAGCAGCCACCTGTGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTG
+CCCAGATGGCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCTGTCACAGCAGGTAAG
+ACTCTGCTTTCTGGGCAACCCAGCAGGTGACCCTGGAATTCCTGTCCATCTGGCAGGTGG
+GCATTGAAACTGGTTTAAAAATGTCACACCATAGGCCGGGCACAGTGGCTCACGCCTGTA
+ATCCCAGCCCTTTGGGAGGCCAGGGTGGGTGGATCACTTGAGGTCAGGAGTTCAAGACCA
+GCCTGGCCAACATGGTGAAACCCCGTCTACTAAAAATACAAAAATTAGCCTGGCGTGGTG
+GCGCATGCCTGTAATCCCAGCTACTTGGGAAGCTGAGGGATGAGAACTGCTTGAACCTGG
+GAGGCAGACGTTGCAGTGAGCTGAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGT
+AAGACTCTGTCTCAAAAAAAAAAAAATCACACCATTTTGGCTTCAGATTGCATATCCTCC
+TGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAGAAAAAACATATAT
+ATACGCAAACCAGTATCCTACTGTGTGTGTCGTTTGTTGTGTTTTCGACAGCTGTCCGTG
+TTATAATAATTCCTCTAGTTCAAATTTATTCATTTTTAACTTCATAGTACCACATTCTAC
+ACACTGCCCATGTCCCCTCAAGCTTCCCCTGGCTCCTGCAACCACAAATCTACTCTCTGC
+CTCTGTGGGTTGACCTATTCTGGACACGTCATAGAAATAGAGTCCTGCAACACGTGGCCG
+TCTGTGTCTGGCTTCTCTCGCTTAGCATCTTGTTTCCAAGGTCCTCCCACAGTGTAGCAT
+GCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAT
+GGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACACACCCGCT
+ACACTCCTTCTTAGGGCTGATATTCCACGCACCCGCTACACTCCTTCTTAGGGCTGATAT
+TCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTT
+CTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCAC