Permalink
Browse files

Added support for an empty NM tag in SA tag fields using '*' character (

#693)

previously an empty NM tag would result in a 0 in the SA field, this was deceptive since having no mismatches is not the same as having an unknown number of mismatches.  
Now a null NM tag results in * to match other null fields.
  • Loading branch information...
1 parent 255e41a commit 0e6edc78bf9859e9b3f91f1cee330f58a5b1ed61 @jamesemery jamesemery committed with lbergelson Sep 15, 2016
Showing with 44 additions and 43 deletions.
  1. +31 −29 src/main/java/htsjdk/samtools/SAMUtils.java
  2. +13 −14 src/test/java/htsjdk/samtools/SAMUtilsTest.java
@@ -1103,7 +1103,7 @@ public static SAMRecord clipOverlappingAlignedBases(final SAMRecord record, fina
public static boolean isValidUnsignedIntegerAttribute(long value) {
return value >= 0 && value <= BinaryCodec.MAX_UINT;
}
-
+
/**
* Extract a List of 'other canonical alignments' from a SAM record. Those alignments are stored as a string in the 'SA' tag as defined
* in the SAM specification.
@@ -1119,40 +1119,40 @@ public static boolean isValidUnsignedIntegerAttribute(long value) {
if( saValue == null ) return Collections.emptyList();
if( ! (saValue instanceof String) ) throw new SAMException(
"Expected a String for attribute 'SA' but got " + saValue.getClass() );
-
+
final SAMRecordFactory samReaderFactory = new DefaultSAMRecordFactory();
-
- /* the spec says: "Other canonical alignments in a chimeric alignment, formatted as a
- * semicolon-delimited list: (rname,pos,strand,CIGAR,mapQ,NM;)+.
+
+ /* the spec says: "Other canonical alignments in a chimeric alignment, formatted as a
+ * semicolon-delimited list: (rname,pos,strand,CIGAR,mapQ,NM;)+.
* Each element in the list represents a part of the chimeric alignment.
* Conventionally, at a supplementary line, the 1rst element points to the primary line.
*/
-
+
/* break string using semicolon */
final String semiColonStrs[] = SEMICOLON_PAT.split((String)saValue);
-
+
/* the result list */
final List<SAMRecord> alignments = new ArrayList<>( semiColonStrs.length );
-
+
/* base SAM flag */
int record_flag = record.getFlags() ;
record_flag &= ~SAMFlag.PROPER_PAIR.flag;
record_flag &= ~SAMFlag.SUPPLEMENTARY_ALIGNMENT.flag;
record_flag &= ~SAMFlag.READ_REVERSE_STRAND.flag;
-
-
+
+
for(int i=0; i< semiColonStrs.length;++i ) {
final String semiColonStr = semiColonStrs[i];
/* ignore empty string */
if( semiColonStr.isEmpty() ) continue;
-
+
/* break string using comma */
final String commaStrs[] = COMMA_PAT.split(semiColonStr);
if( commaStrs.length != 6 ) throw new SAMException("Bad 'SA' attribute in " + semiColonStr);
-
+
/* create the new record */
final SAMRecord otherRec = samReaderFactory.createSAMRecord( record.getHeader() );
-
+
/* copy fields from the original record */
otherRec.setReadName( record.getReadName() );
otherRec.setReadBases( record.getReadBases() );
@@ -1161,61 +1161,63 @@ public static boolean isValidUnsignedIntegerAttribute(long value) {
otherRec.setMateReferenceIndex( record.getMateReferenceIndex() );
otherRec.setMateAlignmentStart( record.getMateAlignmentStart() );
}
-
-
+
+
/* get reference sequence */
final int tid = record.getHeader().getSequenceIndex( commaStrs[0] );
if( tid == -1 ) throw new SAMException("Unknown contig in " + semiColonStr);
otherRec.setReferenceIndex( tid );
-
+
/* fill POS */
final int alignStart;
try {
alignStart = Integer.parseInt(commaStrs[1]);
} catch( final NumberFormatException err ) {
throw new SAMException("bad POS in "+semiColonStr, err);
}
-
- otherRec.setAlignmentStart( alignStart );
-
+
+ otherRec.setAlignmentStart( alignStart );
+
/* set TLEN */
- if( record.getReadPairedFlag() &&
- !record.getMateUnmappedFlag() &&
+ if( record.getReadPairedFlag() &&
+ !record.getMateUnmappedFlag() &&
record.getMateReferenceIndex() == tid ) {
otherRec.setInferredInsertSize( record.getMateAlignmentStart() - alignStart );
}
- /* set FLAG */
+ /* set FLAG */
int other_flag = record_flag;
other_flag |= (commaStrs[2].equals("+") ? 0 : SAMFlag.READ_REVERSE_STRAND.flag) ;
/* spec: Conventionally, at a supplementary line, the 1st element points to the primary line */
if( !( record.getSupplementaryAlignmentFlag() && i==0 ) ) {
other_flag |= SAMFlag.SUPPLEMENTARY_ALIGNMENT.flag;
- }
+ }
otherRec.setFlags(other_flag);
-
+
/* set CIGAR */
otherRec.setCigar( TextCigarCodec.decode( commaStrs[3] ) );
-
+
/* set MAPQ */
try {
otherRec.setMappingQuality( Integer.parseInt(commaStrs[4]) );
} catch (final NumberFormatException err) {
throw new SAMException("bad MAPQ in "+semiColonStr, err);
}
-
+
/* fill NM */
try {
- otherRec.setAttribute( SAMTagUtil.getSingleton().NM , Integer.parseInt(commaStrs[5]) );
+ if (!commaStrs[5].equals("*")) {
+ otherRec.setAttribute(SAMTagUtil.getSingleton().NM, Integer.parseInt(commaStrs[5]));
+ }
} catch (final NumberFormatException err) {
throw new SAMException("bad NM in "+semiColonStr, err);
}
-
+
/* if strand is not the same: reverse-complement */
if( otherRec.getReadNegativeStrandFlag() != record.getReadNegativeStrandFlag() ) {
SAMRecordUtil.reverseComplement(otherRec);
}
-
+
/* add the alignment */
alignments.add( otherRec );
}
@@ -173,7 +173,7 @@ public void testClippingOfRecordWithMateAtSamePosition() {
record.setSecondOfPairFlag(true);
Assert.assertEquals(SAMUtils.getNumOverlappingAlignedBasesToClip(record), 10);
}
-
+
@Test
public void testOtherCanonicalAlignments() {
// setup the record
@@ -190,26 +190,26 @@ public void testOtherCanonicalAlignments() {
record.setMateAlignmentStart(1);
record.setReadPairedFlag(true);
record.setSupplementaryAlignmentFlag(true);//spec says first 'SA' record will be the primary record
-
+
record.setMateReferenceIndex(0);
record.setMateAlignmentStart(100);
record.setInferredInsertSize(99);
-
+
record.setReadBases("AAAAAAAAAA".getBytes());
record.setBaseQualities("##########".getBytes());
// check no alignments if no SA tag */
Assert.assertEquals(SAMUtils.getOtherCanonicalAlignments(record).size(),0);
-
-
+
+
record.setAttribute(SAMTagUtil.getSingleton().SA,
- "2,500,+,3S2=1X2=2S,60,1;" +
- "1,191,-,8M2S,60,0;");
-
+ "2,500,+,3S2=1X2=2S,60,1;" +
+ "1,191,-,8M2S,60,*;");
+
// extract suppl alignments
final List<SAMRecord> suppl = SAMUtils.getOtherCanonicalAlignments(record);
Assert.assertNotNull(suppl);
Assert.assertEquals(suppl.size(), 2);
-
+
for(final SAMRecord other: suppl) {
Assert.assertFalse(other.getReadUnmappedFlag());
Assert.assertTrue(other.getReadPairedFlag());
@@ -223,7 +223,7 @@ public void testOtherCanonicalAlignments() {
Assert.assertEquals(other.getBaseQualityString(),record.getBaseQualityString());
}
}
-
+
SAMRecord other = suppl.get(0);
Assert.assertFalse(other.getSupplementaryAlignmentFlag());//1st of suppl and 'record' is supplementary
Assert.assertEquals(other.getReferenceName(),"2");
@@ -234,18 +234,17 @@ public void testOtherCanonicalAlignments() {
Assert.assertEquals(other.getCigarString(),"3S2=1X2=2S");
Assert.assertEquals(other.getInferredInsertSize(),0);
-
+
other = suppl.get(1);
Assert.assertTrue(other.getSupplementaryAlignmentFlag());
Assert.assertEquals(other.getReferenceName(),"1");
Assert.assertEquals(other.getAlignmentStart(),191);
Assert.assertTrue(other.getReadNegativeStrandFlag());
Assert.assertEquals(other.getMappingQuality(), 60);
- Assert.assertEquals(other.getAttribute(SAMTagUtil.getSingleton().NM),0);
+ Assert.assertEquals(other.getAttribute(SAMTagUtil.getSingleton().NM),null);
Assert.assertEquals(other.getCigarString(),"8M2S");
Assert.assertEquals(other.getInferredInsertSize(),-91);//100(mate) - 191(other)
-
}
-
+
}

0 comments on commit 0e6edc7

Please sign in to comment.