Expose the set of tags to reverse and reverse complement as parameters. #673

Merged
merged 1 commit into from Aug 2, 2016
Jump to file or symbol
Failed to load files and symbols.
+105 −28
Split
@@ -26,56 +26,104 @@
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
/**
* @author alecw@broadinstitute.org
*/
public class SAMRecordUtil {
-
- /** List of String tags that must be reversed if present when a SAMRecord is reverseComplemented */
- private static final short[] STRING_TAGS_TO_REVERSE = {
- SAMTagUtil.getSingleton().U2,
- SAMTagUtil.getSingleton().OQ
- };
+ public static List<String> TAGS_TO_REVERSE_COMPLEMENT = Arrays.asList(SAMTag.E2.name(), SAMTag.SQ.name());
+ public static List<String> TAGS_TO_REVERSE = Arrays.asList(SAMTag.OQ.name(), SAMTag.U2.name());
/**
- * Reverse-complement all known sequence and base quality attributes of the SAMRecord.
+ * Reverse-complement bases and reverse quality scores along with known optional attributes that
+ * need the same treatment. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
+ * for the default set of tags that are handled.
*/
public static void reverseComplement(final SAMRecord rec) {
+ reverseComplement(rec, TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE);
+ }
+
+ /**
+ * Reverse complement bases and reverse quality scores. In addition reverse complement any
+ * non-null attributes specified by tagsToRevcomp and reverse and non-null attributes
+ * specified by tagsToReverse.
+ */
+ public static void reverseComplement(final SAMRecord rec, final Collection<String> tagsToRevcomp, final Collection<String> tagsToReverse) {
final byte[] readBases = rec.getReadBases();
SequenceUtil.reverseComplement(readBases);
rec.setReadBases(readBases);
final byte qualities[] = rec.getBaseQualities();
reverseArray(qualities);
rec.setBaseQualities(qualities);
- final byte[] sqTagValue = (byte[])rec.getAttribute(SAMTagUtil.getSingleton().SQ);
- if (sqTagValue != null) {
- SQTagUtil.reverseComplementSqArray(sqTagValue);
- rec.setAttribute(SAMTagUtil.getSingleton().SQ, sqTagValue);
- }
- final String e2TagValue = (String)rec.getAttribute(SAMTagUtil.getSingleton().E2);
- if (e2TagValue != null) {
- final byte[] secondaryBases = StringUtil.stringToBytes(e2TagValue);
- SequenceUtil.reverseComplement(secondaryBases);
- rec.setAttribute(SAMTagUtil.getSingleton().E2, StringUtil.bytesToString(secondaryBases));
+
+ // Deal with tags that need to be reverse complemented
+ if (tagsToRevcomp != null) {
+ for (final String tag: tagsToRevcomp) {
+ Object value = rec.getAttribute(tag);
+ if (value != null) {
+ if (value instanceof byte[]) SequenceUtil.reverseComplement((byte[]) value);
+ else if (value instanceof String) value = SequenceUtil.reverseComplement((String) value);
+ else throw new UnsupportedOperationException("Don't know how to reverse complement: " + value);
+ rec.setAttribute(tag, value);
+ }
+ }
}
- for (final short stringTag : STRING_TAGS_TO_REVERSE) {
- final String value = (String)rec.getAttribute(stringTag);
- if (value != null) {
- rec.setAttribute(stringTag, StringUtil.reverseString(value));
+
+ // Deal with tags that needed to just be reversed
+ if (tagsToReverse != null) {
+ for (final String tag : tagsToReverse) {
+ Object value = rec.getAttribute(tag);
+ if (value != null) {
+ if (value instanceof String) {
+ value = StringUtil.reverseString((String) value);
+ }
+ else if (value.getClass().isArray()) {
@eitanbanks

eitanbanks Aug 1, 2016

Contributor

Just out of curiosity, why not maintain the previous behavior (converting to String and then reversing that)? Code was a lot cleaner. Is it b/c of bad performance?
Maybe pull all this out into a reverseArray() method that takes in an Object for code cleanliness?

@nh13

nh13 Aug 1, 2016

Contributor

It actually only supported String and not the array types. The increased complexity is to handle the various array types. I am not sure moving the 5 lines of code into a seperate reverseArray method is really cleaner.

+ if (value instanceof byte[]) reverseArray((byte[]) value);
+ else if (value instanceof short[]) reverseArray((short[]) value);
+ else if (value instanceof int[]) reverseArray((int[]) value);
+ else if (value instanceof float[]) reverseArray((float[]) value);
+ else throw new UnsupportedOperationException("Reversing array attribute of type " + value.getClass().getComponentType() + " not supported.");
+ }
@yfarjoun

yfarjoun Aug 2, 2016

Contributor

shouldn't there be another else clause here?

@tfenne

tfenne Aug 2, 2016

Owner

Yes, done.

+ else throw new UnsupportedOperationException("Don't know how to reverse: " + value);
+
+ rec.setAttribute(tag, value);
+ }
}
}
}
- /**
- * Reverse the given array in place.
- */
- public static void reverseArray(final byte[] array) {
- final int lastIndex = array.length - 1;
- int i, j;
- for (i=0, j=lastIndex; i<j; ++i, --j) {
+ private static void reverseArray(final byte[] array) {
+ for (int i=0, j=array.length-1; i<j; ++i, --j) {
final byte tmp = array[i];
array[i] = array[j];
array[j] = tmp;
}
}
+
+ private static void reverseArray(final short[] array) {
+ for (int i=0, j=array.length-1; i<j; ++i, --j) {
+ final short tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ private static void reverseArray(final int[] array) {
+ for (int i=0, j=array.length-1; i<j; ++i, --j) {
+ final int tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ private static void reverseArray(final float[] array) {
+ for (int i=0, j=array.length-1; i<j; ++i, --j) {
+ final float tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
}
@@ -0,0 +1,29 @@
+package htsjdk.samtools;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+
+public class SAMRecordUtilTest {
+ @Test public void testReverseComplement() {
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMRecord rec = new SAMRecord(header);
+ rec.setReadString("ACACACACAC");
+ rec.setBaseQualityString("HHHHHIIIII");
+ rec.setAttribute("X1", new byte[] {1,2,3,4,5});
+ rec.setAttribute("X2", new short[] {1,2,3,4,5});
+ rec.setAttribute("X3", new int[] {1,2,3,4,5});
+ rec.setAttribute("X4", new float[] {1.0f,2.0f,3.0f,4.0f,5.0f});
+ rec.setAttribute("Y1", "AAAAGAAAAC");
+
+ SAMRecordUtil.reverseComplement(rec, Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5"));
+ Assert.assertEquals(rec.getReadString(), "GTGTGTGTGT");
+ Assert.assertEquals(rec.getBaseQualityString(), "IIIIIHHHHH");
+ Assert.assertEquals(rec.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1});
+ Assert.assertEquals(rec.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1});
+ Assert.assertEquals(rec.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1});
+ Assert.assertEquals(rec.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f});
+ Assert.assertEquals(rec.getStringAttribute("Y1"), "GTTTTCTTTT");
+ }
+}