Adds option to deep copy attributes in ReverseComplement #717

Merged
merged 6 commits into from Oct 27, 2016
@@ -26,17 +26,12 @@
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.Locatable;
+import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.Serializable;
import java.lang.reflect.Array;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
/**
@@ -161,6 +156,16 @@
*/
public static final int MAX_INSERT_SIZE = 1<<29;
+ /**
+ * Tags that are known to need the reverse complement if the read is reverse complemented.
+ */
+ public static List<String> TAGS_TO_REVERSE_COMPLEMENT = Arrays.asList(SAMTag.E2.name(), SAMTag.SQ.name());
+
+ /**
+ * Tags that are known to need the reverse if the read is reverse complemented.
+ */
+ public static List<String> TAGS_TO_REVERSE = Arrays.asList(SAMTag.OQ.name(), SAMTag.U2.name());
+
private String mReadName = null;
private byte[] mReadBases = NULL_SEQUENCE;
private byte[] mBaseQualities = NULL_QUALS;
@@ -2112,7 +2117,8 @@ private String buildMessage(final String baseMessage, final boolean isMate) {
/**
* Note that this does a shallow copy of everything, except for the attribute list, for which a copy of the list
* is made, but the attributes themselves are copied by reference. This should be safe because callers should
- * never modify a mutable value returned by any of the get() methods anyway.
+ * never modify a mutable value returned by any of the get() methods anyway. If the SEQ or QUAL of a cloned
+ * record needs to be modified, a deeper copy should be made first (e.g. when reverse complementing).
*/
@Override
public Object clone() throws CloneNotSupportedException {
@@ -2248,5 +2254,125 @@ public final Object removeTransientAttribute(final Object key) {
if (this.transientAttributes != null) return this.transientAttributes.remove(key);
else return null;
}
-}
+ /**
+ * Reverse-complement bases and reverse quality scores along with known optional attributes that
+ * need the same treatment. The changes are applied to copies of the bases, qualities,
+ * and any array attributes that will be altered, rather than to the existing arrays. If an
+ * in-place update is needed use {@link #reverseComplement(boolean)}.
+ * See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
+ * for the default set of tags that are handled.
+ */
+ public void reverseComplement() {
+ reverseComplement(false); // false = not in-place, i.e. copy before modifying
+ }
+
+ /**
+ * Reverse-complement bases and reverse quality scores along with known optional attributes that
+ * need the same treatment. Optionally makes a copy of the bases, qualities or attributes instead
@yfarjoun

yfarjoun Oct 20, 2016

Contributor

"Optionally"? I don't see a way to disable it in this invocation.

@yfarjoun

yfarjoun Oct 20, 2016

Contributor

scratch that. I misunderstood that comment.

+ * of altering them in-place. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
+ * for the default set of tags that are handled.
+ *
+ * @param inplace If false, the bases, qualities and each affected attribute are cloned before being changed; if true, they are modified in place.
+ */
+ public void reverseComplement(boolean inplace) {
+ reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, inplace); // apply the default tag lists, forwarding the in-place flag
+ }
+
+ /**
+ * Reverse complement bases and reverse quality scores. In addition reverse complement any
+ * non-null attributes specified by tagsToRevcomp and reverse any non-null attributes
+ * specified by tagsToReverse.
+ *
+ * @param tagsToRevcomp Tags whose values are reverse complemented; values must be byte[] or String. May be null.
+ * @param tagsToReverse Tags whose values are reversed; values must be String, byte[], short[], int[] or float[]. May be null.
+ * @param inplace If false, the bases, qualities and array-valued attributes are cloned before being changed;
+ * if true, the existing arrays are modified directly. String values are always replaced by new Strings.
+ * @throws UnsupportedOperationException if a listed attribute holds a value of an unsupported type.
+ */
+ public void reverseComplement(final Collection<String> tagsToRevcomp, final Collection<String> tagsToReverse, boolean inplace) {
+ final byte[] readBases = inplace ? getReadBases() : getReadBases().clone(); // work on a copy unless in-place was requested
+ SequenceUtil.reverseComplement(readBases);
+ setReadBases(readBases);
+ final byte qualities[] = inplace ? getBaseQualities() : getBaseQualities().clone(); // same copy-or-in-place choice for qualities
+ reverseArray(qualities); // qualities are only reversed, never complemented
+ setBaseQualities(qualities);
+
+ // Deal with tags that need to be reverse complemented
+ if (tagsToRevcomp != null) {
+ for (final String tag: tagsToRevcomp) {
+ Object value = getAttribute(tag);
+ if (value != null) { // null values are skipped
+ if (value instanceof byte[]) {
+ value = inplace ? value : ((byte[]) value).clone();
+ SequenceUtil.reverseComplement((byte[]) value);
+ } else if (value instanceof String) {
+ //SequenceUtil.reverseComplement is in-place for bytes but copies Strings since they are immutable.
+ value = SequenceUtil.reverseComplement((String) value);
+ } else {
+ throw new UnsupportedOperationException("Don't know how to reverse complement: " + value);
+ }
+ setAttribute(tag, value); // store the processed value back under the same tag
+ }
+ }
+ }
+
+ // Deal with tags that needed to just be reversed
+ if (tagsToReverse != null) {
+ for (final String tag : tagsToReverse) {
+ Object value = getAttribute(tag);
+ if (value != null) { // null values are skipped
+ if (value instanceof String) {
+ value = StringUtil.reverseString((String) value);
+ } else if (value.getClass().isArray()) {
+ if (value instanceof byte[]) {
+ value = inplace ? value : ((byte[]) value).clone();
+ reverseArray((byte[]) value);
+ } else if (value instanceof short[]) {
+ value = inplace ? value : ((short[]) value).clone();
+ reverseArray((short[]) value);
+ } else if (value instanceof int[]) {
+ value = inplace ? value : ((int[]) value).clone();
+ reverseArray((int[]) value);
+ } else if (value instanceof float[]) {
+ value = inplace ? value : ((float[]) value).clone();
+ reverseArray((float[]) value);
+ } else {
+ throw new UnsupportedOperationException("Reversing array attribute of type " + value.getClass().getComponentType() + " not supported.");
+ }
+ } else {
+ throw new UnsupportedOperationException("Don't know how to reverse: " + value);
+ }
+
+ setAttribute(tag, value); // store the processed value back under the same tag
+ }
+ }
+ }
+ }
+
+ private static void reverseArray(final byte[] array) { // reverses the byte array in place
+ for (int i=0, j=array.length-1; i<j; ++i, --j) { // swap from both ends until the indices meet
+ final byte tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ private static void reverseArray(final short[] array) { // reverses the short array in place
+ for (int i=0, j=array.length-1; i<j; ++i, --j) { // swap from both ends until the indices meet
+ final short tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ private static void reverseArray(final int[] array) { // reverses the int array in place
+ for (int i=0, j=array.length-1; i<j; ++i, --j) { // swap from both ends until the indices meet
+ final int tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ private static void reverseArray(final float[] array) { // reverses the float array in place
+ for (int i=0, j=array.length-1; i<j; ++i, --j) { // swap from both ends until the indices meet
+ final float tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+}
@@ -31,99 +31,47 @@
import java.util.List;
/**
+ *
+ * @deprecated Use {@link SAMRecord#reverseComplement()} instead, which by default reverse complements copies of
+ * the bases, qualities and attributes rather than changing them in-place.
+ *
* @author alecw@broadinstitute.org
*/
+@Deprecated
public class SAMRecordUtil {
public static List<String> TAGS_TO_REVERSE_COMPLEMENT = Arrays.asList(SAMTag.E2.name(), SAMTag.SQ.name());
public static List<String> TAGS_TO_REVERSE = Arrays.asList(SAMTag.OQ.name(), SAMTag.U2.name());
/**
* Reverse-complement bases and reverse quality scores along with known optional attributes that
- * need the same treatment. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
+ * need the same treatment. Changes made in-place, instead of making a copy of the bases, qualities,
+ * or attributes. If a copy is needed use {@link #reverseComplement(SAMRecord, boolean)}.
+ * See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
* for the default set of tags that are handled.
*/
public static void reverseComplement(final SAMRecord rec) {
- reverseComplement(rec, TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE);
+ rec.reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, true); // inplace=true keeps this method's in-place behavior
+ }
+
+ /**
+ * Reverse-complement bases and reverse quality scores along with known optional attributes that
+ * need the same treatment. Optionally makes a copy of the bases, qualities or attributes instead
+ * of altering them in-place. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
+ * for the default set of tags that are handled.
+ *
+ * @param rec Record to reverse complement.
+ * @param inplace Setting this to false will clone all attributes, bases and qualities before changing the values.
+ */
+ public static void reverseComplement(final SAMRecord rec, boolean inplace) {
+ rec.reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, inplace); // delegate to the record using this class's default tag lists
}
/**
* Reverse complement bases and reverse quality scores. In addition reverse complement any
* non-null attributes specified by tagsToRevcomp and reverse any non-null attributes
* specified by tagsToReverse.
*/
- public static void reverseComplement(final SAMRecord rec, final Collection<String> tagsToRevcomp, final Collection<String> tagsToReverse) {
- final byte[] readBases = rec.getReadBases();
- SequenceUtil.reverseComplement(readBases);
- rec.setReadBases(readBases);
- final byte qualities[] = rec.getBaseQualities();
- reverseArray(qualities);
- rec.setBaseQualities(qualities);
-
- // Deal with tags that need to be reverse complemented
- if (tagsToRevcomp != null) {
- for (final String tag: tagsToRevcomp) {
- Object value = rec.getAttribute(tag);
- if (value != null) {
- if (value instanceof byte[]) SequenceUtil.reverseComplement((byte[]) value);
- else if (value instanceof String) value = SequenceUtil.reverseComplement((String) value);
- else throw new UnsupportedOperationException("Don't know how to reverse complement: " + value);
- rec.setAttribute(tag, value);
- }
- }
- }
-
- // Deal with tags that needed to just be reversed
- if (tagsToReverse != null) {
- for (final String tag : tagsToReverse) {
- Object value = rec.getAttribute(tag);
- if (value != null) {
- if (value instanceof String) {
- value = StringUtil.reverseString((String) value);
- }
- else if (value.getClass().isArray()) {
- if (value instanceof byte[]) reverseArray((byte[]) value);
- else if (value instanceof short[]) reverseArray((short[]) value);
- else if (value instanceof int[]) reverseArray((int[]) value);
- else if (value instanceof float[]) reverseArray((float[]) value);
- else throw new UnsupportedOperationException("Reversing array attribute of type " + value.getClass().getComponentType() + " not supported.");
- }
- else throw new UnsupportedOperationException("Don't know how to reverse: " + value);
-
- rec.setAttribute(tag, value);
- }
- }
- }
- }
-
- private static void reverseArray(final byte[] array) {
- for (int i=0, j=array.length-1; i<j; ++i, --j) {
- final byte tmp = array[i];
- array[i] = array[j];
- array[j] = tmp;
- }
- }
-
- private static void reverseArray(final short[] array) {
- for (int i=0, j=array.length-1; i<j; ++i, --j) {
- final short tmp = array[i];
- array[i] = array[j];
- array[j] = tmp;
- }
- }
-
- private static void reverseArray(final int[] array) {
- for (int i=0, j=array.length-1; i<j; ++i, --j) {
- final int tmp = array[i];
- array[i] = array[j];
- array[j] = tmp;
- }
- }
-
- private static void reverseArray(final float[] array) {
- for (int i=0, j=array.length-1; i<j; ++i, --j) {
- final float tmp = array[i];
- array[i] = array[j];
- array[j] = tmp;
- }
+ public static void reverseComplement(final SAMRecord rec, final Collection<String> tagsToRevcomp, final Collection<String> tagsToReverse, boolean inplace) {
+ rec.reverseComplement(tagsToRevcomp, tagsToReverse, inplace); // pass-through to the instance method on the record
}
}
@@ -973,4 +973,69 @@ public void testResolveNameNullHeader() {
SAMRecord.resolveNameFromIndex(1, null);
}
+ @Test
+ public void testReverseComplement() {
+ final SAMRecord rec = createTestSamRec();
+
+ rec.reverseComplement(Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5"), false); // X5 is not set by createTestSamRec()
+ Assert.assertEquals(rec.getReadString(), "GTGTGTGTGT"); // reverse complement of ACACACACAC
+ Assert.assertEquals(rec.getBaseQualityString(), "IIIIIHHHHH"); // qualities are reversed only
+ Assert.assertEquals(rec.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1});
+ Assert.assertEquals(rec.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1});
+ Assert.assertEquals(rec.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1});
+ Assert.assertEquals(rec.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f});
+ Assert.assertEquals(rec.getStringAttribute("Y1"), "GTTTTCTTTT"); // Y1 was passed in the reverse-complement list
+ }
+
+ /**
+ * Note that since strings are immutable the Y1 attribute, which is a String, is not reversed in the original even
+ * if an in-place reverse complement occurred. The bases and qualities are byte[] so they are reversed if in-place
+ * is true.
+ *
+ * Each row holds the inplace flag followed by the values expected on the ORIGINAL record (bases, qualities,
+ * Y1, X1, X2, X3, X4) after its clone has been reverse complemented.
+ */
+ @DataProvider
+ public Object [][] reverseComplementData() {
+ return new Object[][]{
+ {false, "ACACACACAC", "HHHHHIIIII", "AAAAGAAAAC", new byte[] {1,2,3,4,5}, new short[] {1,2,3,4,5}, new int[] {1,2,3,4,5}, new float[] {1,2,3,4,5}}, // copy mode: original keeps its initial values
+ {true, "GTGTGTGTGT", "IIIIIHHHHH", "AAAAGAAAAC", new byte[] {5,4,3,2,1}, new short[] {5,4,3,2,1}, new int[] {5,4,3,2,1}, new float[] {5,4,3,2,1}}, // in-place: arrays are reversed, String Y1 is unchanged
+ };
+ }
+
+ @Test(dataProvider = "reverseComplementData")
+ public void testSafeReverseComplement(boolean inplace, String bases, String quals, String y1, byte[] x1, short[] x2, int[] x3, float[] x4) throws CloneNotSupportedException {
+ final SAMRecord original = createTestSamRec();
+ final SAMRecord cloneOfOriginal = (SAMRecord) original.clone();
+ //Reverse complements the clone, either on copies or in-place depending on the data provider row
+ cloneOfOriginal.reverseComplement(Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5"), inplace);
+
+ Assert.assertEquals(original.getReadString(), bases); // expected state of the original comes from the data provider row
+ Assert.assertEquals(original.getBaseQualityString(), quals);
+ Assert.assertEquals(original.getByteArrayAttribute("X1"), x1);
+ Assert.assertEquals(original.getSignedShortArrayAttribute("X2"), x2);
+ Assert.assertEquals(original.getSignedIntArrayAttribute("X3"), x3);
+ Assert.assertEquals(original.getFloatArrayAttribute("X4"), x4);
+ Assert.assertEquals(original.getStringAttribute("Y1"), y1);
+
+ Assert.assertEquals(cloneOfOriginal.getReadString(), "GTGTGTGTGT"); // the clone must be reverse complemented for every row
+ Assert.assertEquals(cloneOfOriginal.getBaseQualityString(), "IIIIIHHHHH");
+ Assert.assertEquals(cloneOfOriginal.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1});
+ Assert.assertEquals(cloneOfOriginal.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1});
+ Assert.assertEquals(cloneOfOriginal.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1});
+ Assert.assertEquals(cloneOfOriginal.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f});
+ Assert.assertEquals(cloneOfOriginal.getStringAttribute("Y1"), "GTTTTCTTTT");
+
+ }
+
+ public SAMRecord createTestSamRec() { // builds the fixture record shared by the reverse-complement tests
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMRecord rec = new SAMRecord(header);
+ rec.setReadString("ACACACACAC");
+ rec.setBaseQualityString("HHHHHIIIII");
+ rec.setAttribute("X1", new byte[] {1,2,3,4,5}); // X1-X4: one array attribute per supported element type
+ rec.setAttribute("X2", new short[] {1,2,3,4,5});
+ rec.setAttribute("X3", new int[] {1,2,3,4,5});
+ rec.setAttribute("X4", new float[] {1.0f,2.0f,3.0f,4.0f,5.0f});
+ rec.setAttribute("Y1", "AAAAGAAAAC"); // String-valued attribute
+
+ return(rec);
+ }
}
@@ -1,29 +0,0 @@
-package htsjdk.samtools;
-
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import java.util.Arrays;
-
-public class SAMRecordUtilTest {
- @Test public void testReverseComplement() {
- final SAMFileHeader header = new SAMFileHeader();
- final SAMRecord rec = new SAMRecord(header);
- rec.setReadString("ACACACACAC");
- rec.setBaseQualityString("HHHHHIIIII");
- rec.setAttribute("X1", new byte[] {1,2,3,4,5});
- rec.setAttribute("X2", new short[] {1,2,3,4,5});
- rec.setAttribute("X3", new int[] {1,2,3,4,5});
- rec.setAttribute("X4", new float[] {1.0f,2.0f,3.0f,4.0f,5.0f});
- rec.setAttribute("Y1", "AAAAGAAAAC");
-
- SAMRecordUtil.reverseComplement(rec, Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5"));
- Assert.assertEquals(rec.getReadString(), "GTGTGTGTGT");
- Assert.assertEquals(rec.getBaseQualityString(), "IIIIIHHHHH");
- Assert.assertEquals(rec.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1});
- Assert.assertEquals(rec.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1});
- Assert.assertEquals(rec.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1});
- Assert.assertEquals(rec.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f});
- Assert.assertEquals(rec.getStringAttribute("Y1"), "GTTTTCTTTT");
- }
-}