@@ -24,6 +24,8 @@
package htsjdk.samtools ;
+import htsjdk.samtools.util.Murmur3 ;
+
/* *
* This class helps us compute and compare duplicate scores, which are used for selecting the non-duplicate
* during duplicate marking (see MarkDuplicates).
@@ -33,9 +35,13 @@
public enum ScoringStrategy {
SUM_OF_BASE_QUALITIES ,
- TOTAL_MAPPED_REFERENCE_LENGTH
+ TOTAL_MAPPED_REFERENCE_LENGTH , // NOTE(review): presumably scores by the reference length covered by the read and its mate — confirm against computeDuplicateScore
+ RANDOM , // score is derived from a Murmur3 hash of the read name (see computeDuplicateScore)
}
+ /** Shared Murmur3 hasher applied to read names by the RANDOM scoring strategy; built with the argument 1 (presumably a fixed seed — TODO confirm against Murmur3). */
+ private static final Murmur3 hasher = new Murmur3 (1 );
+
/* * An enum to use for storing temporary attributes on SAMRecords. */
private static enum Attr { DuplicateScore }
@@ -80,6 +86,8 @@ public static short computeDuplicateScore(final SAMRecord record, final ScoringS
score += SAMUtils . getMateCigar(record). getReferenceLength();
}
break ;
+ case RANDOM :
+ score += (short ) (hasher. hashUnencodedChars(record. getReadName()) >> 16 );
}
storedScore = score;
I'm curious whether there's a reason you didn't opt to take the low-order bits using `& 0xFFFF` rather than shifting with `>> 16`. Speed is not an issue here, I presume, so was it for clarity?