Yf add unknown so #862

Merged
merged 7 commits into from May 2, 2017
@@ -24,6 +24,8 @@
package htsjdk.samtools;
+import htsjdk.samtools.util.CollectionUtil;
+import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.StringLineReader;
import java.io.StringWriter;
@@ -47,14 +49,17 @@
public static final String SORT_ORDER_TAG = "SO";
public static final String GROUP_ORDER_TAG = "GO";
public static final String CURRENT_VERSION = "1.5";
- public static final Set<String> ACCEPTABLE_VERSIONS =
- new HashSet<String>(Arrays.asList("1.0", "1.3", "1.4", "1.5"));
+ public static final Set<String> ACCEPTABLE_VERSIONS = CollectionUtil.makeSet("1.0", "1.3", "1.4", "1.5");
+ private SortOrder sortOrder = null;
+ private GroupOrder groupOrder = null;
+
+ private static final Log log = Log.getInstance(SAMFileHeader.class);
/**
* These tags are of known type, so don't need a type field in the text representation.
*/
public static final Set<String> STANDARD_TAGS =
- new HashSet<String>(Arrays.asList(VERSION_TAG, SORT_ORDER_TAG, GROUP_ORDER_TAG));
+ new HashSet<>(Arrays.asList(VERSION_TAG, SORT_ORDER_TAG, GROUP_ORDER_TAG));
@Override
Set<String> getStandardTags() {
@@ -65,11 +70,11 @@
* Ways in which a SAM or BAM may be sorted.
*/
public enum SortOrder {
-
unsorted(null),
queryname(SAMRecordQueryNameComparator.class),
coordinate(SAMRecordCoordinateComparator.class),
- duplicate(SAMRecordDuplicateComparator.class); // NB: this is not in the SAM spec!
+ duplicate(SAMRecordDuplicateComparator.class), // NB: this is not in the SAM spec!
+ unknown(null);
private final Class<? extends SAMRecordComparator> comparator;
@@ -106,16 +111,14 @@ public SAMRecordComparator getComparatorInstance() {
none, query, reference
}
- private List<SAMReadGroupRecord> mReadGroups =
- new ArrayList<SAMReadGroupRecord>();
- private List<SAMProgramRecord> mProgramRecords = new ArrayList<SAMProgramRecord>();
- private final Map<String, SAMReadGroupRecord> mReadGroupMap =
- new HashMap<String, SAMReadGroupRecord>();
- private final Map<String, SAMProgramRecord> mProgramRecordMap = new HashMap<String, SAMProgramRecord>();
+ private List<SAMReadGroupRecord> mReadGroups = new ArrayList<>();
+ private List<SAMProgramRecord> mProgramRecords = new ArrayList<>();
+ private final Map<String, SAMReadGroupRecord> mReadGroupMap = new HashMap<>();
+ private final Map<String, SAMProgramRecord> mProgramRecordMap = new HashMap<>();
private SAMSequenceDictionary mSequenceDictionary = new SAMSequenceDictionary();
- final private List<String> mComments = new ArrayList<String>();
+ final private List<String> mComments = new ArrayList<>();
private String textHeader;
- private final List<SAMValidationError> mValidationErrors = new ArrayList<SAMValidationError>();
+ private final List<SAMValidationError> mValidationErrors = new ArrayList<>();
public SAMFileHeader() {
setAttribute(VERSION_TAG, CURRENT_VERSION);
@@ -128,11 +131,11 @@ public SAMFileHeader(final SAMSequenceDictionary dict) {
}
/**
 * Returns the value of this header's {@code VERSION_TAG} attribute.
 * The added line looks the value up through the named constant instead of the
 * literal "VN" used by the removed line, and no longer casts the result.
 *
 * @return whatever {@code getAttribute(VERSION_TAG)} yields
 */
public String getVersion() {
- return (String) getAttribute("VN");
+ return getAttribute(VERSION_TAG);
}
/**
 * Returns the value of this header's "CR" attribute.
 * The only difference between the removed and added lines is the dropped cast.
 *
 * @return whatever {@code getAttribute("CR")} yields
 */
public String getCreator() {
- return (String) getAttribute("CR");
+ return getAttribute("CR");
}
public SAMSequenceDictionary getSequenceDictionary() {
@@ -249,26 +252,47 @@ public SAMProgramRecord createProgramRecord() {
}
/**
 * Returns the sort order of this header.
 *
 * Added implementation: if the {@code sortOrder} field is already set it is returned as is.
 * Otherwise the {@code SORT_ORDER_TAG} attribute is read:
 * <ul>
 *   <li>attribute absent: {@code SortOrder.unsorted} is stored in the field and returned;</li>
 *   <li>attribute names a {@code SortOrder} constant: that constant is returned directly;</li>
 *   <li>any other value: a warning is logged, and {@code SortOrder.unknown} is stored in the
 *       field and returned.</li>
 * </ul>
 *
 * Removed implementation: an absent attribute or the literal "unknown" gave
 * {@code SortOrder.unsorted}; every other value went straight to {@code SortOrder.valueOf},
 * with no handling for values that match no constant.
 *
 * NOTE(review): the conforming-value branch returns without assigning {@code sortOrder}, so
 * only the absent and non-conforming results are remembered by this method. Once the field is
 * non-null the attribute is not consulted again here; if the SO attribute can also be changed
 * through {@code setAttribute} directly, the field would not follow -- confirm this is intended.
 *
 * @return the sort order as described above; never null on the paths visible here
 */
public SortOrder getSortOrder() {
- final String so = getAttribute("SO");
- if (so == null || so.equals("unknown")) {
- return SortOrder.unsorted;
+ if (sortOrder == null) {
+ final String so = getAttribute(SORT_ORDER_TAG);
+ if (so == null) {
+ sortOrder = SortOrder.unsorted;
+ } else {
+ try {
// Conforming value: returned immediately, the field is left untouched.
+ return SortOrder.valueOf(so);
+ } catch (IllegalArgumentException e) {
// valueOf found no constant with this exact name; fall back to 'unknown' and remember it.
+ log.warn("Found non conforming header SO tag: " + so + ". Treating as 'unknown'.");
+ sortOrder = SortOrder.unknown;
+ }
+ }
}
- return SortOrder.valueOf((String) so);
+ return sortOrder;
}
/**
 * Sets the sort order of this header.
 * The added lines store {@code so} in the {@code sortOrder} field and then write
 * {@code so.name()} to the {@code SORT_ORDER_TAG} attribute; the removed line only wrote
 * the attribute, using the literal "SO".
 *
 * @param so the new sort order; {@code so.name()} is invoked, so it must not be null
 */
public void setSortOrder(final SortOrder so) {
- setAttribute("SO", so.name());
+ sortOrder = so;
+ setAttribute(SORT_ORDER_TAG, so.name());
}
/**
 * Returns the group order of this header.
 *
 * Added implementation: if the {@code groupOrder} field is already set it is returned as is.
 * Otherwise the {@code GROUP_ORDER_TAG} attribute is read:
 * <ul>
 *   <li>attribute absent: {@code GroupOrder.none} is stored in the field and returned;</li>
 *   <li>attribute names a {@code GroupOrder} constant: that constant is returned directly;</li>
 *   <li>any other value: a warning is logged, and {@code GroupOrder.none} is stored in the
 *       field and returned.</li>
 * </ul>
 *
 * Removed implementation: an absent attribute gave {@code GroupOrder.none}; every other value
 * went straight to {@code GroupOrder.valueOf}, with no handling for values that match no
 * constant.
 *
 * NOTE(review): as in the sort-order getter, the conforming-value branch returns without
 * assigning {@code groupOrder}, so only the fallback results are remembered by this method --
 * confirm this is intended.
 *
 * @return the group order as described above; never null on the paths visible here
 */
public GroupOrder getGroupOrder() {
- if (getAttribute("GO") == null) {
- return GroupOrder.none;
+ if (groupOrder == null) {
+ final String go = getAttribute(GROUP_ORDER_TAG);
+ if (go == null) {
+ groupOrder = GroupOrder.none;
+ } else {
+ try {
// Conforming value: returned immediately, the field is left untouched.
+ return GroupOrder.valueOf(go);
+ } catch (IllegalArgumentException e) {
// valueOf found no constant with this exact name; fall back to 'none' and remember it.
+ log.warn("Found non conforming header GO tag: " + go + ". Treating as 'none'.");
+ groupOrder = GroupOrder.none;
+ }
+ }
}
- return GroupOrder.valueOf((String)getAttribute("GO"));
+ return groupOrder;
}
/**
 * Sets the group order of this header.
 * The added lines store {@code go} in the {@code groupOrder} field and then write
 * {@code go.name()} to the {@code GROUP_ORDER_TAG} attribute; the removed line only wrote
 * the attribute, using the literal "GO".
 *
 * @param go the new group order; {@code go.name()} is invoked, so it must not be null
 */
public void setGroupOrder(final GroupOrder go) {
- setAttribute("GO", go.name());
+ groupOrder = go;
+ setAttribute(GROUP_ORDER_TAG, go.name());
}
/**
@@ -372,7 +396,7 @@ public String getSAMString() {
public static class PgIdGenerator {
private int recordCounter;
- private final Set<String> idsThatAreAlreadyTaken = new HashSet<String>();
+ private final Set<String> idsThatAreAlreadyTaken = new HashSet<>();
public PgIdGenerator(final SAMFileHeader header) {
for (final SAMProgramRecord pgRecord : header.getProgramRecords()) {
@@ -400,7 +424,6 @@ public String getNonCollidingId(final String recordId) {
idsThatAreAlreadyTaken.add(newId);
return newId;
}
-
}
}
}
@@ -228,6 +228,25 @@ private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) {
if (!parsedHeaderLine.requireTag(SAMFileHeader.VERSION_TAG)) {
return;
}
+
+ final String soString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG);
+ try {
+ if (soString != null) SAMFileHeader.SortOrder.valueOf(soString);
+ } catch (IllegalArgumentException e) {
+ reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() +
+ " line has non-conforming SO tag value: "+ soString + ".",
+ SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null);
+ }
+
+ final String goString = parsedHeaderLine.getValue(SAMFileHeader.GROUP_ORDER_TAG);
+ try {
+ if (goString != null) SAMFileHeader.GroupOrder.valueOf(goString);
+ } catch (IllegalArgumentException e) {
+ reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() +
+ " line has non-conforming GO tag value: "+ goString + ".",
+ SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null);
+ }
+
transferAttributes(mFileHeader, parsedHeaderLine.mKeyValuePairs);
}
@@ -171,6 +171,9 @@
HEADER_RECORD_MISSING_REQUIRED_TAG,
+ /** Header tag contains illegal value */
+ HEADER_TAG_NON_CONFORMING_VALUE,
+
/** Date string is not ISO-8601 */
INVALID_DATE_STRING(Severity.WARNING),
@@ -499,10 +499,24 @@ public void duplicateReadsOutOfOrder() throws Exception {
"@RG\tID:0\tSM:Hi,Mom!\n" +
"E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA";
+ final String SOTagCorrectlyProcessTestData =
+ "@HD\tVN:1.0\tSO:NOTKNOWN\n" +
+ "@SQ\tSN:chr1\tLN:101\n" +
+ "@RG\tID:0\tSM:Hi,Mom!\n" +
+ "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA";
+
+ final String GOTagCorrectlyProcessTestData =
+ "@HD\tVN:1.0\tGO:NOTKNOWN\n" +
+ "@SQ\tSN:chr1\tLN:101\n" +
+ "@RG\tID:0\tSM:Hi,Mom!\n" +
+ "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA";
+
return new Object[][]{
{E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.E2_BASE_EQUALS_PRIMARY_BASE},
{E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_E2_LENGTH},
- {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH}
+ {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH},
+ {SOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE},
+ {GOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE}
};
}
@@ -97,12 +97,10 @@ public void test2() {
}
}
- private SAMFileHeader samFileHeader = new SAMFileHeader();
-
private SAMRecord buildSAMRecord(String seqName, String line) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
- baos.write("@HD\tVN:1.0\tGO:none SO:coordinate\n".getBytes());
+ baos.write("@HD\tVN:1.0\tGO:none\tSO:coordinate\n".getBytes());
baos.write(("@SQ\tSN:" + seqName + "\tLN:247249719\n").getBytes());
baos.write(line.replaceAll("\\s+", "\t").getBytes());
baos.close();