Skip to content

Commit

Permalink
fix for unsigned int in CRAM tags, issue #322
Browse files Browse the repository at this point in the history
  • Loading branch information
vadimzalunin committed Nov 11, 2015
1 parent 85fd5c9 commit 7749f23
Show file tree
Hide file tree
Showing 13 changed files with 487 additions and 103 deletions.
12 changes: 6 additions & 6 deletions src/java/htsjdk/samtools/BinaryTagCodec.java
Expand Up @@ -320,13 +320,13 @@ private static Object readSingleValue(final byte tagType, final ByteBuffer byte
return (char)byteBuffer.get();
case 'I':
final long val = byteBuffer.getInt() & 0xffffffffL;
if (val <= Integer.MAX_VALUE) {
return (int)val;
if (SAMRecord.isValidUnsignedInteger(val)) {
return val;
} else {
SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
"Unsigned integer is out of range: " + val, null), validationStringency);
return val;
}
SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
"Tag value " + val + " too large to store as signed integer.", null), validationStringency);
// convert to unsigned int stored in a long
return val;
case 'i':
return byteBuffer.getInt();
case 's':
Expand Down
2 changes: 2 additions & 0 deletions src/java/htsjdk/samtools/CRAMFileWriter.java
Expand Up @@ -379,6 +379,8 @@ protected void finish() {
outputStream.flush();
if (indexer != null)
indexer.finish();
}catch (final RuntimeException re) {
throw re;
} catch (final Exception e) {
throw new RuntimeException(e);
}
Expand Down
21 changes: 13 additions & 8 deletions src/java/htsjdk/samtools/CRAMIterator.java
Expand Up @@ -83,10 +83,10 @@ public void setValidationStringency(
public CRAMIterator(final InputStream inputStream, final ReferenceSource referenceSource)
throws IOException {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
}
this.referenceSource = new ReferenceSource();
} else
this.referenceSource = referenceSource;
this.countingInputStream = new CountingInputStream(inputStream);
this.referenceSource = referenceSource;
final CramContainerIterator containerIterator = new CramContainerIterator(this.countingInputStream);
cramHeader = containerIterator.getCramHeader();
this.containerIterator = containerIterator;
Expand All @@ -101,10 +101,10 @@ public CRAMIterator(final InputStream inputStream, final ReferenceSource referen
public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates)
throws IOException {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
}
this.referenceSource = new ReferenceSource();
} else
this.referenceSource = referenceSource;
this.countingInputStream = new CountingInputStream(seekableStream);
this.referenceSource = referenceSource;
final CramSpanContainerIterator containerIterator = CramSpanContainerIterator.fromFileSpan(seekableStream, coordinates);
cramHeader = containerIterator.getCramHeader();
this.containerIterator = containerIterator;
Expand Down Expand Up @@ -153,7 +153,7 @@ private void nextContainer() throws IOException, IllegalArgumentException,
else
cramRecords.clear();

parser.getRecords(container, cramRecords);
parser.getRecords(container, cramRecords, validationStringency);

if (container.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
refs = new byte[]{};
Expand Down Expand Up @@ -253,7 +253,12 @@ public boolean hasNext() {
if (!iterator.hasNext()) {
try {
nextContainer();
} catch (final Exception e) {
} catch (CRAMException ce) {
throw ce;
}catch (SAMFormatException se) {
throw se;
}
catch (final Exception e) {
throw new RuntimeEOFException(e);
}
}
Expand Down
91 changes: 76 additions & 15 deletions src/java/htsjdk/samtools/SAMRecord.java
Expand Up @@ -138,6 +138,8 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
*/
public static final int MAX_INSERT_SIZE = 1<<29;

public static final long MAX_UINT = 2L * Integer.MAX_VALUE + 1;

private String mReadName = null;
private byte[] mReadBases = NULL_SEQUENCE;
private byte[] mBaseQualities = NULL_QUALS;
Expand Down Expand Up @@ -1158,6 +1160,32 @@ public Object getAttribute(final short tag) {
}
}

/**
 * A convenience method that returns the value of a tag as a valid unsigned integer, or fails with an exception
 * if the stored value cannot be represented as one.
 * <p>
 * A value held as a {@link Long} must lie within [0, {@link #MAX_UINT}]. A value held as a {@link Byte},
 * {@link Short} or {@link Integer} is accepted when it is non-negative and is widened to a long; small unsigned
 * values may be stored in these narrower types rather than as a Long.
 *
 * @param tag the tag to look up, in the short form accepted by {@link #getAttribute(short)}
 * @return the unsigned integer associated with the tag, or null if {@link #getAttribute(short)} returns null
 * @throws SAMException if the value is out of range or is not of an integral type
 */
public Long getUnsignedIntegerAttribute(final short tag) throws SAMException {
    final Object value = getAttribute(tag);
    if (value == null) {
        return null;
    }

    final long lValue;
    if (value instanceof Long) {
        lValue = (Long) value;
    } else if (value instanceof Integer || value instanceof Short || value instanceof Byte) {
        // Narrower integral types are valid carriers for small unsigned values; widen them losslessly.
        lValue = ((Number) value).longValue();
    } else {
        throw new SAMException("Unexpected attribute value data type " + value.getClass() + " for tag " +
                SAMTagUtil.getSingleton().makeStringTag(tag));
    }

    // A single range check covers both negative narrow values and oversized longs.
    if (!isValidUnsignedInteger(lValue)) {
        throw new SAMException("Unsigned integer value of tag " +
                SAMTagUtil.getSingleton().makeStringTag(tag) + " is out of boundaries: " + lValue);
    }
    return lValue;
}

/**
* Set a named attribute onto the SAMRecord. Passing a null value causes the attribute to be cleared.
* @param tag two-character tag name. See http://samtools.sourceforge.net/SAM1.pdf for standard and user-defined tags.
Expand Down Expand Up @@ -1206,33 +1234,66 @@ protected void setAttribute(final short tag, final Object value) {
setAttribute(tag, value, false);
}

protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
if (value != null &&
!(value instanceof Byte || value instanceof Short || value instanceof Integer ||
value instanceof String || value instanceof Character || value instanceof Float ||
value instanceof byte[] || value instanceof short[] || value instanceof int[] ||
value instanceof float[])) {
throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " +
SAMTagUtil.getSingleton().makeStringTag(tag));
/**
 * Tells whether a long value can be represented as an unsigned integer, i.e. lies within [0, MAX_UINT].
 *
 * @param value the long value to test
 * @return true when the value is non-negative and does not exceed MAX_UINT, false otherwise
 */
public static final boolean isValidUnsignedInteger(long value) {
    if (value < 0) {
        return false;
    }
    return value <= MAX_UINT;
}

/**
 * Decides whether an object may be stored as an attribute value.
 * <p>
 * Accepted are the boxed scalar types Byte, Short, Integer, Character and Float, Strings, and arrays of
 * byte, short, int and float. A Long is accepted only when it is a valid unsigned integer or fits into a
 * signed int. Anything else, including null, is rejected.
 *
 * @param value the candidate attribute value
 * @return true if the value is allowed and false otherwise
 */
public static final boolean isAllowedAttributeValue(final Object value) {
    // Longs are the special case: they are only a carrier for (un)signed 32-bit integers.
    if (value instanceof Long) {
        final long number = (Long) value;
        final boolean fitsSignedInt = number >= Integer.MIN_VALUE && number <= Integer.MAX_VALUE;
        return isValidUnsignedInteger(number) || fitsSignedInt;
    }

    final boolean isScalar = value instanceof Byte
            || value instanceof Short
            || value instanceof Integer
            || value instanceof String
            || value instanceof Character
            || value instanceof Float;
    if (isScalar) {
        return true;
    }

    return value instanceof byte[]
            || value instanceof short[]
            || value instanceof int[]
            || value instanceof float[];
}

protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
if (value == null) {
if (this.mAttributes != null) this.mAttributes = this.mAttributes.remove(tag);
// setting a tag value to null removes the tag:
if (this.mAttributes != null) {
this.mAttributes = this.mAttributes.remove(tag);
}
return;
}
else {

if (isAllowedAttributeValue(value)) {
final SAMBinaryTagAndValue tmp;
if(!isUnsignedArray) {
if (!isUnsignedArray) {
tmp = new SAMBinaryTagAndValue(tag, value);
}
else {
} else {
if (!value.getClass().isArray() || value instanceof float[]) {
throw new SAMException("Attribute type " + value.getClass() +
" cannot be encoded as an unsigned array. Tag: " +
SAMTagUtil.getSingleton().makeStringTag(tag));
}
tmp = new SAMBinaryTagAndUnsignedArrayValue(tag, value);
}
if (this.mAttributes == null) this.mAttributes = tmp;
else this.mAttributes = this.mAttributes.insert(tmp);
if (this.mAttributes == null) {
this.mAttributes = tmp;
} else {
this.mAttributes = this.mAttributes.insert(tmp);
}
} else {
throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " +
SAMTagUtil.getSingleton().makeStringTag(tag));
}
}

Expand Down
16 changes: 13 additions & 3 deletions src/java/htsjdk/samtools/TextTagCodec.java
Expand Up @@ -72,8 +72,9 @@ public String encode(final String tagName, Object value) {
value = getArrayType(value, false) + "," + encodeArrayValue(value);
} else if (tagType == 'i') {
final long longVal = ((Number) value).longValue();
if (longVal > Integer.MAX_VALUE || longVal < Integer.MIN_VALUE) {
throw new SAMFormatException("Value for tag " + tagName + " cannot be stored in an Integer: " + longVal);
// as the spec says: [−2^31, 2^32)
if (longVal < Integer.MIN_VALUE || longVal > SAMRecord.MAX_UINT) {
throw new IllegalArgumentException("Value for tag " + tagName + " cannot be stored in an Integer: " + longVal);
}
}
sb.append(tagType);
Expand Down Expand Up @@ -182,11 +183,20 @@ private Object convertStringToObject(final String type, final String stringVal)
}
return stringVal.charAt(0);
} else if (type.equals("i")) {
final long lValue;
try {
return new Integer(stringVal);
lValue = Long.valueOf(stringVal);
} catch (NumberFormatException e) {
throw new SAMFormatException("Tag of type i should have signed decimal value");
}
if (lValue >= Integer.MIN_VALUE && lValue <= Integer.MAX_VALUE) {
return (int) lValue;
}
if (SAMRecord.isValidUnsignedInteger(lValue)) {
return lValue;
} else {
throw new SAMFormatException("Integer is out of allowed boundaries: " + stringVal);
}
} else if (type.equals("f")) {
try {
return new Float(stringVal);
Expand Down
13 changes: 7 additions & 6 deletions src/java/htsjdk/samtools/cram/build/ContainerParser.java
Expand Up @@ -20,6 +20,7 @@
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.encoding.reader.CramRecordReader;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory.DataReaderWithStats;
Expand Down Expand Up @@ -50,14 +51,14 @@ public ContainerParser(final SAMFileHeader samFileHeader) {
}

public List<CramCompressionRecord> getRecords(final Container container,
ArrayList<CramCompressionRecord> records) throws IllegalArgumentException,
ArrayList<CramCompressionRecord> records, ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
final long time1 = System.nanoTime();
if (records == null)
records = new ArrayList<CramCompressionRecord>(container.nofRecords);

for (final Slice slice : container.slices)
records.addAll(getRecords(slice, container.header));
records.addAll(getRecords(slice, container.header, validationStringency));

final long time2 = System.nanoTime();

Expand All @@ -73,7 +74,7 @@ public List<CramCompressionRecord> getRecords(final Container container,
}

ArrayList<CramCompressionRecord> getRecords(ArrayList<CramCompressionRecord> records,
final Slice slice, final CompressionHeader header) throws IllegalArgumentException,
final Slice slice, final CompressionHeader header, ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
String seqName = SAMRecord.NO_ALIGNMENT_REFERENCE_NAME;
switch (slice.sequenceId) {
Expand All @@ -97,7 +98,7 @@ ArrayList<CramCompressionRecord> getRecords(ArrayList<CramCompressionRecord> rec
}

long time;
final CramRecordReader reader = new CramRecordReader();
final CramRecordReader reader = new CramRecordReader(validationStringency);
dataReaderFactory.buildReader(reader, new DefaultBitInputStream(
new ByteArrayInputStream(slice.coreBlock.getRawContent())),
inputMap, header, slice.sequenceId);
Expand Down Expand Up @@ -150,8 +151,8 @@ ArrayList<CramCompressionRecord> getRecords(ArrayList<CramCompressionRecord> rec
return records;
}

List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header)
List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header, ValidationStringency validationStringency)
throws IllegalArgumentException, IllegalAccessException {
return getRecords(null, slice, header);
return getRecords(null, slice, header, validationStringency);
}
}
Expand Up @@ -17,7 +17,9 @@
*/
package htsjdk.samtools.cram.encoding.reader;

import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore;
import htsjdk.samtools.cram.encoding.readfeatures.Bases;
import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
Expand All @@ -38,6 +40,11 @@

public class CramRecordReader extends AbstractReader {
private CramCompressionRecord prevRecord;
private ValidationStringency validationStringency;

/**
 * Creates a record reader that remembers the given validation stringency; the stored value is handed to
 * every ReadTag this reader constructs while reading a record.
 *
 * @param validationStringency the stringency to pass on when tags are created
 */
public CramRecordReader(ValidationStringency validationStringency) {
this.validationStringency = validationStringency;
}

@SuppressWarnings("ConstantConditions")
public void read(final CramCompressionRecord cramRecord) {
Expand Down Expand Up @@ -87,7 +94,7 @@ public void read(final CramCompressionRecord cramRecord) {
for (int i = 0; i < ids.length; i++) {
final int id = ReadTag.name3BytesToInt(ids[i]);
final DataReader<byte[]> dataReader = tagValueCodecs.get(id);
final ReadTag tag = new ReadTag(id, dataReader.readData());
final ReadTag tag = new ReadTag(id, dataReader.readData(), validationStringency);
cramRecord.tags[i] = tag;
}
}
Expand Down Expand Up @@ -190,6 +197,7 @@ public void read(final CramCompressionRecord cramRecord) {
if (prevRecord != null)
System.err.printf("Failed at record %d. Here is the previously read record: %s\n", recordCounter,
prevRecord.toString());
if (e instanceof SAMFormatException) throw (SAMFormatException)e;
throw new RuntimeException(e);
}
}
Expand Down

0 comments on commit 7749f23

Please sign in to comment.