Skip to content

Commit

Permalink
fix for unsigned int in CRAM tags, issue #322
Browse files Browse the repository at this point in the history
  • Loading branch information
vadimzalunin committed Oct 29, 2015
1 parent 824b955 commit ed25614
Show file tree
Hide file tree
Showing 10 changed files with 193 additions and 60 deletions.
21 changes: 13 additions & 8 deletions src/java/htsjdk/samtools/CRAMIterator.java
Expand Up @@ -83,10 +83,10 @@ public void setValidationStringency(
public CRAMIterator(final InputStream inputStream, final ReferenceSource referenceSource)
throws IOException {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
}
this.referenceSource = new ReferenceSource();
} else
this.referenceSource = referenceSource;
this.countingInputStream = new CountingInputStream(inputStream);
this.referenceSource = referenceSource;
final CramContainerIterator containerIterator = new CramContainerIterator(this.countingInputStream);
cramHeader = containerIterator.getCramHeader();
this.containerIterator = containerIterator;
Expand All @@ -101,10 +101,10 @@ public CRAMIterator(final InputStream inputStream, final ReferenceSource referen
public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates)
throws IOException {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
}
this.referenceSource = new ReferenceSource();
} else
this.referenceSource = referenceSource;
this.countingInputStream = new CountingInputStream(seekableStream);
this.referenceSource = referenceSource;
final CramSpanContainerIterator containerIterator = CramSpanContainerIterator.fromFileSpan(seekableStream, coordinates);
cramHeader = containerIterator.getCramHeader();
this.containerIterator = containerIterator;
Expand Down Expand Up @@ -153,7 +153,7 @@ private void nextContainer() throws IOException, IllegalArgumentException,
else
cramRecords.clear();

parser.getRecords(container, cramRecords);
parser.getRecords(container, cramRecords, validationStringency);

if (container.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
refs = new byte[]{};
Expand Down Expand Up @@ -253,7 +253,12 @@ public boolean hasNext() {
if (!iterator.hasNext()) {
try {
nextContainer();
} catch (final Exception e) {
} catch (CRAMException ce) {
throw ce;
}catch (SAMFormatException se) {
throw se;
}
catch (final Exception e) {
throw new RuntimeEOFException(e);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/java/htsjdk/samtools/SAMRecord.java
Expand Up @@ -1203,7 +1203,7 @@ protected void setAttribute(final short tag, final Object value) {

protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
if (value != null &&
!(value instanceof Byte || value instanceof Short || value instanceof Integer ||
!(value instanceof Byte || value instanceof Short || value instanceof Integer ||value instanceof Long ||
value instanceof String || value instanceof Character || value instanceof Float ||
value instanceof byte[] || value instanceof short[] || value instanceof int[] ||
value instanceof float[])) {
Expand Down
13 changes: 7 additions & 6 deletions src/java/htsjdk/samtools/cram/build/ContainerParser.java
Expand Up @@ -20,6 +20,7 @@
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.encoding.reader.CramRecordReader;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory.DataReaderWithStats;
Expand Down Expand Up @@ -50,14 +51,14 @@ public ContainerParser(final SAMFileHeader samFileHeader) {
}

public List<CramCompressionRecord> getRecords(final Container container,
ArrayList<CramCompressionRecord> records) throws IllegalArgumentException,
ArrayList<CramCompressionRecord> records, ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
final long time1 = System.nanoTime();
if (records == null)
records = new ArrayList<CramCompressionRecord>(container.nofRecords);

for (final Slice slice : container.slices)
records.addAll(getRecords(slice, container.header));
records.addAll(getRecords(slice, container.header, validationStringency));

final long time2 = System.nanoTime();

Expand All @@ -73,7 +74,7 @@ public List<CramCompressionRecord> getRecords(final Container container,
}

ArrayList<CramCompressionRecord> getRecords(ArrayList<CramCompressionRecord> records,
final Slice slice, final CompressionHeader header) throws IllegalArgumentException,
final Slice slice, final CompressionHeader header, ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
String seqName = SAMRecord.NO_ALIGNMENT_REFERENCE_NAME;
switch (slice.sequenceId) {
Expand All @@ -97,7 +98,7 @@ ArrayList<CramCompressionRecord> getRecords(ArrayList<CramCompressionRecord> rec
}

long time;
final CramRecordReader reader = new CramRecordReader();
final CramRecordReader reader = new CramRecordReader(validationStringency);
dataReaderFactory.buildReader(reader, new DefaultBitInputStream(
new ByteArrayInputStream(slice.coreBlock.getRawContent())),
inputMap, header, slice.sequenceId);
Expand Down Expand Up @@ -150,8 +151,8 @@ ArrayList<CramCompressionRecord> getRecords(ArrayList<CramCompressionRecord> rec
return records;
}

List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header)
List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header, ValidationStringency validationStringency)
throws IllegalArgumentException, IllegalAccessException {
return getRecords(null, slice, header);
return getRecords(null, slice, header, validationStringency);
}
}
Expand Up @@ -17,7 +17,9 @@
*/
package htsjdk.samtools.cram.encoding.reader;

import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore;
import htsjdk.samtools.cram.encoding.readfeatures.Bases;
import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
Expand All @@ -38,6 +40,11 @@

public class CramRecordReader extends AbstractReader {
private CramCompressionRecord prevRecord;
private ValidationStringency validationStringency;

public CramRecordReader(ValidationStringency validationStringency) {
this.validationStringency = validationStringency;
}

@SuppressWarnings("ConstantConditions")
public void read(final CramCompressionRecord cramRecord) {
Expand Down Expand Up @@ -87,7 +94,7 @@ public void read(final CramCompressionRecord cramRecord) {
for (int i = 0; i < ids.length; i++) {
final int id = ReadTag.name3BytesToInt(ids[i]);
final DataReader<byte[]> dataReader = tagValueCodecs.get(id);
final ReadTag tag = new ReadTag(id, dataReader.readData());
final ReadTag tag = new ReadTag(id, dataReader.readData(), validationStringency);
cramRecord.tags[i] = tag;
}
}
Expand Down Expand Up @@ -190,6 +197,7 @@ public void read(final CramCompressionRecord cramRecord) {
if (prevRecord != null)
System.err.printf("Failed at record %d. Here is the previously read record: %s\n", recordCounter,
prevRecord.toString());
if (e instanceof SAMFormatException) throw (SAMFormatException)e;
throw new RuntimeException(e);
}
}
Expand Down
21 changes: 13 additions & 8 deletions src/java/htsjdk/samtools/cram/structure/ReadTag.java
Expand Up @@ -21,7 +21,10 @@
import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord.SAMTagAndValue;
import htsjdk.samtools.SAMTagUtil;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.SAMValidationError;
import htsjdk.samtools.TagValueAndUnsignedArrayFlag;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.StringUtil;

import java.nio.ByteBuffer;
Expand Down Expand Up @@ -50,10 +53,10 @@ public class ReadTag implements Comparable<ReadTag> {
private short code;
private byte index;

public ReadTag(final int id, final byte[] dataAsByteArray) {
public ReadTag(final int id, final byte[] dataAsByteArray, ValidationStringency validationStringency) {
this.type = (char) (0xFF & id);
key = new String(new char[]{(char) ((id >> 16) & 0xFF), (char) ((id >> 8) & 0xFF)});
value = restoreValueFromByteArray(type, dataAsByteArray);
value = restoreValueFromByteArray(type, dataAsByteArray, validationStringency);
keyType3Bytes = this.key + this.type;

keyType3BytesAsInt = id;
Expand Down Expand Up @@ -179,10 +182,10 @@ public byte[] getValueAsByteArray() {
return writeSingleValue((byte) type, value, false);
}

private static Object restoreValueFromByteArray(final char type, final byte[] array) {
private static Object restoreValueFromByteArray(final char type, final byte[] array, ValidationStringency validationStringency) {
final ByteBuffer buffer = ByteBuffer.wrap(array);
buffer.order(ByteOrder.LITTLE_ENDIAN);
return readSingleValue((byte) type, buffer);
return readSingleValue((byte) type, buffer, validationStringency);
}

// copied from net.sf.samtools.BinaryTagCodec 1.62:
Expand Down Expand Up @@ -365,7 +368,7 @@ private static void writeArray(final Object value,
}

public static Object readSingleValue(final byte tagType,
final ByteBuffer byteBuffer) {
final ByteBuffer byteBuffer, ValidationStringency validationStringency) {
switch (tagType) {
case 'Z':
return readNullTerminatedString(byteBuffer);
Expand All @@ -374,10 +377,12 @@ public static Object readSingleValue(final byte tagType,
case 'I':
final long val = byteBuffer.getInt() & 0xffffffffL;
if (val <= Integer.MAX_VALUE) {
return (int) val;
return (int)val;
}
throw new RuntimeException(
"Tag value is too large to store as signed integer.");
SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
"Tag value " + val + " too large to store as signed integer.", null), validationStringency);
// convert to unsigned int stored in a long
return val;
case 'i':
return byteBuffer.getInt();
case 's':
Expand Down
38 changes: 18 additions & 20 deletions src/tests/java/htsjdk/samtools/CRAMEdgeCasesTest.java
@@ -1,17 +1,19 @@
package htsjdk.samtools;

import htsjdk.samtools.*;
import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeEOFException;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;

import java.io.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;

Expand All @@ -37,25 +39,16 @@ public void testUnsorted() throws IOException {

// int test for CRAMException
// testing for a contig found in the reads but not in the reference
@Test
@Test(expectedExceptions = CRAMException.class)
public void testContigNotFoundInRef() throws IOException {
boolean sawException = false;
final File CRAMFile = new File("testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram");
final File refFile = new File("testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa");
final ReferenceSource refSource = new ReferenceSource(refFile);
final CRAMIterator iterator = new CRAMIterator(new FileInputStream(CRAMFile), refSource);
try {
while (iterator.hasNext()) {
iterator.next();
}
}
// the iterator is wrapping our exception; ensuring that it was actually a CRAMexception
catch (RuntimeEOFException e) {
Assert.assertEquals(e.getCause().getClass(), CRAMException.class);
sawException = true;
while (iterator.hasNext()) {
iterator.next();
}

if (!sawException) {Assert.fail("Expected an exception to occur, none found");}
}

@Test
Expand All @@ -67,7 +60,9 @@ public void testBizilionTags() throws IOException {
char b1 = (char) ('A' + i / 26);
char b2 = (char) ('A' + i % 26);
String tag = new String(new char[]{b1, b2});
if ("RG".equals(tag)) continue;
if ("RG".equals(tag)) {
continue;
}
record.setAttribute(tag, i);
}

Expand Down Expand Up @@ -98,7 +93,7 @@ private void testRecords(Collection<SAMRecord> records, byte[] ref) throws IOExc
}
cramFileWriter.close();

CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream)null, source, ValidationStringency.SILENT);
CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream) null, source, ValidationStringency.SILENT);
final SAMRecordIterator iterator = cramFileReader.getIterator();
Assert.assertTrue(iterator.hasNext());

Expand Down Expand Up @@ -126,7 +121,7 @@ private void testSingleRecord(SAMRecord record, byte[] ref) throws IOException {
cramFileWriter.addAlignment(record);
cramFileWriter.close();

CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream)null, source, ValidationStringency.SILENT);
CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream) null, source, ValidationStringency.SILENT);
final SAMRecordIterator iterator = cramFileReader.getIterator();
Assert.assertTrue(iterator.hasNext());
SAMRecord s2 = iterator.next();
Expand All @@ -152,8 +147,11 @@ private void testSingleRecord(byte[] bases, byte[] scores, byte[] ref) throws IO
s.setAlignmentStart(1);
s.setReferenceName("chr1");
s.setReadName("1");
if (bases == SAMRecord.NULL_SEQUENCE) s.setCigarString("10M");
else s.setCigarString(s.getReadLength() + "M");
if (bases == SAMRecord.NULL_SEQUENCE) {
s.setCigarString("10M");
} else {
s.setCigarString(s.getReadLength() + "M");
}

testSingleRecord(s, ref);
}
Expand Down
16 changes: 4 additions & 12 deletions src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
Expand Up @@ -68,22 +68,14 @@ public void testSAM() throws Exception {
Assert.assertEquals(((Number) rec.getAttribute(INTEGER_TAG)).intValue(), 1);
}

@Test(expectedExceptions = SAMException.class)
@Test
public void testUnsignedIntegerBAM() throws Exception {
SAMRecord rec = createSamRecord();
final long val = 1l + Integer.MAX_VALUE;
rec.setAttribute(UNSIGNED_INTEGER_TAG, val);
Assert.fail("Exception should have been thrown.");
}

/**
* Cannot store unsigned int in SAM text format.
*/
@Test(expectedExceptions = SAMException.class)
public void testUnsignedIntegerSAM() throws Exception {
final SAMRecord rec = createSamRecord();
final long val = 1l + Integer.MAX_VALUE;
rec.setAttribute(UNSIGNED_INTEGER_TAG, val);
Object roundTripValue = rec.getAttribute(UNSIGNED_INTEGER_TAG);
Assert.assertTrue(roundTripValue instanceof Long);
Assert.assertEquals(roundTripValue, val);
}

@Test
Expand Down

0 comments on commit ed25614

Please sign in to comment.