Skip to content

Commit

Permalink
Fixed a bug in the locus accumulator where the accumulator didn't add… (
Browse files Browse the repository at this point in the history
#1489)

* Fixed a bug in the locus accumulator where the accumulator didn't add the right locusInfos causing "gaps" to appear.
  • Loading branch information
Yossi Farjoun committed Jul 7, 2020
1 parent 1da947b commit 78941e4
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 22 deletions.
29 changes: 19 additions & 10 deletions src/main/java/htsjdk/samtools/util/EdgeReadIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,24 +95,33 @@ void accumulateSamRecord(SAMRecord rec) {
// 1-based reference position that the current base aligns to
final int refPos = alignmentBlock.getReferenceStart();

// 0-based offset from the aligned position of the first base in the read to the aligned position
// of the current base.
final int refOffset = refPos - rec.getAlignmentStart();
final int refOffsetEnd = refPos - rec.getAlignmentStart() + alignmentBlock.getLength();
if (accumulator.isEmpty()) {
accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), rec.getAlignmentStart()));
}

// The accumulator should always have LocusInfos that correspond to one consecutive segment of loci from one reference
// sequence. So
// accumulator.get(0).getPosition() + accumulator.size() == accumulator.get(accumulator.size()-1).getPosition()+1
final int accumulatorNextPosition = accumulator.get(0).getPosition() + accumulator.size();

if (accumulatorNextPosition != accumulator.get(accumulator.size() - 1).getPosition() + 1) {
throw new IllegalStateException("The accumulator has gotten into a funk. Cannot continue");
}

// Ensure there are AbstractLocusInfos up to and including this position
for (int j = accumulator.size(); j <= refOffsetEnd; ++j) {
accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()),
rec.getAlignmentStart() + j));
for (int locusPos = accumulatorNextPosition; locusPos <= refPos + alignmentBlock.getLength(); ++locusPos) {
accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), locusPos));
}

/* Let's assume an alignment block starts in some locus.
* We put two records to the accumulator. The first one has the "begin" type which corresponds to the locus
* where the block starts. The second one has the "end" type which corresponds to the other locus where the block ends.
*/
int refOffsetInterval = refOffset; // corresponds to the beginning of the alignment block
int refOffsetEndInterval = refOffsetEnd;

// 0-based offset from the aligned position of the first base in the read to the aligned position
// of the current base.
int refOffsetInterval = refPos - rec.getAlignmentStart(); // corresponds to the beginning of the alignment block
int refOffsetEndInterval = refOffsetInterval + alignmentBlock.getLength();;
int startShift = 0;

// intersect intervals and alignment block
Expand All @@ -133,7 +142,7 @@ void accumulateSamRecord(SAMRecord rec) {
}
// if the alignment block ends out of an interval, shift the ending position
final int readEnd = refPos + alignmentBlock.getLength();
if (refPos + alignmentBlock.getLength() > intervalEnd) {
if (readEnd > intervalEnd) {
refOffsetEndInterval = refOffsetEndInterval - (readEnd - intervalEnd) + 1;
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/test/java/htsjdk/samtools/BAMCigarOverflowTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import htsjdk.HtsjdkTest;
import htsjdk.samtools.util.CloserUtil;
import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;

import java.io.File;

import static org.testng.Assert.assertEquals;

/**
* Test the fix of a bug reported by s-andrews in which the use of an arithmetic rather than a logical right shift in BinaryCigarCodec.binaryCigarToCigarElement()
* causes an overflow in the CIGAR when reading a BAM file for a read that spans a very large intron.
Expand Down
23 changes: 22 additions & 1 deletion src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,13 @@

import htsjdk.samtools.SAMRecordSetBuilder;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
Expand Down Expand Up @@ -311,7 +314,7 @@ public void testIntersectingAndNotInterval() {
EdgeReadIterator iterator = new EdgeReadIterator(builder.getSamReader(), intervals);
int locusPosition = 40;
while (iterator.hasNext()) {
AbstractLocusInfo<EdgingRecordAndOffset> next = iterator.next();
final AbstractLocusInfo<EdgingRecordAndOffset> next = iterator.next();
int position = next.getPosition();
assertEquals(locusPosition++, position);
if (position == 40) {
Expand All @@ -333,6 +336,24 @@ public void testIntersectingAndNotInterval() {
assertEquals(81, locusPosition);
}

@Test
public void testNoGapsInLocusAccumulator() {
final SamReader reader = SamReaderFactory.make().open(new File("src/test/resources/htsjdk/samtools/util/sliver.sam"));
final EdgeReadIterator iterator = new EdgeReadIterator(reader, null);

AbstractLocusInfo<EdgingRecordAndOffset> previous = null;
int counter = 0;
while (iterator.hasNext() && (previous == null || previous.getPosition() < 1_000_000)) {
counter++;
final AbstractLocusInfo<EdgingRecordAndOffset> next = iterator.next();
if (previous != null) {
Assert.assertEquals(next.getPosition(), previous.getPosition() + 1);
}
previous = next;
}
Assert.assertEquals(counter, 1_000_000);
}

/**
* Test for intersecting interval for read with a deletion in the middle
*/
Expand Down
17 changes: 7 additions & 10 deletions src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
*/
package htsjdk.samtools.util;


import htsjdk.HtsjdkTest;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
Expand All @@ -32,26 +31,24 @@
import org.testng.annotations.Test;

/**
*
* @author Mariia_Zueva@epam.com, EPAM Systems, Inc. <www.epam.com>
*
*/

public class EdgingRecordAndOffsetTest extends HtsjdkTest {
private final byte[] qualities = {30, 50, 50, 60, 60, 70 ,70, 70, 80, 90};
private final byte[] qualities = {30, 50, 50, 60, 60, 70, 70, 70, 80, 90};
private final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C'};
private SAMRecord record;

@BeforeTest
public void setUp(){
public void setUp() {
record = new SAMRecord(new SAMFileHeader());
record.setReadName("testRecord");
record.setReadBases(bases);
record.setBaseQualities(qualities);
}

@Test
public void testConstructor(){
public void testConstructor() {
EdgingRecordAndOffset typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 3);
Assert.assertEquals(qualities, typedRecordAndOffset.getBaseQualities());
Assert.assertEquals(bases, typedRecordAndOffset.getRecord().getReadBases());
Expand All @@ -62,30 +59,30 @@ public void testConstructor(){
}

@Test
public void testGetSetStart(){
public void testGetSetStart() {
EdgingRecordAndOffset typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 3);
EdgingRecordAndOffset typedRecordAndOffsetEnd = EdgingRecordAndOffset.createEndRecord(typedRecordAndOffset);
Assert.assertEquals(typedRecordAndOffset, typedRecordAndOffsetEnd.getStart());
Assert.assertEquals(EdgingRecordAndOffset.Type.END, typedRecordAndOffsetEnd.getType());
}

@Test
public void testNotEqualsTypedRecords(){
public void testNotEqualsTypedRecords() {
EdgingRecordAndOffset typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 3);
EdgingRecordAndOffset secondEdgingRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 5, 10, 3);
Assert.assertNotEquals(typedRecordAndOffset.getBaseQuality(), secondEdgingRecordAndOffset.getBaseQuality());
Assert.assertEquals(typedRecordAndOffset.getBaseQualities(), secondEdgingRecordAndOffset.getBaseQualities());
}

@Test
public void testGetOffset(){
public void testGetOffset() {
EdgingRecordAndOffset secondEdgingRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 5, 10, 3);
Assert.assertEquals(70, secondEdgingRecordAndOffset.getBaseQuality());
Assert.assertEquals('C', secondEdgingRecordAndOffset.getReadBase());
}

@Test
public void testGetQualityAtPosition(){
public void testGetQualityAtPosition() {
EdgingRecordAndOffset secondEdgingRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 1);
Assert.assertEquals(50, secondEdgingRecordAndOffset.getBaseQuality(2));
}
Expand Down

0 comments on commit 78941e4

Please sign in to comment.