Skip to content

Commit

Permalink
Refactoring of VDJCAlignments on road:
Browse files Browse the repository at this point in the history
 - added SequenceHistory to each alignment
 - added original reads

! doesn't compiles
  • Loading branch information
PoslavskySV authored and dbolotin committed Nov 27, 2017
1 parent 6cc8f4e commit 80ef3d1
Show file tree
Hide file tree
Showing 10 changed files with 331 additions and 95 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Expand Up @@ -44,14 +44,14 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<milib.version>1.8.1</milib.version>
<milib.version>1.8.2-SNAPSHOT</milib.version>
</properties>

<dependencies>
<dependency>
<groupId>io.repseq</groupId>
<artifactId>repseqio</artifactId>
<version>1.2.10</version>
<version>1.2.11-SNAPSHOT</version>
</dependency>

<dependency>
Expand Down
185 changes: 185 additions & 0 deletions src/main/java/com/milaboratory/mixcr/basictypes/SequenceHistory.java
@@ -0,0 +1,185 @@
package com.milaboratory.mixcr.basictypes;

import com.milaboratory.util.ArraysUtils;

import static java.lang.Math.abs;
import static java.lang.Math.max;

public interface SequenceHistory {
/**
* Length of target
*/
int length();

/**
* Shift read index by specified value (used in .vdjca files merging)
*/
SequenceHistory shiftReadId(long shift);

/**
* Return all raw read ids (sorted) occurring in the history
*/
long[] readIds();

/**
* Return minimal raw read id occurring in the history
*/
long minReadId();

/**
* Initial event, starting point of the history (single fastq record read from file)
*/
final class RawSequence implements SequenceHistory {
/**
* Read index in the initial .fastq file
*/
public final long readId;
/**
* Read index in pair
*/
public final byte mateIndex;
/**
* Read length
*/
public final int length;
/**
* Is reverse complement
*/
public final boolean isReverseComplement;

public RawSequence(long readId, byte mateIndex, int length, boolean isReverseComplement) {
this.readId = readId;
this.mateIndex = mateIndex;
this.length = length;
this.isReverseComplement = isReverseComplement;
}

@Override
public int length() { return length; }

@Override
public SequenceHistory shiftReadId(long shift) {
return new RawSequence(readId + shift, mateIndex, length, isReverseComplement);
}

@Override
public long[] readIds() {
return new long[]{readId};
}

@Override
public long minReadId() {
return readId;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;

RawSequence that = (RawSequence) o;

if (readId != that.readId) return false;
if (mateIndex != that.mateIndex) return false;
if (length != that.length) return false;
return isReverseComplement == that.isReverseComplement;
}

@Override
public int hashCode() {
int result = (int) (readId ^ (readId >>> 32));
result = 31 * result + (int) mateIndex;
result = 31 * result + length;
result = 31 * result + (isReverseComplement ? 1 : 0);
return result;
}
}

/**
* Parent for all events when two alignments or reads are merged into a single one
*/
abstract class Merge implements SequenceHistory {
/**
* Histories of merged alignments
*/
public final SequenceHistory left, right;
/**
* Position of the first nucleotide of the right target in the left target
*/
public final int offset;
/**
* Number of mismatches
*/
public final int nMismatches;

Merge(SequenceHistory left, SequenceHistory right, int offset, int nMismatches) {
this.left = left;
this.right = right;
this.offset = offset;
this.nMismatches = nMismatches;
}

@Override
public int length() {
return abs(offset) +
(offset >= 0 ?
max(left.length() - offset, right.length()) :
max(left.length(), right.length() + offset) // offset is negative here
);
}

@Override
public long[] readIds() {
return ArraysUtils.getSortedDistinct(ArraysUtils.concatenate(left.readIds(), right.readIds()));
}

@Override
public long minReadId() {
return Math.min(left.minReadId(), right.minReadId());
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;

Merge merge = (Merge) o;

if (offset != merge.offset) return false;
if (nMismatches != merge.nMismatches) return false;
if (!left.equals(merge.left)) return false;
return right.equals(merge.right);
}

@Override
public int hashCode() {
int result = left.hashCode();
result = 31 * result + right.hashCode();
result = 31 * result + offset;
result = 31 * result + nMismatches;
return result;
}
}

final class SequenceOverlap extends Merge {
public SequenceOverlap(SequenceHistory left, SequenceHistory right, int offset, int nMismatches) {
super(left, right, offset, nMismatches);
}

@Override
public SequenceOverlap shiftReadId(long shift) {
return new SequenceOverlap(left.shiftReadId(shift), right.shiftReadId(shift), offset, nMismatches);
}
}

final class AlignmentOverlap extends Merge {
public AlignmentOverlap(SequenceHistory left, SequenceHistory right, int offset, int nMismatches) {
super(left, right, offset, nMismatches);
}

@Override
public AlignmentOverlap shiftReadId(long shift) {
return new AlignmentOverlap(left.shiftReadId(shift), right.shiftReadId(shift), offset, nMismatches);
}
}
}
124 changes: 67 additions & 57 deletions src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java
Expand Up @@ -29,44 +29,79 @@
package com.milaboratory.mixcr.basictypes;

import com.milaboratory.core.alignment.Alignment;
import com.milaboratory.core.io.sequence.SequenceRead;
import com.milaboratory.core.io.sequence.SequenceReadUtil;
import com.milaboratory.core.sequence.NSequenceWithQuality;
import com.milaboratory.core.sequence.NucleotideSequence;
import com.milaboratory.primitivio.annotations.Serializable;
import io.repseq.core.GeneType;

import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.List;

@Serializable(by = IO.VDJCAlignmentsSerializer.class)
public final class VDJCAlignments extends VDJCObject {
volatile String[] targetDescriptions;
volatile NSequenceWithQuality[] originalSequences;
volatile String[] originalDescriptions;
final long readId;
final SequenceHistory[] history;
final SequenceRead[] originalReads;
private volatile long alignmentsIndex = -1;

public VDJCAlignments(long readId, long alignmentsIndex, VDJCAlignments alignments) {
super(alignments.hits, alignments.targets);
this.readId = readId;
public VDJCAlignments(long alignmentsIndex,
EnumMap<GeneType, VDJCHit[]> hits,
NSequenceWithQuality[] targets,
SequenceHistory[] history,
SequenceRead[] originalReads) {
super(hits, targets);
this.alignmentsIndex = alignmentsIndex;
this.targetDescriptions = alignments.targetDescriptions;
this.originalDescriptions = alignments.originalDescriptions;
this.history = history;
this.originalReads = originalReads;
}

public VDJCAlignments(long readId, EnumMap<GeneType, VDJCHit[]> hits, NSequenceWithQuality target) {
super(hits, new NSequenceWithQuality[]{target});
this.readId = readId;
public VDJCAlignments(EnumMap<GeneType, VDJCHit[]> hits,
NSequenceWithQuality[] targets,
SequenceHistory[] history,
SequenceRead[] originalReads) {
this(-1, hits, targets, history, originalReads);
}

public VDJCAlignments(long readId, EnumMap<GeneType, VDJCHit[]> hits, NSequenceWithQuality... targets) {
super(hits, targets);
this.readId = readId;
public VDJCAlignments(VDJCHit[] vHits, VDJCHit[] dHits, VDJCHit[] jHits, VDJCHit[] cHits,
NSequenceWithQuality[] targets,
SequenceHistory[] history,
SequenceRead[] originalReads) {
this(-1, createHits(vHits, dHits, jHits, cHits), targets, history, originalReads);
}

public VDJCAlignments shiftReadId(long newAlignmentIndex, long shift) {
return new VDJCAlignments(newAlignmentIndex, hits, targets, shift(history, shift), shift(originalReads, shift));
}

private static SequenceHistory[] shift(SequenceHistory[] data, long shift) {
SequenceHistory[] r = new SequenceHistory[data.length];
for (int i = 0; i < data.length; i++)
r[i] = data[i].shiftReadId(shift);
return r;
}

private static SequenceRead[] shift(SequenceRead[] data, long shift) {
if (data == null)
return null;
SequenceRead[] r = new SequenceRead[data.length];
for (int i = 0; i < data.length; i++)
r[i] = SequenceReadUtil.setReadId(data[i].getId() + shift, data[i]);
return r;
}

public VDJCAlignments(long readId, VDJCHit[] vHits, VDJCHit[] dHits, VDJCHit[] jHits, VDJCHit[] cHits,
NSequenceWithQuality... targets) {
super(vHits, dHits, jHits, cHits, targets);
this.readId = readId;
public SequenceHistory getHistory(int targetId) {
return history[targetId];
}

public List<SequenceRead> getOriginalReads() {
return originalReads == null ? null : Collections.unmodifiableList(Arrays.asList(originalReads));
}

public VDJCAlignments setHistory(SequenceHistory[] history, SequenceRead[] originalReads) {
return new VDJCAlignments(alignmentsIndex, hits, targets, history, originalReads);
}

public VDJCAlignments removeBestHitAlignment(GeneType geneType, int targetId) {
Expand All @@ -79,10 +114,7 @@ public VDJCAlignments removeBestHitAlignment(GeneType geneType, int targetId) {
gHits[0] = new VDJCHit(gHits[0].getGene(), als, gHits[0].getAlignedFeature());
Arrays.sort(gHits);
hits.put(geneType, gHits);

VDJCAlignments result = new VDJCAlignments(readId, hits, targets);
result.alignmentsIndex = alignmentsIndex;
return result;
return new VDJCAlignments(alignmentsIndex, hits, targets, history, originalReads);
}

public boolean hasNoHitsInTarget(int i) {
Expand All @@ -96,8 +128,15 @@ public boolean hasNoHitsInTarget(int i) {
return true;
}

public long getReadId() {
return readId;
private volatile long minReadId = -1;

public long getMinReadId() {
if (minReadId != -1)
return minReadId;
long min = Long.MAX_VALUE;
for (SequenceHistory s : history)
min = Math.min(s.minReadId(), min);
return minReadId = min;
}

public long getAlignmentsIndex() {
Expand All @@ -108,33 +147,6 @@ public void setAlignmentsIndex(long alignmentsIndex) {
this.alignmentsIndex = alignmentsIndex;
}

public void setOriginalDescriptions(String[] description) {
assert description == null || originalSequences == null || description.length == originalSequences.length;
this.originalDescriptions = description;
}

public String[] getOriginalDescriptions() {
return originalDescriptions;
}

public void setTargetDescriptions(String[] description) {
assert description == null || description.length == targets.length;
this.targetDescriptions = description;
}

public String[] getTargetDescriptions() {
return targetDescriptions;
}

public void setOriginalSequences(NSequenceWithQuality[] originalSequences) {
assert originalDescriptions == null || originalSequences == null || originalDescriptions.length == originalSequences.length;
this.originalSequences = originalSequences;
}

public NSequenceWithQuality[] getOriginalSequences() {
return originalSequences;
}

/**
* Returns {@code true} if at least one V and one J hit among first {@code top} hits have same chain and false
* otherwise (first {@code top} V hits have different chain from those have first {@code top} J hits).
Expand Down Expand Up @@ -202,20 +214,18 @@ private static int actualTop(VDJCHit[] hits, int top) {
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof VDJCAlignments)) return false;
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;

VDJCAlignments that = (VDJCAlignments) o;

if (readId != that.readId) return false;

return true;
return Arrays.equals(history, that.history);
}

@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (int) (readId^(readId >>> 32));
result = 31 * result + Arrays.hashCode(history);
return result;
}
}

0 comments on commit 80ef3d1

Please sign in to comment.